Mirror of https://github.com/pytorch/pytorch.git, synced 2025-10-24 15:44:58 +08:00

Compare commits: context_te ... v2.7.1-rc1 (8 commits)
Commits in this comparison (author and date were not captured):

| SHA1 | Author | Date |
|---|---|---|
| 27e9ca5d36 | | |
| dab8130f4f | | |
| 20d62a8d25 | | |
| cd885e7c9a | | |
| 99847860ea | | |
| 24b0c4abfc | | |
| 2dc4b15cf3 | | |
| cd6037ed4b | | |
.github/actionlint.yaml (vendored): 4 changed lines
@@ -3,8 +3,8 @@ self-hosted-runner:
    # GitHub hosted runner that actionlint doesn't recognize because actionlint version (1.6.21) is too old
    - ubuntu-24.04
    # GitHub hosted x86 Linux runners
-   - linux.20_04.4x
-   - linux.20_04.16x
+   - linux.24_04.4x
+   - linux.24_04.16x
    # Organization-wide AWS Linux Runners
    - linux.large
    - linux.2xlarge
A second hunk follows whose file header was not captured; it falls inside generate_wheels_matrix (removal markers inferred from the hunk counts, 8 lines before and 6 after):

@@ -417,8 +417,6 @@ def generate_wheels_matrix(
                "pytorch_extra_install_requirements": (
                    PYTORCH_EXTRA_INSTALL_REQUIREMENTS["xpu"]
                    if gpu_arch_type == "xpu"
-                   else PYTORCH_EXTRA_INSTALL_REQUIREMENTS[CUDA_STABLE]
-                   if os != "linux"
                    else ""
                ),
            }
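For readers skimming the hunk: with those two lines gone, the chained conditional stops falling back to the x86_64 CUDA pin list for every OS value other than the literal "linux". Below is a minimal sketch of the before/after behaviour, with placeholder strings standing in for the real PYTORCH_EXTRA_INSTALL_REQUIREMENTS dict and CUDA_STABLE constant (an illustration under those assumptions, not the upstream source, which was not fully captured here):

```python
# Illustrative sketch only: placeholder pin strings, not the real values.
PYTORCH_EXTRA_INSTALL_REQUIREMENTS = {
    "xpu": "intel-extra-pins ...",   # placeholder
    "12.6": "nvidia-cuda-pins ...",  # placeholder for the CUDA_STABLE entry
}
CUDA_STABLE = "12.6"


def extra_requirements_before(gpu_arch_type: str, os: str) -> str:
    # Old chained conditional: any os other than the literal "linux"
    # (macOS, Windows, aarch64/s390x variants, ...) picked up the CUDA pins.
    return (
        PYTORCH_EXTRA_INSTALL_REQUIREMENTS["xpu"]
        if gpu_arch_type == "xpu"
        else PYTORCH_EXTRA_INSTALL_REQUIREMENTS[CUDA_STABLE]
        if os != "linux"
        else ""
    )


def extra_requirements_after(gpu_arch_type: str, os: str) -> str:
    # New conditional: only xpu builds carry extra install requirements here.
    return PYTORCH_EXTRA_INSTALL_REQUIREMENTS["xpu"] if gpu_arch_type == "xpu" else ""


print(extra_requirements_before("cpu", "macos-arm64"))  # nvidia-cuda-pins ...
print(extra_requirements_after("cpu", "macos-arm64"))   # empty string, matching the deletions below
```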
.github/workflows/check-labels.yml (vendored): 2 changed lines
@@ -35,7 +35,7 @@ jobs:
      pull-requests: write
    name: Check labels
    if: github.repository_owner == 'pytorch'
-   runs-on: linux.20_04.4x
+   runs-on: linux.24_04.4x
    steps:
      - name: Checkout PyTorch
        uses: pytorch/pytorch/.github/actions/checkout-pytorch@release/2.7
.github/workflows/generated-linux-aarch64-binary-manywheel-nightly.yml (generated, vendored): 6 changed lines
Each of the six hunks deletes the same PYTORCH_EXTRA_INSTALL_REQUIREMENTS line from one build job's env block (the hunk counts drop from 7 lines to 6). Representative hunk for the py3_9 job:

@@ -64,7 +64,6 @@ jobs:
      ALPINE_IMAGE: "arm64v8/alpine"
      build_name: manywheel-py3_9-cpu-aarch64
      build_environment: linux-aarch64-binary-manywheel
-     PYTORCH_EXTRA_INSTALL_REQUIREMENTS: nvidia-cuda-nvrtc-cu12==12.6.77; platform_system == 'Linux' and platform_machine == 'x86_64' | nvidia-cuda-runtime-cu12==12.6.77; platform_system == 'Linux' and platform_machine == 'x86_64' | nvidia-cuda-cupti-cu12==12.6.80; platform_system == 'Linux' and platform_machine == 'x86_64' | nvidia-cudnn-cu12==9.5.1.17; platform_system == 'Linux' and platform_machine == 'x86_64' | nvidia-cublas-cu12==12.6.4.1; platform_system == 'Linux' and platform_machine == 'x86_64' | nvidia-cufft-cu12==11.3.0.4; platform_system == 'Linux' and platform_machine == 'x86_64' | nvidia-curand-cu12==10.3.7.77; platform_system == 'Linux' and platform_machine == 'x86_64' | nvidia-cusolver-cu12==11.7.1.2; platform_system == 'Linux' and platform_machine == 'x86_64' | nvidia-cusparse-cu12==12.5.4.2; platform_system == 'Linux' and platform_machine == 'x86_64' | nvidia-cusparselt-cu12==0.6.3; platform_system == 'Linux' and platform_machine == 'x86_64' | nvidia-nccl-cu12==2.26.2; platform_system == 'Linux' and platform_machine == 'x86_64' | nvidia-nvtx-cu12==12.6.77; platform_system == 'Linux' and platform_machine == 'x86_64' | nvidia-nvjitlink-cu12==12.6.85; platform_system == 'Linux' and platform_machine == 'x86_64' | nvidia-cufile-cu12==1.11.1.6; platform_system == 'Linux' and platform_machine == 'x86_64'
    secrets:
      github-token: ${{ secrets.GITHUB_TOKEN }}
  manywheel-py3_9-cpu-aarch64-test: # Testing

The identical deletion repeats at @@ -181,7 +180,6 (manywheel-py3_10-cpu-aarch64), @@ -298,7 +296,6 (manywheel-py3_11-cpu-aarch64), @@ -415,7 +412,6 (manywheel-py3_12-cpu-aarch64), @@ -532,7 +528,6 (manywheel-py3_13-cpu-aarch64), and @@ -649,7 +644,6 (manywheel-py3_13t-cpu-aarch64).
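Every pin in the deleted line is guarded by the same PEP 508 environment marker, `platform_system == 'Linux' and platform_machine == 'x86_64'`, so pip would never have installed these wheels on an aarch64 host anyway; the deletion drops a no-op (and misleading) requirement string from the non-x86_64 builds. A small sketch of how such a marker evaluates at install time, using the `packaging` library with example host values (not data captured from CI):

```python
# Evaluate the PEP 508 marker that guards each pin in the deleted line.
from packaging.markers import Marker

marker = Marker("platform_system == 'Linux' and platform_machine == 'x86_64'")

# Explicit environments for the two cases (example values, merged over the defaults).
x86_64_linux = {"platform_system": "Linux", "platform_machine": "x86_64"}
aarch64_linux = {"platform_system": "Linux", "platform_machine": "aarch64"}

print(marker.evaluate(x86_64_linux))   # True  -> pip installs the pinned CUDA wheel
print(marker.evaluate(aarch64_linux))  # False -> the pin is skipped on aarch64
```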
.github/workflows/generated-linux-s390x-binary-manywheel-nightly.yml (generated, vendored): 5 changed lines
The same pattern: each of the five hunks deletes the identical PYTORCH_EXTRA_INSTALL_REQUIREMENTS line from one build job. Representative hunk for the py3_9 job:

@@ -63,7 +63,6 @@ jobs:
      timeout-minutes: 420
      build_name: manywheel-py3_9-cpu-s390x
      build_environment: linux-s390x-binary-manywheel
-     PYTORCH_EXTRA_INSTALL_REQUIREMENTS: (the same CUDA 12.6 pin list quoted in full above)
    secrets:
      github-token: ${{ secrets.GITHUB_TOKEN }}
  manywheel-py3_9-cpu-s390x-test: # Testing

The deletion repeats at @@ -128,7 +127,6 (manywheel-py3_10-cpu-s390x), @@ -193,7 +191,6 (manywheel-py3_11-cpu-s390x), @@ -258,7 +255,6 (manywheel-py3_12-cpu-s390x), and @@ -323,7 +319,6 (manywheel-py3_13-cpu-s390x).
.github/workflows/generated-macos-arm64-binary-wheel-nightly.yml (generated, vendored): 6 changed lines
Again, each of the six hunks deletes the identical PYTORCH_EXTRA_INSTALL_REQUIREMENTS line, here from the macOS arm64 wheel build jobs. Representative hunk for the 3.9 job:

@@ -43,7 +43,6 @@ jobs:
      GPU_ARCH_TYPE: cpu
      SKIP_ALL_TESTS: 1
      DESIRED_PYTHON: "3.9"
-     PYTORCH_EXTRA_INSTALL_REQUIREMENTS: (the same CUDA 12.6 pin list quoted in full above)
    steps:
      # NOTE: These environment variables are put here so that they can be applied on every job equally
      # They are also here because setting them at a workflow level doesn't give us access to the

The deletion repeats at @@ -166,7 +165,6 (DESIRED_PYTHON "3.10"), @@ -289,7 +287,6 ("3.11"), @@ -412,7 +409,6 ("3.12"), @@ -535,7 +531,6 ("3.13"), and @@ -658,7 +653,6 ("3.13t").
.github/workflows/generated-windows-arm64-binary-wheel-nightly.yml (generated, vendored): 1 changed line
A single hunk deletes the same PYTORCH_EXTRA_INSTALL_REQUIREMENTS line from the Windows arm64 py3.12 wheel build job:

@@ -54,7 +54,6 @@ jobs:
      GPU_ARCH_TYPE: cpu
      SKIP_ALL_TESTS: 1
      DESIRED_PYTHON: "3.12"
-     PYTORCH_EXTRA_INSTALL_REQUIREMENTS: (the same CUDA 12.6 pin list quoted in full above)
    steps:
      # NOTE: These environment variables are put here so that they can be applied on every job equally
      # They are also here because setting them at a workflow level doesn't give us access to the
.github/workflows/generated-windows-binary-wheel-nightly.yml (generated, vendored): 24 changed lines
Each captured hunk again deletes the identical PYTORCH_EXTRA_INSTALL_REQUIREMENTS line from a Windows x86_64 wheel build job. Representative hunk for the cpu / py3.9 job:

@@ -54,7 +54,6 @@ jobs:
      GPU_ARCH_TYPE: cpu
      SKIP_ALL_TESTS: 1
      DESIRED_PYTHON: "3.9"
-     PYTORCH_EXTRA_INSTALL_REQUIREMENTS: (the same CUDA 12.6 pin list quoted in full above)
    steps:
      - name: Display EC2 information
        shell: bash

The same deletion appears in the other captured hunks, grouped by DESIRED_PYTHON and GPU_ARCH_TYPE:

- "3.9": cuda at @@ -288,7 +287,6, @@ -524,7 +522,6, and @@ -760,7 +757,6
- "3.10": cpu at @@ -1228,7 +1224,6; cuda at @@ -1462,7 +1457,6, @@ -1698,7 +1692,6, and @@ -1934,7 +1927,6
- "3.11": cpu at @@ -2402,7 +2394,6; cuda at @@ -2636,7 +2627,6, @@ -2872,7 +2862,6, and @@ -3108,7 +3097,6
- "3.12": cpu at @@ -3576,7 +3564,6; cuda at @@ -3810,7 +3797,6, @@ -4046,7 +4032,6, and @@ -4282,7 +4267,6
- "3.13": cpu at @@ -4750,7 +4734,6; cuda at @@ -4984,7 +4967,6, @@ -5220,7 +5202,6, and @@ -5456,7 +5437,6

The capture ends there; the file header counts 24 changed lines, so the final hunks of this file are not shown.
|
||||
steps:
|
||||
- name: Display EC2 information
|
||||
shell: bash
|
||||
@ -5924,7 +5904,6 @@ jobs:
|
||||
GPU_ARCH_TYPE: cpu
|
||||
SKIP_ALL_TESTS: 1
|
||||
DESIRED_PYTHON: "3.13t"
|
||||
PYTORCH_EXTRA_INSTALL_REQUIREMENTS: nvidia-cuda-nvrtc-cu12==12.6.77; platform_system == 'Linux' and platform_machine == 'x86_64' | nvidia-cuda-runtime-cu12==12.6.77; platform_system == 'Linux' and platform_machine == 'x86_64' | nvidia-cuda-cupti-cu12==12.6.80; platform_system == 'Linux' and platform_machine == 'x86_64' | nvidia-cudnn-cu12==9.5.1.17; platform_system == 'Linux' and platform_machine == 'x86_64' | nvidia-cublas-cu12==12.6.4.1; platform_system == 'Linux' and platform_machine == 'x86_64' | nvidia-cufft-cu12==11.3.0.4; platform_system == 'Linux' and platform_machine == 'x86_64' | nvidia-curand-cu12==10.3.7.77; platform_system == 'Linux' and platform_machine == 'x86_64' | nvidia-cusolver-cu12==11.7.1.2; platform_system == 'Linux' and platform_machine == 'x86_64' | nvidia-cusparse-cu12==12.5.4.2; platform_system == 'Linux' and platform_machine == 'x86_64' | nvidia-cusparselt-cu12==0.6.3; platform_system == 'Linux' and platform_machine == 'x86_64' | nvidia-nccl-cu12==2.26.2; platform_system == 'Linux' and platform_machine == 'x86_64' | nvidia-nvtx-cu12==12.6.77; platform_system == 'Linux' and platform_machine == 'x86_64' | nvidia-nvjitlink-cu12==12.6.85; platform_system == 'Linux' and platform_machine == 'x86_64' | nvidia-cufile-cu12==1.11.1.6; platform_system == 'Linux' and platform_machine == 'x86_64'
|
||||
steps:
|
||||
- name: Display EC2 information
|
||||
shell: bash
|
||||
@ -6158,7 +6137,6 @@ jobs:
|
||||
GPU_ARCH_TYPE: cuda
|
||||
SKIP_ALL_TESTS: 1
|
||||
DESIRED_PYTHON: "3.13t"
|
||||
PYTORCH_EXTRA_INSTALL_REQUIREMENTS: nvidia-cuda-nvrtc-cu12==12.6.77; platform_system == 'Linux' and platform_machine == 'x86_64' | nvidia-cuda-runtime-cu12==12.6.77; platform_system == 'Linux' and platform_machine == 'x86_64' | nvidia-cuda-cupti-cu12==12.6.80; platform_system == 'Linux' and platform_machine == 'x86_64' | nvidia-cudnn-cu12==9.5.1.17; platform_system == 'Linux' and platform_machine == 'x86_64' | nvidia-cublas-cu12==12.6.4.1; platform_system == 'Linux' and platform_machine == 'x86_64' | nvidia-cufft-cu12==11.3.0.4; platform_system == 'Linux' and platform_machine == 'x86_64' | nvidia-curand-cu12==10.3.7.77; platform_system == 'Linux' and platform_machine == 'x86_64' | nvidia-cusolver-cu12==11.7.1.2; platform_system == 'Linux' and platform_machine == 'x86_64' | nvidia-cusparse-cu12==12.5.4.2; platform_system == 'Linux' and platform_machine == 'x86_64' | nvidia-cusparselt-cu12==0.6.3; platform_system == 'Linux' and platform_machine == 'x86_64' | nvidia-nccl-cu12==2.26.2; platform_system == 'Linux' and platform_machine == 'x86_64' | nvidia-nvtx-cu12==12.6.77; platform_system == 'Linux' and platform_machine == 'x86_64' | nvidia-nvjitlink-cu12==12.6.85; platform_system == 'Linux' and platform_machine == 'x86_64' | nvidia-cufile-cu12==1.11.1.6; platform_system == 'Linux' and platform_machine == 'x86_64'
|
||||
steps:
|
||||
- name: Display EC2 information
|
||||
shell: bash
|
||||
@ -6394,7 +6372,6 @@ jobs:
|
||||
GPU_ARCH_TYPE: cuda
|
||||
SKIP_ALL_TESTS: 1
|
||||
DESIRED_PYTHON: "3.13t"
|
||||
PYTORCH_EXTRA_INSTALL_REQUIREMENTS: nvidia-cuda-nvrtc-cu12==12.6.77; platform_system == 'Linux' and platform_machine == 'x86_64' | nvidia-cuda-runtime-cu12==12.6.77; platform_system == 'Linux' and platform_machine == 'x86_64' | nvidia-cuda-cupti-cu12==12.6.80; platform_system == 'Linux' and platform_machine == 'x86_64' | nvidia-cudnn-cu12==9.5.1.17; platform_system == 'Linux' and platform_machine == 'x86_64' | nvidia-cublas-cu12==12.6.4.1; platform_system == 'Linux' and platform_machine == 'x86_64' | nvidia-cufft-cu12==11.3.0.4; platform_system == 'Linux' and platform_machine == 'x86_64' | nvidia-curand-cu12==10.3.7.77; platform_system == 'Linux' and platform_machine == 'x86_64' | nvidia-cusolver-cu12==11.7.1.2; platform_system == 'Linux' and platform_machine == 'x86_64' | nvidia-cusparse-cu12==12.5.4.2; platform_system == 'Linux' and platform_machine == 'x86_64' | nvidia-cusparselt-cu12==0.6.3; platform_system == 'Linux' and platform_machine == 'x86_64' | nvidia-nccl-cu12==2.26.2; platform_system == 'Linux' and platform_machine == 'x86_64' | nvidia-nvtx-cu12==12.6.77; platform_system == 'Linux' and platform_machine == 'x86_64' | nvidia-nvjitlink-cu12==12.6.85; platform_system == 'Linux' and platform_machine == 'x86_64' | nvidia-cufile-cu12==1.11.1.6; platform_system == 'Linux' and platform_machine == 'x86_64'
|
||||
steps:
|
||||
- name: Display EC2 information
|
||||
shell: bash
|
||||
@ -6630,7 +6607,6 @@ jobs:
|
||||
GPU_ARCH_TYPE: cuda
|
||||
SKIP_ALL_TESTS: 1
|
||||
DESIRED_PYTHON: "3.13t"
|
||||
PYTORCH_EXTRA_INSTALL_REQUIREMENTS: nvidia-cuda-nvrtc-cu12==12.6.77; platform_system == 'Linux' and platform_machine == 'x86_64' | nvidia-cuda-runtime-cu12==12.6.77; platform_system == 'Linux' and platform_machine == 'x86_64' | nvidia-cuda-cupti-cu12==12.6.80; platform_system == 'Linux' and platform_machine == 'x86_64' | nvidia-cudnn-cu12==9.5.1.17; platform_system == 'Linux' and platform_machine == 'x86_64' | nvidia-cublas-cu12==12.6.4.1; platform_system == 'Linux' and platform_machine == 'x86_64' | nvidia-cufft-cu12==11.3.0.4; platform_system == 'Linux' and platform_machine == 'x86_64' | nvidia-curand-cu12==10.3.7.77; platform_system == 'Linux' and platform_machine == 'x86_64' | nvidia-cusolver-cu12==11.7.1.2; platform_system == 'Linux' and platform_machine == 'x86_64' | nvidia-cusparse-cu12==12.5.4.2; platform_system == 'Linux' and platform_machine == 'x86_64' | nvidia-cusparselt-cu12==0.6.3; platform_system == 'Linux' and platform_machine == 'x86_64' | nvidia-nccl-cu12==2.26.2; platform_system == 'Linux' and platform_machine == 'x86_64' | nvidia-nvtx-cu12==12.6.77; platform_system == 'Linux' and platform_machine == 'x86_64' | nvidia-nvjitlink-cu12==12.6.85; platform_system == 'Linux' and platform_machine == 'x86_64' | nvidia-cufile-cu12==1.11.1.6; platform_system == 'Linux' and platform_machine == 'x86_64'
|
||||
steps:
|
||||
- name: Display EC2 information
|
||||
shell: bash
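The requirement strings above gate each dependency with a PEP 508 environment marker, so the CUDA wheels listed in the aarch64 metadata are only ever installed on x86_64 Linux hosts. A minimal sketch of how such a marker evaluates (assuming the third-party `packaging` library is available):

    from packaging.markers import Marker

    marker = Marker("platform_system == 'Linux' and platform_machine == 'x86_64'")

    # On an aarch64 Linux host the marker is False, so pip skips the CUDA wheels.
    print(marker.evaluate({"platform_system": "Linux", "platform_machine": "aarch64"}))  # False
    print(marker.evaluate({"platform_system": "Linux", "platform_machine": "x86_64"}))   # True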
6
.github/workflows/lint.yml
vendored
@ -216,7 +216,7 @@ jobs:
|
||||
test_run_test:
|
||||
name: Test `run_test.py` is usable without boto3
|
||||
if: ${{ github.repository == 'pytorch/pytorch' }}
|
||||
runs-on: linux.20_04.4x
|
||||
runs-on: linux.24_04.4x
|
||||
steps:
|
||||
- name: Checkout PyTorch
|
||||
uses: pytorch/pytorch/.github/actions/checkout-pytorch@release/2.7
|
||||
@ -242,7 +242,7 @@ jobs:
|
||||
test_collect_env:
|
||||
if: ${{ github.repository == 'pytorch/pytorch' }}
|
||||
name: Test collect_env
|
||||
runs-on: linux.20_04.4x
|
||||
runs-on: linux.24_04.4x
|
||||
strategy:
|
||||
matrix:
|
||||
test_type: [with_torch, without_torch, older_python_version]
|
||||
@ -265,7 +265,7 @@ jobs:
|
||||
if: matrix.test_type == 'older_python_version'
|
||||
uses: actions/setup-python@v4
|
||||
with:
|
||||
python-version: 3.6
|
||||
python-version: 3.8
|
||||
architecture: x64
|
||||
check-latest: false
|
||||
cache: pip
2
.github/workflows/revert.yml
vendored
@ -7,7 +7,7 @@ on:
|
||||
jobs:
|
||||
do_revert:
|
||||
name: try_revert_pr_${{ github.event.client_payload.pr_num }}
|
||||
runs-on: linux.20_04.4x
|
||||
runs-on: linux.24_04.4x
|
||||
environment: mergebot
|
||||
env:
|
||||
GH_RUN_URL: ${{ github.server_url }}/${{ github.repository }}/actions/runs/${{ github.run_id }}
2
.github/workflows/trymerge.yml
vendored
@ -7,7 +7,7 @@ on:
|
||||
jobs:
|
||||
do_merge:
|
||||
name: try_merge_pr_${{ github.event.client_payload.pr_num }}
|
||||
runs-on: linux.20_04.4x
|
||||
runs-on: linux.24_04.4x
|
||||
environment: mergebot
|
||||
permissions:
|
||||
id-token: write
|
||||
|
||||
@ -540,7 +540,7 @@ Vectorized<float> inline fmadd(const Vectorized<float>& a, const Vectorized<floa
|
||||
|
||||
template <>
|
||||
Vectorized<float> inline fmsub(const Vectorized<float>& a, const Vectorized<float>& b, const Vectorized<float>& c) {
|
||||
return Vectorized<float>(vfmsq_f32(c, a, b));
|
||||
return Vectorized<float>(vnegq_f32(vfmsq_f32(c, a, b)));
|
||||
}
|
||||
|
||||
inline Vectorized<float> Vectorized<float>::erf() const{
|
||||
|
||||
@ -582,7 +582,7 @@ Vectorized<c10::Half> inline fmsub(
|
||||
const Vectorized<c10::Half>& b,
|
||||
const Vectorized<c10::Half>& c) {
|
||||
#ifdef __ARM_FEATURE_FP16_VECTOR_ARITHMETIC
|
||||
return Vectorized<c10::Half>(vfmsq_f16(c, a, b));
|
||||
return Vectorized<c10::Half>(vnegq_f16(vfmsq_f16(c, a, b)));
|
||||
#else
|
||||
return a * b - c;
|
||||
#endif
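The change above follows from the intrinsic semantics: NEON's vfmsq computes c - a * b, while fmsub is defined as a * b - c (see the scalar fallback), so the intrinsic result has to be negated. A small Python sketch of the algebra, with plain floats standing in for SIMD lanes:

    def vfmsq(c, a, b):
        # models NEON vfmsq_f32 / vfmsq_f16: c - a * b
        return c - a * b

    def fmsub(a, b, c):
        # fused multiply-subtract is a * b - c, hence the extra negation
        return -vfmsq(c, a, b)

    assert fmsub(2.0, 3.0, 1.0) == 2.0 * 3.0 - 1.0  # 5.0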
|
||||
|
||||
@ -133,6 +133,69 @@ constexpr auto calc_io_size(){
|
||||
#endif
|
||||
}
|
||||
|
||||
#ifndef USE_ROCM
|
||||
// To save on binary size of libtorch_cuda.so, we split the vectorized_elementwise_kernel
|
||||
// into two: one for vec_size=8 and one for vec_size=[2, 4], since vec8 is going to be
|
||||
// used on sm_90 and sm_100 exclusively.
|
||||
template <int vec_size, typename func_t, typename array_t>
|
||||
C10_LAUNCH_BOUNDS_1(num_threads())
|
||||
__global__ void vectorized_elementwise_kernel(int N, func_t f, array_t data) {
|
||||
if constexpr (vec_size == 8) {
|
||||
#if __CUDA_ARCH__ == 900 || __CUDA_ARCH__ == 1000
|
||||
using traits = function_traits<func_t>;
|
||||
constexpr auto io_size = calc_io_size<func_t>();
|
||||
int remaining = N - io_block_work_size<io_size>() * blockIdx.x;
|
||||
|
||||
if (remaining < io_block_work_size<io_size>()) { // if this block handles the remainder,
|
||||
// just do a naive unrolled loop
|
||||
auto input_calc = TrivialOffsetCalculator<traits::arity>();
|
||||
auto output_calc = TrivialOffsetCalculator<1>();
|
||||
auto loader = memory::LoadWithoutCast();
|
||||
auto storer = memory::StoreWithoutCast();
|
||||
auto policy = memory::policies::unroll<
|
||||
array_t,
|
||||
decltype(input_calc),
|
||||
decltype(output_calc),
|
||||
memory::LoadWithoutCast,
|
||||
memory::StoreWithoutCast,
|
||||
elems_per_thread<io_size>()>(
|
||||
data, remaining, input_calc, output_calc, loader, storer);
|
||||
elementwise_kernel_helper(f, policy);
|
||||
} else { // if this block has a full `block_work_size` data to handle, use
|
||||
// vectorized memory access
|
||||
elementwise_kernel_helper(
|
||||
f, memory::policies::vectorized<vec_size, array_t, elems_per_thread<io_size>()>(data));
|
||||
}
|
||||
#endif // __CUDA_ARCH__ == 900 || __CUDA_ARCH__ == 1000
|
||||
} else {
|
||||
using traits = function_traits<func_t>;
|
||||
constexpr auto io_size = calc_io_size<func_t>();
|
||||
int remaining = N - io_block_work_size<io_size>() * blockIdx.x;
|
||||
|
||||
if (remaining < io_block_work_size<io_size>()) { // if this block handles the remainder,
|
||||
// just do a naive unrolled loop
|
||||
auto input_calc = TrivialOffsetCalculator<traits::arity>();
|
||||
auto output_calc = TrivialOffsetCalculator<1>();
|
||||
auto loader = memory::LoadWithoutCast();
|
||||
auto storer = memory::StoreWithoutCast();
|
||||
auto policy = memory::policies::unroll<
|
||||
array_t,
|
||||
decltype(input_calc),
|
||||
decltype(output_calc),
|
||||
memory::LoadWithoutCast,
|
||||
memory::StoreWithoutCast,
|
||||
elems_per_thread<io_size>()>(
|
||||
data, remaining, input_calc, output_calc, loader, storer);
|
||||
elementwise_kernel_helper(f, policy);
|
||||
} else { // if this block has a full `block_work_size` data to handle, use
|
||||
// vectorized memory access
|
||||
elementwise_kernel_helper(
|
||||
f, memory::policies::vectorized<vec_size, array_t, elems_per_thread<io_size>()>(data));
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
#else // USE_ROCM
|
||||
template <int vec_size, typename func_t, typename array_t>
|
||||
C10_LAUNCH_BOUNDS_1(num_threads())
|
||||
__global__ void vectorized_elementwise_kernel(int N, func_t f, array_t data) {
|
||||
@ -157,15 +220,12 @@ __global__ void vectorized_elementwise_kernel(int N, func_t f, array_t data) {
|
||||
elementwise_kernel_helper(f, policy);
|
||||
} else { // if this block has a full `block_work_size` data to handle, use
|
||||
// vectorized memory access
|
||||
#ifdef USE_ROCM
|
||||
constexpr auto optimal_vec_size = calc_optimal_vec_size<vec_size, io_size>();
|
||||
#else
|
||||
constexpr auto optimal_vec_size = vec_size;
|
||||
#endif
|
||||
elementwise_kernel_helper(
|
||||
f, memory::policies::vectorized<optimal_vec_size, array_t, elems_per_thread<io_size>()>(data));
|
||||
}
|
||||
}
|
||||
#endif // USE_ROCM
|
||||
|
||||
template <
|
||||
typename func_t,
|
||||
@ -212,6 +272,11 @@ static inline void launch_vectorized_kernel(
|
||||
// Here we purposely omit vec8 for 1-byte data because of a bug in NVCC
|
||||
// that causes some numerical mismatches with uint8 on sm80 and sm90.
|
||||
// TODO: Revisit this after CUDA 12.8 update.
|
||||
cudaDeviceProp* p = at::cuda::getDeviceProperties(stream.device().index());
|
||||
const int computeCapability = p->major * 10 + p->minor;
|
||||
if (computeCapability != 90 && computeCapability != 100) {
|
||||
vec_size = std::min<uint16_t>(vec_size, 4);
|
||||
}
|
||||
if constexpr (sizeof(cpp_type) < 2) {
|
||||
vec_size = std::min<uint16_t>(vec_size, 4);
|
||||
}
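Taken together, the kernel split and the launch-side checks mean vec8 is only used on sm_90/sm_100 and only for element types of at least 2 bytes; everything else stays at vec4 or below. A rough Python sketch of that selection (illustrative only, not the actual dispatcher):

    def pick_vec_size(requested: int, compute_capability: int, elem_bytes: int) -> int:
        vec_size = requested
        if compute_capability not in (90, 100):
            vec_size = min(vec_size, 4)
        if elem_bytes < 2:
            # 1-byte types avoid vec8 because of the NVCC issue noted above
            vec_size = min(vec_size, 4)
        return vec_size

    assert pick_vec_size(8, 90, 4) == 8   # sm_90, float: vec8 allowed
    assert pick_vec_size(8, 80, 4) == 4   # sm_80: capped at vec4
    assert pick_vec_size(8, 100, 1) == 4  # uint8: capped even on sm_100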
|
||||
|
||||
@ -19,6 +19,7 @@ std::vector<int64_t> pool_output_sizes(
|
||||
output_size[1] = input_size[1];
|
||||
|
||||
for (const auto i : c10::irange(2, input_size.size())) {
|
||||
TORCH_CHECK_VALUE(stride[i - 2] > 0, "Strides must be positive!");
|
||||
output_size[i] = pooling_output_shape_pad_lr<int64_t>(
|
||||
input_size[i],
|
||||
kernel_size[i - 2],
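On the Python side the new TORCH_CHECK_VALUE surfaces as a ValueError, matching the regression test added later in this change. A hedged usage sketch (assumes a PyTorch build with MKL-DNN support):

    import torch

    x = torch.rand(1, 2, 3, 3).to_mkldnn()
    try:
        torch.mkldnn_max_pool2d(x, kernel_size=3, stride=0)  # zero stride is now rejected
    except ValueError as e:
        print("rejected:", e)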
|
||||
|
||||
@ -64,6 +64,16 @@ CACHE_ALIGN #define
|
||||
#undef CHECK_WITH_FMA
|
||||
#endif
|
||||
|
||||
template <typename scalar_t>
|
||||
struct OpMathType {
|
||||
using type = scalar_t;
|
||||
};
|
||||
template <>
|
||||
struct OpMathType<c10::Half> {
|
||||
using type = float;
|
||||
};
|
||||
|
||||
|
||||
template<typename T>
|
||||
using Complex = typename c10::complex<T>;
|
||||
|
||||
@ -1279,15 +1289,17 @@ std::enable_if_t<is_complex<Complex<T>>::value, Complex<T>> local_division(Compl
|
||||
template <typename T>
|
||||
std::enable_if_t<!is_complex<T>::value, T> local_fmadd(T a, T b, T c) {
|
||||
PreventFma noFma;
|
||||
T ab = a * b;
|
||||
return noFma.add(ab, c);
|
||||
using op_math_t = typename OpMathType<T>::type;
|
||||
auto ab = static_cast<op_math_t>(a) * static_cast<op_math_t>(b);
|
||||
return static_cast<T>(noFma.add(ab, op_math_t(c)));
|
||||
}
|
||||
|
||||
template <typename T>
|
||||
std::enable_if_t<!is_complex<T>::value, T> local_fmsub(T a, T b, T c) {
|
||||
PreventFma noFma;
|
||||
T ab = a * b;
|
||||
return noFma.sub(ab, c);
|
||||
using op_math_t = typename OpMathType<T>::type;
|
||||
auto ab = static_cast<op_math_t>(a) * static_cast<op_math_t>(b);
|
||||
return static_cast<T>(noFma.sub(ab, op_math_t(c)));
|
||||
}
|
||||
|
||||
template <typename T>
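OpMathType maps c10::Half to float, so the scalar reference helpers above now form the product in float and round only once at the end. An illustrative Python/NumPy version of the same idea (not the test code itself):

    import numpy as np

    def fmadd_reference(a, b, c):
        # compute in float32 (the "op math" type for half) and round once at the end;
        # rounding the float16 product first can differ in the last bit for some inputs
        return np.float16(np.float32(a) * np.float32(b) + np.float32(c))

    a, b, c = np.float16(0.1), np.float16(3.0), np.float16(1.0)
    print(fmadd_reference(a, b, c))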
|
||||
|
||||
@ -82,7 +82,7 @@ ElectraForQuestionAnswering,pass,5
|
||||
|
||||
|
||||
|
||||
GPT2ForSequenceClassification,pass,5
|
||||
GPT2ForSequenceClassification,pass,6
|
||||
|
||||
|
||||
|
||||
@ -94,7 +94,7 @@ LayoutLMForMaskedLM,pass,5
|
||||
|
||||
|
||||
|
||||
LayoutLMForSequenceClassification,pass,5
|
||||
LayoutLMForSequenceClassification,pass,6
|
||||
|
||||
|
||||
|
||||
|
||||
|
@ -82,7 +82,7 @@ ElectraForQuestionAnswering,pass,5
|
||||
|
||||
|
||||
|
||||
GPT2ForSequenceClassification,pass,5
|
||||
GPT2ForSequenceClassification,pass,6
|
||||
|
||||
|
||||
|
||||
@ -94,7 +94,7 @@ LayoutLMForMaskedLM,pass,5
|
||||
|
||||
|
||||
|
||||
LayoutLMForSequenceClassification,pass,5
|
||||
LayoutLMForSequenceClassification,pass,6
|
||||
|
||||
|
||||
|
||||
|
||||
|
@ -82,7 +82,7 @@ ElectraForQuestionAnswering,pass,5
|
||||
|
||||
|
||||
|
||||
GPT2ForSequenceClassification,pass,5
|
||||
GPT2ForSequenceClassification,pass,6
|
||||
|
||||
|
||||
|
||||
@ -94,7 +94,7 @@ LayoutLMForMaskedLM,pass,5
|
||||
|
||||
|
||||
|
||||
LayoutLMForSequenceClassification,pass,5
|
||||
LayoutLMForSequenceClassification,pass,6
|
||||
|
||||
|
||||
|
||||
|
||||
|
@ -82,7 +82,7 @@ ElectraForQuestionAnswering,pass,5
|
||||
|
||||
|
||||
|
||||
GPT2ForSequenceClassification,pass,5
|
||||
GPT2ForSequenceClassification,pass,6
|
||||
|
||||
|
||||
|
||||
@ -94,7 +94,7 @@ LayoutLMForMaskedLM,pass,5
|
||||
|
||||
|
||||
|
||||
LayoutLMForSequenceClassification,pass,5
|
||||
LayoutLMForSequenceClassification,pass,6
|
||||
|
||||
|
||||
|
||||
|
||||
|
@ -82,7 +82,7 @@ ElectraForQuestionAnswering,pass,5
|
||||
|
||||
|
||||
|
||||
GPT2ForSequenceClassification,pass,5
|
||||
GPT2ForSequenceClassification,pass,6
|
||||
|
||||
|
||||
|
||||
@ -94,7 +94,7 @@ LayoutLMForMaskedLM,pass,5
|
||||
|
||||
|
||||
|
||||
LayoutLMForSequenceClassification,pass,5
|
||||
LayoutLMForSequenceClassification,pass,6
|
||||
|
||||
|
||||
|
||||
|
||||
|
@ -82,7 +82,7 @@ ElectraForQuestionAnswering,pass,5
|
||||
|
||||
|
||||
|
||||
GPT2ForSequenceClassification,pass,5
|
||||
GPT2ForSequenceClassification,pass,6
|
||||
|
||||
|
||||
|
||||
@ -94,7 +94,7 @@ LayoutLMForMaskedLM,pass,5
|
||||
|
||||
|
||||
|
||||
LayoutLMForSequenceClassification,pass,5
|
||||
LayoutLMForSequenceClassification,pass,6
|
||||
|
||||
|
||||
|
||||
|
||||
|
@ -82,7 +82,7 @@ ElectraForQuestionAnswering,pass,5
|
||||
|
||||
|
||||
|
||||
GPT2ForSequenceClassification,pass,5
|
||||
GPT2ForSequenceClassification,pass,6
|
||||
|
||||
|
||||
|
||||
@ -94,7 +94,7 @@ LayoutLMForMaskedLM,pass,5
|
||||
|
||||
|
||||
|
||||
LayoutLMForSequenceClassification,pass,5
|
||||
LayoutLMForSequenceClassification,pass,6
|
||||
|
||||
|
||||
|
||||
|
||||
|
@ -82,7 +82,7 @@ ElectraForQuestionAnswering,pass,5
|
||||
|
||||
|
||||
|
||||
GPT2ForSequenceClassification,pass,5
|
||||
GPT2ForSequenceClassification,pass,6
|
||||
|
||||
|
||||
|
||||
@ -94,7 +94,7 @@ LayoutLMForMaskedLM,pass,5
|
||||
|
||||
|
||||
|
||||
LayoutLMForSequenceClassification,pass,5
|
||||
LayoutLMForSequenceClassification,pass,6
|
||||
|
||||
|
||||
|
||||
|
||||
|
@ -82,7 +82,7 @@ ElectraForQuestionAnswering,pass,5
|
||||
|
||||
|
||||
|
||||
GPT2ForSequenceClassification,pass,5
|
||||
GPT2ForSequenceClassification,pass,6
|
||||
|
||||
|
||||
|
||||
@ -94,7 +94,7 @@ LayoutLMForMaskedLM,pass,5
|
||||
|
||||
|
||||
|
||||
LayoutLMForSequenceClassification,pass,5
|
||||
LayoutLMForSequenceClassification,pass,6
|
||||
|
||||
|
||||
|
||||
|
||||
|
@ -82,7 +82,7 @@ ElectraForQuestionAnswering,pass,5
|
||||
|
||||
|
||||
|
||||
GPT2ForSequenceClassification,pass,5
|
||||
GPT2ForSequenceClassification,pass,6
|
||||
|
||||
|
||||
|
||||
@ -94,7 +94,7 @@ LayoutLMForMaskedLM,pass,5
|
||||
|
||||
|
||||
|
||||
LayoutLMForSequenceClassification,pass,5
|
||||
LayoutLMForSequenceClassification,pass,6
|
||||
|
||||
|
||||
|
||||
|
||||
|
@ -82,7 +82,7 @@ ElectraForQuestionAnswering,pass,5
|
||||
|
||||
|
||||
|
||||
GPT2ForSequenceClassification,pass,5
|
||||
GPT2ForSequenceClassification,pass,6
|
||||
|
||||
|
||||
|
||||
@ -94,7 +94,7 @@ LayoutLMForMaskedLM,pass,5
|
||||
|
||||
|
||||
|
||||
LayoutLMForSequenceClassification,pass,5
|
||||
LayoutLMForSequenceClassification,pass,6
|
||||
|
||||
|
||||
|
||||
|
||||
|
@ -82,7 +82,7 @@ ElectraForQuestionAnswering,pass,5
|
||||
|
||||
|
||||
|
||||
GPT2ForSequenceClassification,pass,5
|
||||
GPT2ForSequenceClassification,pass,6
|
||||
|
||||
|
||||
|
||||
@ -94,7 +94,7 @@ LayoutLMForMaskedLM,pass,5
|
||||
|
||||
|
||||
|
||||
LayoutLMForSequenceClassification,pass,5
|
||||
LayoutLMForSequenceClassification,pass,6
|
||||
|
||||
|
||||
|
||||
|
||||
|
@ -1171,6 +1171,9 @@ if(USE_DISTRIBUTED AND USE_TENSORPIPE)
|
||||
set(CMAKE_POLICY_VERSION_MINIMUM 3.5)
|
||||
endif()
|
||||
add_subdirectory(${PROJECT_SOURCE_DIR}/third_party/tensorpipe)
|
||||
# Suppress warning to unblock libnop compilation by clang-17
|
||||
# See https://github.com/pytorch/pytorch/issues/151316
|
||||
target_compile_options_if_supported(tensorpipe -Wno-missing-template-arg-list-after-template-kw)
|
||||
if(CMAKE_VERSION VERSION_GREATER_EQUAL "4.0.0")
|
||||
unset(CMAKE_POLICY_VERSION_MINIMUM)
|
||||
endif()
|
||||
|
||||
@ -671,56 +671,6 @@ class ProcessGroupNCCLGroupTest(MultiProcessTestCase):
|
||||
except ModuleNotFoundError:
|
||||
self._helper_test_extra_cuda_context_by_memory()
|
||||
|
||||
@requires_nccl()
|
||||
@skip_if_lt_x_gpu(2)
|
||||
def test_extra_cuda_context_sync_ops(self):
|
||||
# Loop a bunch of sync ops and see if any of them creates extra context.
|
||||
# Requires nvml to check number of processes resident on a device.
|
||||
try:
|
||||
import pynvml
|
||||
|
||||
pynvml.nvmlInit()
|
||||
except Exception:
|
||||
self.skipTest("pynvml not available")
|
||||
|
||||
# Check if non-0 ranks would create extra CUDA context on device 0
|
||||
store = c10d.FileStore(self.file_name, self.world_size)
|
||||
device = torch.device(f"cuda:{self.rank:d}")
|
||||
c10d.init_process_group(
|
||||
backend="nccl",
|
||||
store=store,
|
||||
rank=self.rank,
|
||||
world_size=self.world_size,
|
||||
device_id=device,
|
||||
)
|
||||
|
||||
x = torch.empty((1,), device=device)
|
||||
y = torch.empty((self.world_size,), device=device)
|
||||
|
||||
c10d.all_reduce(x)
|
||||
c10d.reduce(x, dst=0)
|
||||
c10d.broadcast(x, src=0)
|
||||
c10d.all_gather_into_tensor(y, x)
|
||||
c10d.reduce_scatter_tensor(x, y)
|
||||
c10d.barrier()
|
||||
|
||||
# Wait a bit for remote processes to touch my device
|
||||
if self.rank == 0:
|
||||
time.sleep(5)
|
||||
|
||||
handle = pynvml.nvmlDeviceGetHandleByIndex(self.rank)
|
||||
processes = pynvml.nvmlDeviceGetComputeRunningProcesses(handle)
|
||||
nprocs = len(processes)
|
||||
|
||||
# Don't exit till rank 0 is done with the nvml detection
|
||||
c10d.barrier()
|
||||
c10d.destroy_process_group()
|
||||
self.assertLessEqual(
|
||||
nprocs,
|
||||
1,
|
||||
f"Found {nprocs} processes creating contexts on {device}, expecting 1 at most",
|
||||
)
|
||||
|
||||
@requires_nccl()
|
||||
@skip_but_pass_in_sandcastle_if(not TEST_MULTIGPU, "NCCL test requires 2+ GPUs")
|
||||
def test_destruct_before_terminate_pg(self):
|
||||
@ -3555,6 +3505,17 @@ class CommTest(test_c10d_common.AbstractCommTest, MultiProcessTestCase):
|
||||
|
||||
c10d.barrier(device_ids=[self.rank])
|
||||
|
||||
@requires_nccl()
|
||||
@skip_if_lt_x_gpu(2)
|
||||
def test_nccl_barrier_device_ids_function_argument(self):
|
||||
store = c10d.FileStore(self.file_name, self.world_size)
|
||||
c10d.init_process_group(
|
||||
backend="nccl", rank=self.rank, world_size=self.world_size, store=store
|
||||
)
|
||||
|
||||
with self.assertRaisesRegex(TypeError, "Invalid function argument"):
|
||||
c10d.barrier(device_ids=self.rank)
|
||||
|
||||
@requires_nccl()
|
||||
@skip_if_lt_x_gpu(2)
|
||||
def test_unwaited(self) -> None:
|
||||
|
||||
@ -11981,6 +11981,10 @@ fn
|
||||
self.assertEqual(y, t.sin())
|
||||
|
||||
def test_overridden_getattribute(self):
|
||||
class Bar:
|
||||
def __init__(self, v):
|
||||
self.v = v
|
||||
|
||||
class Foo:
|
||||
attribute_map = {}
|
||||
|
||||
@ -11988,6 +11992,9 @@ fn
|
||||
self.attribute_map = {
|
||||
"a_premap": "a",
|
||||
}
|
||||
# `bar` attribute requires propagating sources correctly through
|
||||
# object.__getattribute__
|
||||
self.bar = Bar(5)
|
||||
|
||||
def __setattr__(self, key, value):
|
||||
if key in super().__getattribute__("attribute_map"):
|
||||
@ -12015,7 +12022,7 @@ fn
|
||||
return f
|
||||
|
||||
def fn(x, f):
|
||||
return x * f.a_premap * f.a * f.b * f.sentinel
|
||||
return x * f.a_premap * f.a * f.b * f.sentinel * f.bar.v
|
||||
|
||||
x = torch.randn(4)
|
||||
|
||||
|
||||
@ -1848,9 +1848,9 @@ class GraphModule(torch.nn.Module):
|
||||
extern_node_serializer: Optional[Callable[[list[Any]], Any]] = None,
|
||||
):
|
||||
if dynamic:
|
||||
self.assertEqual(static_input_idxs, [0, 1, 2, 3, 4])
|
||||
self.assertEqual(static_input_idxs, [2, 3, 4])
|
||||
else:
|
||||
self.assertEqual(static_input_idxs, [0, 1, 2])
|
||||
self.assertEqual(static_input_idxs, [1, 2])
|
||||
return gm
|
||||
|
||||
compiler = functools.partial(compile_fx, inner_compile=inner_compile)
|
||||
|
||||
@ -2340,6 +2340,40 @@ if HAS_CUDA:
|
||||
self.run_static_input_param_test(fn, 4)
|
||||
self.assertEqual(counters["inductor"]["cudagraph_skips"], 0)
|
||||
|
||||
@torch._dynamo.config.patch("error_on_recompile", True)
|
||||
@torch._dynamo.config.patch("inline_inbuilt_nn_modules", True)
|
||||
def test_no_rerecord_with_mark_static_address(self):
|
||||
class Mod(torch.nn.Module):
|
||||
def __init__(self):
|
||||
super().__init__()
|
||||
self.linear = nn.Linear(2, 2)
|
||||
|
||||
def forward(self, x):
|
||||
return self.linear(x)
|
||||
|
||||
mod = Mod().cuda()
|
||||
|
||||
def fn_eager(x, marked_static_y):
|
||||
return torch.cos(x) + mod(marked_static_y)
|
||||
|
||||
with torch.device("cuda"):
|
||||
fn_compiled = torch.compile(fn_eager, mode="reduce-overhead")
|
||||
|
||||
# y is marked static
|
||||
y = torch.randn(2, 2)
|
||||
torch._dynamo.mark_static_address(y)
|
||||
|
||||
# Changing the pointer of x should not lead to re-records
|
||||
for _ in range(5):
|
||||
x = torch.randn(2, 2, requires_grad=True)
|
||||
res = fn_compiled(x, y)
|
||||
res.sum().backward()
|
||||
x.grad = None
|
||||
mod.linear.weight.grad = None
|
||||
mod.linear.bias.grad = None
|
||||
# One forward and one backward
|
||||
self.assertEqual(self.get_manager().new_graph_id().id, 2)
|
||||
|
||||
def test_tensor_constant_mutation(self):
|
||||
class Foo(torch.nn.Module):
|
||||
def __init__(self) -> None:
|
||||
|
||||
@ -1,5 +1,6 @@
|
||||
# Owner(s): ["module: inductor"]
|
||||
import contextlib
|
||||
import copy
|
||||
import functools
|
||||
import importlib
|
||||
import itertools
|
||||
@ -375,6 +376,35 @@ class OptimizeForInferenceTemplate(TestCase):
|
||||
):
|
||||
mod(x)
|
||||
|
||||
def test_static_indices_cudagraph(self):
|
||||
if self.device != "cuda":
|
||||
return
|
||||
|
||||
mod1 = torch.nn.Sequential(
|
||||
torch.nn.Linear(2, 2).to(self.device), torch.nn.Linear(2, 2).to(self.device)
|
||||
)
|
||||
mod2 = copy.deepcopy(mod1)
|
||||
|
||||
def fn(x, y, mod):
|
||||
x.add_(1)
|
||||
getattr(mod, "0").bias.add_(2)
|
||||
getattr(mod, "1").weight.add_(3)
|
||||
return mod(x) + y
|
||||
|
||||
x1 = torch.randn(2, 2, device=self.device)
|
||||
y1 = torch.randn(2, 2, device=self.device)
|
||||
x2 = x1.clone()
|
||||
y2 = y1.clone()
|
||||
|
||||
opt_fn = torch.compile(fn, mode="reduce-overhead")
|
||||
|
||||
with torch.no_grad():
|
||||
ref = fn(x1, y1, mod1)
|
||||
res = opt_fn(x2, y2, mod2)
|
||||
self.assertEqual(ref, res)
|
||||
self.assertEqual(x1, x2)
|
||||
self.assertEqual(y1, y2)
|
||||
|
||||
def test_rng_op(self):
|
||||
@torch.compile()
|
||||
def foo():
|
||||
|
||||
@ -1623,6 +1623,12 @@ class TestMkldnn(TestCase):
|
||||
# Above should trigger no warnings regardless of configuration
|
||||
self.assertEqual(len(w), 0)
|
||||
|
||||
def test_mkldnn_error_on_zero_stride(self, device):
|
||||
# Regression test for https://github.com/pytorch/pytorch/issues/149274
|
||||
x = torch.rand(1, 2, 3, 3).to_mkldnn()
|
||||
with self.assertRaises(ValueError):
|
||||
torch.mkldnn_max_pool2d(x, kernel_size=3, stride=0)
|
||||
|
||||
|
||||
instantiate_device_type_tests(TestMkldnn, globals(), only_for=('cpu',))
|
||||
|
||||
|
||||
@ -102,6 +102,7 @@ from .source import (
|
||||
FlattenScriptObjectSource,
|
||||
FloatTensorSource,
|
||||
FSDPNNModuleSource,
|
||||
GenericAttrSource,
|
||||
GetItemSource,
|
||||
GlobalSource,
|
||||
GlobalStateSource,
|
||||
@ -1046,6 +1047,14 @@ class GuardBuilder(GuardBuilderBase):
|
||||
example_value=example_value,
|
||||
guard_manager_enum=guard_manager_enum,
|
||||
)
|
||||
elif istype(source, GenericAttrSource):
|
||||
assert base_guard_manager # to make mypy happy
|
||||
out = base_guard_manager.generic_getattr_manager(
|
||||
attr=source.member,
|
||||
source=source_name,
|
||||
example_value=example_value,
|
||||
guard_manager_enum=guard_manager_enum,
|
||||
)
|
||||
elif istype(source, (AttrSource, UnspecializedParamBufferSource)):
|
||||
assert base_guard_manager # to make mypy happy
|
||||
|
||||
|
||||
@ -240,6 +240,30 @@ class AttrSource(ChainedSource):
|
||||
return f"{self.base.name()}.{self.member}"
|
||||
|
||||
|
||||
@dataclasses.dataclass(frozen=True)
|
||||
class GenericAttrSource(ChainedSource):
|
||||
member: str
|
||||
|
||||
def __post_init__(self):
|
||||
assert self.base, "Can't construct a GenericAttrSource without a valid base source"
|
||||
if "." in self.member:
|
||||
member_parts = self.member.split(".")
|
||||
object.__setattr__(
|
||||
self, "base", AttrSource(self.base, ".".join(member_parts[:-1]))
|
||||
)
|
||||
object.__setattr__(self, "member", member_parts[-1])
|
||||
|
||||
def reconstruct(self, codegen):
|
||||
codegen(self.base)
|
||||
codegen.extend_output(codegen.create_load_attrs(self.member))
|
||||
|
||||
def guard_source(self):
|
||||
return self.base.guard_source()
|
||||
|
||||
def name(self):
|
||||
return f"object.__getattribute__({self.base.name()}, {self.member!r})"
|
||||
|
||||
|
||||
@dataclasses.dataclass(frozen=True)
|
||||
class LocalCellSource(Source):
|
||||
"""
|
||||
|
||||
@ -38,7 +38,13 @@ from ..create_parameter_op import do_not_convert_to_tracable_parameter
|
||||
from ..exc import raise_observed_exception, unimplemented
|
||||
from ..guards import GuardBuilder, install_guard
|
||||
from ..mutation_guard import unpatched_nn_module_init
|
||||
from ..source import AttrSource, GetItemSource, TypeSource, WeakRefCallSource
|
||||
from ..source import (
|
||||
AttrSource,
|
||||
GenericAttrSource,
|
||||
GetItemSource,
|
||||
TypeSource,
|
||||
WeakRefCallSource,
|
||||
)
|
||||
from ..utils import (
|
||||
check_unspec_or_constant_args,
|
||||
cmp_name_to_op_mapping,
|
||||
@ -260,12 +266,16 @@ class SuperVariable(VariableTracker):
|
||||
return result
|
||||
|
||||
try:
|
||||
attr_value = self.objvar.value.__getattribute__(attr_name)
|
||||
# NB - use object.__getattribute__ to prevent running any user code
|
||||
attr_value = object.__getattribute__(self.objvar.value, attr_name)
|
||||
except AttributeError:
|
||||
raise_observed_exception(AttributeError, tx)
|
||||
|
||||
source = self.source and AttrSource(self.source, attr_name)
|
||||
return VariableTracker.build(tx, attr_value, source)
|
||||
attr_source = None
|
||||
if self.objvar.source is not None:
|
||||
# set up an object.__getattribute__(self.objvar, name) source
|
||||
attr_source = GenericAttrSource(self.objvar.source, attr_name)
|
||||
return VariableTracker.build(tx, attr_value, attr_source)
|
||||
|
||||
unimplemented(f"non-function or method super: {inner_fn}")
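As the NB comment says, the point of object.__getattribute__ is to read the attribute without running any user-defined hooks. A small illustration (hypothetical class, not from the test suite):

    class Foo:
        def __init__(self):
            self.x = 1

        def __getattribute__(self, name):
            print("user hook ran")  # side effect a tracer must not trigger
            return object.__getattribute__(self, name)

    f = Foo()
    f.x                              # prints "user hook ran", returns 1
    object.__getattribute__(f, "x")  # returns 1 without running the hook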
|
||||
|
||||
|
||||
@ -1022,18 +1022,20 @@ def _try_get_metadata_from_dynamo(
|
||||
seen_sources = set()
|
||||
|
||||
aot_autograd_arg_pos_to_source = []
|
||||
static_input_indices = []
|
||||
# Collect the new inputs lifted by aotdispatch
|
||||
for name in param_keys:
|
||||
for i, name in enumerate(param_keys):
|
||||
assert name in param_name_to_source, f"{name} not found."
|
||||
source = param_name_to_source[name]
|
||||
assert source not in seen_sources, source
|
||||
seen_sources.add(source)
|
||||
aot_autograd_arg_pos_to_source.append(source)
|
||||
|
||||
static_input_indices.append(i)
|
||||
|
||||
# Collect the dynamo graph inputs
|
||||
# TODO(mlazos): Revisit if this is still needed. With Dynamo install ID
|
||||
# matched tensors back into the Fx graph, this might not be necessary.
|
||||
static_input_indices = []
|
||||
for pos, node in enumerate(mod.graph.find_nodes(op="placeholder")):
|
||||
assert hasattr(node, "_dynamo_source")
|
||||
source = node._dynamo_source
|
||||
@ -1042,16 +1044,22 @@ def _try_get_metadata_from_dynamo(
|
||||
aot_autograd_arg_pos_to_source.append(source)
|
||||
source_name = source.name() if source else str(source)
|
||||
|
||||
# input[i] in dynamo is now:
|
||||
# input[i + len(extra_params)] in AOT,
|
||||
# where extra_params are the params/buffers that dynamo baked into the
|
||||
# OutputGraph
|
||||
actual_pos = pos + len(param_keys)
|
||||
|
||||
if "tensor_dict" in node.meta and node.meta["tensor_dict"].get(
|
||||
"_dynamo_static_input_type", None
|
||||
):
|
||||
static_inputs_log.debug(
|
||||
"Adding static input pos %s for source %s", pos, source_name
|
||||
"Adding static input pos %s for source %s", actual_pos, source_name
|
||||
)
|
||||
static_input_indices.append(pos)
|
||||
static_input_indices.append(actual_pos)
|
||||
else:
|
||||
static_inputs_log.debug(
|
||||
"Non-static input pos %s for source %s", pos, source_name
|
||||
"Non-static input pos %s for source %s", actual_pos, source_name
|
||||
)
|
||||
|
||||
assert full_args_num == len(aot_autograd_arg_pos_to_source)
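The offset bookkeeping above exists because AOTAutograd prepends the lifted params/buffers to the argument list, so a Dynamo placeholder at position pos becomes argument pos + len(param_keys). A toy sketch with made-up names:

    param_keys = ["weight", "bias"]          # hypothetical lifted params (always static)
    placeholder_is_static = [True, False]    # hypothetical per-placeholder flags

    static_input_indices = list(range(len(param_keys)))
    for pos, is_static in enumerate(placeholder_is_static):
        if is_static:
            static_input_indices.append(pos + len(param_keys))

    print(static_input_indices)  # [0, 1, 2]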
|
||||
|
||||
@ -203,7 +203,7 @@ def get_static_input_idxs(num_fixed: int) -> list[int]:
|
||||
if not context or not context.fw_metadata:
|
||||
return fixed
|
||||
|
||||
return fixed + context.fw_metadata.static_input_indices
|
||||
return context.fw_metadata.static_input_indices
|
||||
|
||||
|
||||
def record_original_output_strides(gm: GraphModule) -> None:
|
||||
@ -1580,7 +1580,6 @@ def fw_compiler_freezing(
|
||||
)
|
||||
|
||||
aot_example_inputs = [aot_example_inputs[ind] for ind in preserved_arg_indices]
|
||||
num_fixed = len(preserved_arg_indices) - num_example_inputs
|
||||
|
||||
fake_mode = detect_fake_mode(aot_example_inputs)
|
||||
|
||||
@ -1591,7 +1590,7 @@ def fw_compiler_freezing(
|
||||
idx for idx, n in enumerate(model_outputs) if isinstance(n, torch.fx.Node)
|
||||
]
|
||||
|
||||
static_input_idxs = list(range(num_fixed))
|
||||
static_input_idxs = []
|
||||
# constant params will be real tensors, not fake
|
||||
tracing_context = torch._guards.TracingContext.try_get()
|
||||
unwrapped_args_offsets = [0]
|
||||
@ -1623,7 +1622,7 @@ def fw_compiler_freezing(
|
||||
tracing_context.params_flat[i] = None
|
||||
|
||||
if tracing_context.fw_metadata:
|
||||
static_input_idxs += tracing_context.fw_metadata.static_input_indices
|
||||
static_input_idxs = tracing_context.fw_metadata.static_input_indices
|
||||
|
||||
with mock.patch.object(fake_mode, "allow_non_fake_inputs", True):
|
||||
optimized_function = inner_compile(
|
||||
|
||||
@ -52,14 +52,21 @@ def replace_params_with_constants(
|
||||
in (MutationType.MUTATED_IN_GRAPH, MutationType.MUTATED_OUT_GRAPH)
|
||||
]
|
||||
|
||||
static_indices_new = []
|
||||
static_indices_offset = 0
|
||||
for i, (real_input, node) in enumerate(zip(flat_params, fake_inp_nodes)):
|
||||
if i in mutated_inps or i in aliased_input_args:
|
||||
preserved_arg_indices.append(i)
|
||||
continue
|
||||
replace_node_with_constant(gm, node, real_input)
|
||||
if i in fw_metadata.static_input_indices:
|
||||
new_static_index = i - static_indices_offset
|
||||
static_indices_new.append(new_static_index)
|
||||
else:
|
||||
replace_node_with_constant(gm, node, real_input)
|
||||
static_indices_offset += 1
|
||||
# add on non param inputs
|
||||
preserved_arg_indices.extend(range(len(flat_params), len(params)))
|
||||
# is this necessary ?
|
||||
fw_metadata.static_input_indices = static_indices_new
|
||||
gm.recompile()
|
||||
return preserved_arg_indices
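The re-indexing above compensates for parameters that freezing folds into constants: each folded parameter disappears from the runtime argument list, so surviving static indices shift down by the number of constants folded before them. A toy sketch with made-up values:

    static_input_indices = [0, 2, 3]  # hypothetical static indices before freezing
    folded = {1}                      # hypothetical param positions turned into constants

    new_indices, offset = [], 0
    for i in range(4):                # walk every original parameter position
        if i in folded:
            offset += 1               # this position no longer exists as an input
        elif i in static_input_indices:
            new_indices.append(i - offset)

    print(new_indices)  # [0, 1, 2]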
|
||||
|
||||
|
||||
@ -3488,6 +3488,85 @@ class GetAttrGuardAccessor : public GuardAccessor {
|
||||
PyObject* _attr_name;
|
||||
};
|
||||
|
||||
/**
|
||||
* Represents object.__getattribute__(obj, attr_name) accessor.
|
||||
*/
|
||||
class GenericGetAttrGuardAccessor : public GuardAccessor {
|
||||
public:
|
||||
GenericGetAttrGuardAccessor(
|
||||
RootGuardManager* root,
|
||||
py::str name,
|
||||
std::string source,
|
||||
py::handle example_value,
|
||||
py::handle guard_manager_enum)
|
||||
: GuardAccessor(
|
||||
root,
|
||||
name,
|
||||
std::move(source),
|
||||
example_value,
|
||||
guard_manager_enum),
|
||||
_attr_name(name.ptr()) {}
|
||||
|
||||
// NB: Intentional duplication between check_nopybind and
|
||||
// check_verbose_nopybind.
|
||||
bool check_nopybind(PyObject* obj, bool matches_dict_tag = false)
|
||||
override { // borrowed ref
|
||||
PyObject* x = PyObject_GenericGetAttr(obj, _attr_name); // new ref
|
||||
if (x == nullptr) {
|
||||
// Attribute absent, clear the exception and return false.
|
||||
PyErr_Clear();
|
||||
return false;
|
||||
}
|
||||
bool result = _guard_manager->check_nopybind(x);
|
||||
Py_DECREF(x);
|
||||
return result;
|
||||
}
|
||||
|
||||
GuardDebugInfo check_verbose_nopybind(
|
||||
PyObject* obj) override { // borrowed ref
|
||||
PyObject* x = PyObject_GenericGetAttr(obj, _attr_name); // new ref
|
||||
if (x == nullptr) {
|
||||
// Attribute absent, clear the exception and return false.
|
||||
PyErr_Clear();
|
||||
return GuardDebugInfo(
|
||||
false, "getattr failed on source " + get_source(), 0);
|
||||
}
|
||||
GuardDebugInfo result = _guard_manager->check_verbose_nopybind(x);
|
||||
Py_DECREF(x);
|
||||
return result;
|
||||
}
|
||||
|
||||
std::string repr() const override {
|
||||
// Helpful when printing GuardManager tree structure.
|
||||
return "GenericGetAttrGuardAccessor(" +
|
||||
py::str(_attr_name).cast<std::string>() + ")";
|
||||
}
|
||||
|
||||
public: // cloning functions
|
||||
GenericGetAttrGuardAccessor(
|
||||
GuardManager* guard_manager,
|
||||
GenericGetAttrGuardAccessor* from)
|
||||
: GuardAccessor(guard_manager, from) {
|
||||
from->clone_visitor(this);
|
||||
}
|
||||
|
||||
GuardAccessor* clone(
|
||||
RootGuardManager* cloned_root,
|
||||
const py::function& clone_filter_fn) override {
|
||||
return clone_common<GenericGetAttrGuardAccessor>(
|
||||
cloned_root, clone_filter_fn);
|
||||
}
|
||||
|
||||
void clone_visitor(GenericGetAttrGuardAccessor* to) {
|
||||
to->_attr_name = _attr_name;
|
||||
}
|
||||
|
||||
private:
|
||||
// no need of py::object here because the attr_name is already passed on to
|
||||
// the base class as accessor_key which is a py::object.
|
||||
PyObject* _attr_name{nullptr};
|
||||
};
|
||||
|
||||
/**
|
||||
* Represents x.__dict__ acccessor.
|
||||
*/
|
||||
@ -5349,6 +5428,12 @@ PyObject* torch_c_dynamo_guards_init() {
|
||||
GuardAccessor,
|
||||
std::unique_ptr<GetAttrGuardAccessor>>(py_m, "GetAttrGuardAccessor");
|
||||
// NOLINTNEXTLINE(bugprone-unused-raii)
|
||||
py::class_<
|
||||
GenericGetAttrGuardAccessor,
|
||||
GuardAccessor,
|
||||
std::unique_ptr<GenericGetAttrGuardAccessor>>(
|
||||
py_m, "GenericGetAttrGuardAccessor");
|
||||
// NOLINTNEXTLINE(bugprone-unused-raii)
|
||||
py::class_<
|
||||
GetGenericDictGuardAccessor,
|
||||
GuardAccessor,
|
||||
@ -5917,6 +6002,16 @@ PyObject* torch_c_dynamo_guards_init() {
|
||||
py::return_value_policy::reference)
|
||||
// return by reference because C++ GuardManager has the ownership of
|
||||
// accessors and guard managers
|
||||
.def(
|
||||
"generic_getattr_manager",
|
||||
&GuardManager::get_child_manager<GenericGetAttrGuardAccessor>,
|
||||
py::arg("attr"),
|
||||
py::arg("source"),
|
||||
py::arg("example_value"),
|
||||
py::arg("guard_manager_enum"),
|
||||
py::return_value_policy::reference)
|
||||
// return by reference because C++ GuardManager has the ownership of
|
||||
// accessors and guard managers
|
||||
.def(
|
||||
"getattr_manager",
|
||||
&GuardManager::get_child_manager<GetAttrGuardAccessor>,
|
||||
|
||||
@ -4596,7 +4596,7 @@ def barrier(
|
||||
group (ProcessGroup, optional): The process group to work on. If None,
|
||||
the default process group will be used.
|
||||
async_op (bool, optional): Whether this op should be an async op
|
||||
device_ids ([int], optional): List of device/GPU ids. Only one id is expected.
|
||||
device_ids ([int], optional): List of device/GPU ids.
|
||||
|
||||
Returns:
|
||||
Async work handle, if async_op is set to True.
|
||||
@ -4604,34 +4604,21 @@ def barrier(
|
||||
|
||||
.. note:: `ProcessGroupNCCL` now blocks the CPU thread until the barrier collective completes.
|
||||
"""
|
||||
group = group or _get_default_group()
|
||||
|
||||
if _rank_not_in_group(group):
|
||||
_warn_not_in_group("barrier")
|
||||
return
|
||||
|
||||
opts = BarrierOptions()
|
||||
# Detect the accelerator on the machine. If no accelerator is available, it
|
||||
# returns CPU.
|
||||
device = torch._C._get_accelerator()
|
||||
if isinstance(device_ids, list):
|
||||
opts.device_ids = device_ids
|
||||
# use only the first device id
|
||||
opts.device = torch.device(device.type, device_ids[0])
|
||||
elif getattr(group, "bound_device_id", None) is not None:
|
||||
# Use device id from `init_process_group(device_id=...)`
|
||||
opts.device = group.bound_device_id # type: ignore[assignment]
|
||||
elif device.type == "cpu" or _get_object_coll_device(group) == "cpu":
|
||||
opts.device = torch.device("cpu")
|
||||
else:
|
||||
# Use the current device set by the user. If user did not set any, this
|
||||
# may use default device 0, causing issues like hang or all processes
|
||||
# creating context on device 0.
|
||||
opts.device = device
|
||||
warnings.warn( # warn only once
|
||||
"No device id is provided via `init_process_group` or `barrier `. Using the current device set by the user. "
|
||||
)
|
||||
opts.device = torch.device(_get_object_coll_device(group))
|
||||
if device_ids is not None:
|
||||
if isinstance(device_ids, list):
|
||||
opts.device_ids = device_ids
|
||||
else:
|
||||
raise TypeError(
|
||||
"Invalid function argument: device_ids type should be List[int]"
|
||||
)
|
||||
|
||||
group = group or _get_default_group()
|
||||
work = group.barrier(opts=opts)
|
||||
|
||||
if async_op:
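With the new device selection, the least surprising way to call barrier is to pin each rank to its device up front, either via init_process_group(device_id=...) or by passing device_ids explicitly (device_ids must be a list of ints; a bare int raises TypeError). A hedged usage sketch, assuming the usual MASTER_ADDR/MASTER_PORT environment and one GPU per rank:

    import torch
    import torch.distributed as dist

    def rendezvous(rank: int, world_size: int) -> None:
        device = torch.device(f"cuda:{rank}")
        dist.init_process_group("nccl", rank=rank, world_size=world_size,
                                device_id=device)   # pins the barrier device for this rank
        dist.barrier(device_ids=[rank])              # a list of ints, not a bare int
        dist.destroy_process_group()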