mirror of
https://github.com/pytorch/pytorch.git
synced 2025-11-20 02:24:54 +08:00
Compare commits
36 Commits
whc/shardi
...
update-vll
| Author | SHA1 | Date | |
|---|---|---|---|
| d79ccd0bba | |||
| d6bb3ad8b9 | |||
| 13ec55d15b | |||
| cea86781f2 | |||
| cdca10b275 | |||
| b8a3165d28 | |||
| 8f161997b1 | |||
| c8d790b56d | |||
| 878757cb66 | |||
| a369a56726 | |||
| a4e0720fe2 | |||
| 1efc14a50d | |||
| dc4f3c7505 | |||
| e8970ba010 | |||
| 41999a579d | |||
| ebb2001a48 | |||
| ae85307512 | |||
| 7921c0eb0e | |||
| dda2cb3769 | |||
| 4c5042b368 | |||
| e3c5b78999 | |||
| 14f370f551 | |||
| aa22d41f9b | |||
| d1f6dd6105 | |||
| 5333e51195 | |||
| 0e13964b74 | |||
| 20cae808f7 | |||
| 57927a620d | |||
| d91269e8ce | |||
| e2b53baaa4 | |||
| f077ecab92 | |||
| 57f36c9dc6 | |||
| ee5610fa91 | |||
| d0e7d2e093 | |||
| 5605fce2c8 | |||
| 2f023bf7b9 |
@ -125,10 +125,10 @@ case "$tag" in
|
||||
UCC_COMMIT=${_UCC_COMMIT}
|
||||
TRITON=yes
|
||||
;;
|
||||
pytorch-linux-jammy-cuda12.8-cudnn9-py3-gcc9-inductor-benchmarks)
|
||||
pytorch-linux-jammy-cuda12.8-cudnn9-py3-gcc11-inductor-benchmarks)
|
||||
CUDA_VERSION=12.8.1
|
||||
ANACONDA_PYTHON_VERSION=3.10
|
||||
GCC_VERSION=9
|
||||
GCC_VERSION=11
|
||||
VISION=yes
|
||||
KATEX=yes
|
||||
UCX_COMMIT=${_UCX_COMMIT}
|
||||
@ -146,16 +146,6 @@ case "$tag" in
|
||||
UCC_COMMIT=${_UCC_COMMIT}
|
||||
TRITON=yes
|
||||
;;
|
||||
pytorch-linux-jammy-cuda12.8-cudnn9-py3-gcc9)
|
||||
CUDA_VERSION=12.8.1
|
||||
ANACONDA_PYTHON_VERSION=3.10
|
||||
GCC_VERSION=9
|
||||
VISION=yes
|
||||
KATEX=yes
|
||||
UCX_COMMIT=${_UCX_COMMIT}
|
||||
UCC_COMMIT=${_UCC_COMMIT}
|
||||
TRITON=yes
|
||||
;;
|
||||
pytorch-linux-jammy-py3-clang12-onnx)
|
||||
ANACONDA_PYTHON_VERSION=3.10
|
||||
CLANG_VERSION=12
|
||||
@ -188,7 +178,7 @@ case "$tag" in
|
||||
fi
|
||||
GCC_VERSION=11
|
||||
VISION=yes
|
||||
ROCM_VERSION=7.0
|
||||
ROCM_VERSION=7.1
|
||||
NINJA_VERSION=1.9.0
|
||||
TRITON=yes
|
||||
KATEX=yes
|
||||
|
||||
@ -60,14 +60,16 @@ EOF
|
||||
DEBIAN_FRONTEND=noninteractive apt-get install -y --allow-unauthenticated rocm-llvm-dev
|
||||
fi
|
||||
|
||||
# precompiled miopen kernels added in ROCm 3.5, renamed in ROCm 5.5
|
||||
# search for all unversioned packages
|
||||
# if search fails it will abort this script; use true to avoid case where search fails
|
||||
MIOPENHIPGFX=$(apt-cache search --names-only miopen-hip-gfx | awk '{print $1}' | grep -F -v . || true)
|
||||
if [[ "x${MIOPENHIPGFX}" = x ]]; then
|
||||
echo "miopen-hip-gfx package not available" && exit 1
|
||||
else
|
||||
DEBIAN_FRONTEND=noninteractive apt-get install -y --allow-unauthenticated ${MIOPENHIPGFX}
|
||||
if [[ $(ver $ROCM_VERSION) -lt $(ver 7.1) ]]; then
|
||||
# precompiled miopen kernels added in ROCm 3.5, renamed in ROCm 5.5, removed in ROCm 7.1
|
||||
# search for all unversioned packages
|
||||
# if search fails it will abort this script; use true to avoid case where search fails
|
||||
MIOPENHIPGFX=$(apt-cache search --names-only miopen-hip-gfx | awk '{print $1}' | grep -F -v . || true)
|
||||
if [[ "x${MIOPENHIPGFX}" = x ]]; then
|
||||
echo "miopen-hip-gfx package not available" && exit 1
|
||||
else
|
||||
DEBIAN_FRONTEND=noninteractive apt-get install -y --allow-unauthenticated ${MIOPENHIPGFX}
|
||||
fi
|
||||
fi
|
||||
|
||||
# ROCm 6.0 had a regression where journal_mode was enabled on the kdb files resulting in permission errors at runtime
|
||||
|
||||
@ -12,8 +12,8 @@ function do_install() {
|
||||
|
||||
rocm_version_nodot=${rocm_version//./}
|
||||
|
||||
# post merge of https://github.com/icl-utk-edu/magma/pull/65
|
||||
MAGMA_VERSION=c0792ae825fb36872784892ea643dd6f3456bc5f
|
||||
# https://github.com/icl-utk-edu/magma/pull/65
|
||||
MAGMA_VERSION=d6e4117bc88e73f06d26c6c2e14f064e8fc3d1ec
|
||||
magma_archive="magma-rocm${rocm_version_nodot}-${MAGMA_VERSION}-1.tar.bz2"
|
||||
|
||||
rocm_dir="/opt/rocm"
|
||||
|
||||
@ -402,3 +402,6 @@ scikit-build==0.18.1
|
||||
pyre-extensions==0.0.32
|
||||
tabulate==0.9.0
|
||||
#Description: These packages are needed to build FBGEMM and torchrec on PyTorch CI
|
||||
|
||||
Jinja2==3.1.6
|
||||
#Description: required for torch.distributed.debug
|
||||
|
||||
@ -84,7 +84,6 @@ class VllmTestRunner(BaseRunner):
|
||||
self.VLLM_TEST_WHLS_REGEX = [
|
||||
"xformers/*.whl",
|
||||
"vllm/vllm*.whl",
|
||||
"flashinfer-python/flashinfer*.whl",
|
||||
]
|
||||
|
||||
def prepare(self):
|
||||
|
||||
35
.github/ci_configs/vllm/Dockerfile
vendored
35
.github/ci_configs/vllm/Dockerfile
vendored
@ -1,4 +1,4 @@
|
||||
ARG CUDA_VERSION=12.8.1
|
||||
ARG CUDA_VERSION=12.9.1
|
||||
ARG PYTHON_VERSION=3.12
|
||||
|
||||
# BUILD_BASE_IMAGE: used to set up Python and build the xformers and vllm wheels. It can be replaced with a different base image from the local machine,
|
||||
@ -124,7 +124,7 @@ RUN --mount=type=cache,target=/root/.cache/uv bash - <<'BASH'
|
||||
git clone https://github.com/facebookresearch/xformers.git
|
||||
|
||||
pushd xformers
|
||||
git checkout v0.0.32.post2
|
||||
git checkout v0.0.33.post1
|
||||
git submodule update --init --recursive
|
||||
python3 setup.py bdist_wheel --dist-dir=../xformers-dist --verbose
|
||||
popd
|
||||
@ -256,7 +256,7 @@ ENV UV_INDEX_STRATEGY="unsafe-best-match"
|
||||
# Use copy mode to avoid hardlink failures with Docker cache mounts
|
||||
ENV UV_LINK_MODE=copy
|
||||
|
||||
# Install build and runtime dependencies, this is needed for flashinfer install
|
||||
# Install build and runtime dependencies
|
||||
COPY requirements/build.txt requirements/build.txt
|
||||
COPY use_existing_torch.py use_existing_torch.py
|
||||
RUN python3 use_existing_torch.py
|
||||
@ -294,33 +294,9 @@ RUN --mount=type=cache,target=/root/.cache/uv \
|
||||
RUN --mount=type=cache,target=/root/.cache/uv \
|
||||
uv pip install --system /wheels/xformers/*.whl --verbose
|
||||
|
||||
# Build FlashInfer from source
|
||||
ARG torch_cuda_arch_list='8.0;8.9;9.0a;10.0a;12.0'
|
||||
ENV TORCH_CUDA_ARCH_LIST=${torch_cuda_arch_list}
|
||||
|
||||
# TODO(elainewy): remove this once vllm commit is updated, and install flashinfer from pip
|
||||
# see https://github.com/pytorch/pytorch/pull/165274#issuecomment-3408531784
|
||||
ARG FLASHINFER_GIT_REPO="https://github.com/flashinfer-ai/flashinfer.git"
|
||||
ARG FLASHINFER_GIT_REF="v0.2.14.post1"
|
||||
|
||||
RUN --mount=type=cache,target=/root/.cache/uv \
|
||||
git clone --depth 1 --recursive --shallow-submodules \
|
||||
--branch ${FLASHINFER_GIT_REF} \
|
||||
${FLASHINFER_GIT_REPO} flashinfer \
|
||||
&& echo "Building FlashInfer with AOT for arches: ${torch_cuda_arch_list}" \
|
||||
&& cd flashinfer \
|
||||
&& python3 -m flashinfer.aot \
|
||||
&& python3 -m build --no-isolation --wheel --outdir ../wheels/flashinfer \
|
||||
&& cd .. \
|
||||
&& rm -rf flashinfer
|
||||
|
||||
# Install FlashInfer
|
||||
RUN --mount=type=cache,target=/root/.cache/uv \
|
||||
uv pip install --system wheels/flashinfer/*.whl --verbose
|
||||
|
||||
# Logging to confirm the torch versions
|
||||
RUN pip freeze | grep -E 'torch|xformers|vllm|flashinfer'
|
||||
RUN uv pip freeze | grep -i '^torch\|^torchvision\|^torchaudio\|^xformers\|^vllm\|^flashinfer' > build_summary.txt
|
||||
RUN pip freeze | grep -E 'torch|xformers|vllm'
|
||||
RUN uv pip freeze | grep -i '^torch\|^torchvision\|^torchaudio\|^xformers\|^vllm' > build_summary.txt
|
||||
################### VLLM INSTALLED IMAGE ####################
|
||||
|
||||
|
||||
@ -331,4 +307,3 @@ FROM scratch as export-wheels
|
||||
COPY --from=base /workspace/xformers-dist /wheels/xformers
|
||||
COPY --from=build /workspace/vllm-dist /wheels/vllm
|
||||
COPY --from=vllm-base /workspace/build_summary.txt /wheels/build_summary.txt
|
||||
COPY --from=vllm-base /workspace/wheels/flashinfer /wheels/flashinfer-python
|
||||
|
||||
7
.github/labeler.yml
vendored
7
.github/labeler.yml
vendored
@ -91,13 +91,6 @@
|
||||
"ciflow/trunk":
|
||||
- .ci/docker/ci_commit_pins/triton.txt
|
||||
|
||||
"oncall: distributed":
|
||||
- torch/csrc/distributed/**
|
||||
- torch/distributed/**
|
||||
- torch/nn/parallel/**
|
||||
- test/distributed/**
|
||||
- torch/testing/_internal/distributed/**
|
||||
|
||||
"release notes: distributed (checkpoint)":
|
||||
- torch/distributed/checkpoint/**
|
||||
- test/distributed/checkpoint/**
|
||||
|
||||
@ -50,6 +50,7 @@ CUDA_AARCH64_ARCHES = ["12.6-aarch64", "12.8-aarch64", "12.9-aarch64", "13.0-aar
|
||||
|
||||
PYTORCH_EXTRA_INSTALL_REQUIREMENTS = {
|
||||
"12.6": (
|
||||
"cuda-bindings==12.9.4; platform_system == 'Linux' | "
|
||||
"nvidia-cuda-nvrtc-cu12==12.6.77; platform_system == 'Linux' | "
|
||||
"nvidia-cuda-runtime-cu12==12.6.77; platform_system == 'Linux' | "
|
||||
"nvidia-cuda-cupti-cu12==12.6.80; platform_system == 'Linux' | "
|
||||
@ -67,6 +68,7 @@ PYTORCH_EXTRA_INSTALL_REQUIREMENTS = {
|
||||
"nvidia-cufile-cu12==1.11.1.6; platform_system == 'Linux'"
|
||||
),
|
||||
"12.8": (
|
||||
"cuda-bindings==12.9.4; platform_system == 'Linux' | "
|
||||
"nvidia-cuda-nvrtc-cu12==12.8.93; platform_system == 'Linux' | "
|
||||
"nvidia-cuda-runtime-cu12==12.8.90; platform_system == 'Linux' | "
|
||||
"nvidia-cuda-cupti-cu12==12.8.90; platform_system == 'Linux' | "
|
||||
@ -84,6 +86,7 @@ PYTORCH_EXTRA_INSTALL_REQUIREMENTS = {
|
||||
"nvidia-cufile-cu12==1.13.1.3; platform_system == 'Linux'"
|
||||
),
|
||||
"12.9": (
|
||||
"cuda-bindings==12.9.4; platform_system == 'Linux' | "
|
||||
"nvidia-cuda-nvrtc-cu12==12.9.86; platform_system == 'Linux' | "
|
||||
"nvidia-cuda-runtime-cu12==12.9.79; platform_system == 'Linux' | "
|
||||
"nvidia-cuda-cupti-cu12==12.9.79; platform_system == 'Linux' | "
|
||||
@ -101,6 +104,7 @@ PYTORCH_EXTRA_INSTALL_REQUIREMENTS = {
|
||||
"nvidia-cufile-cu12==1.14.1.1; platform_system == 'Linux'"
|
||||
),
|
||||
"13.0": (
|
||||
"cuda-bindings==13.0.3; platform_system == 'Linux' | "
|
||||
"nvidia-cuda-nvrtc==13.0.88; platform_system == 'Linux' | "
|
||||
"nvidia-cuda-runtime==13.0.96; platform_system == 'Linux' | "
|
||||
"nvidia-cuda-cupti==13.0.85; platform_system == 'Linux' | "
|
||||
|
||||
2
.github/scripts/prepare_vllm_wheels.sh
vendored
2
.github/scripts/prepare_vllm_wheels.sh
vendored
@ -88,7 +88,7 @@ repackage_wheel() {
|
||||
${PYTHON_EXECUTABLE} -mpip install wheel==0.45.1
|
||||
|
||||
pushd externals/vllm/wheels
|
||||
for package in xformers flashinfer-python vllm; do
|
||||
for package in xformers vllm; do
|
||||
repackage_wheel $package
|
||||
done
|
||||
popd
|
||||
|
||||
@ -23,7 +23,7 @@ jobs:
|
||||
uses: ./.github/workflows/_linux-build.yml
|
||||
with:
|
||||
runner: linux.12xlarge.memory
|
||||
build-environment: linux-jammy-cuda12.8-py3.10-gcc9-sm80
|
||||
build-environment: linux-jammy-cuda12.8-py3.10-gcc11-sm80
|
||||
docker-image-name: ci-image:pytorch-linux-jammy-cuda12.8-cudnn9-py3-gcc11
|
||||
cuda-arch-list: '8.0 9.0'
|
||||
test-matrix: |
|
||||
@ -39,7 +39,7 @@ jobs:
|
||||
needs: attn-microbenchmark-build
|
||||
with:
|
||||
timeout-minutes: 500
|
||||
build-environment: linux-jammy-cuda12.8-py3.10-gcc9-sm80
|
||||
build-environment: linux-jammy-cuda12.8-py3.10-gcc11-sm80
|
||||
docker-image: ${{ needs.attn-microbenchmark-build.outputs.docker-image }}
|
||||
test-matrix: ${{ needs.attn-microbenchmark-build.outputs.test-matrix }}
|
||||
secrets: inherit
|
||||
@ -51,7 +51,7 @@ jobs:
|
||||
uses: ./.github/workflows/_linux-build.yml
|
||||
with:
|
||||
runner: linux.12xlarge.memory
|
||||
build-environment: linux-jammy-cuda12.8-py3.10-gcc9-sm100
|
||||
build-environment: linux-jammy-cuda12.8-py3.10-gcc11-sm100
|
||||
docker-image-name: ci-image:pytorch-linux-jammy-cuda12.8-cudnn9-py3-gcc11
|
||||
cuda-arch-list: '10.0'
|
||||
test-matrix: |
|
||||
@ -66,7 +66,7 @@ jobs:
|
||||
needs: opmicrobenchmark-build-b200
|
||||
with:
|
||||
timeout-minutes: 500
|
||||
build-environment: linux-jammy-cuda12.8-py3.10-gcc9-sm100
|
||||
build-environment: linux-jammy-cuda12.8-py3.10-gcc11-sm100
|
||||
docker-image: ${{ needs.opmicrobenchmark-build-b200.outputs.docker-image }}
|
||||
test-matrix: ${{ needs.opmicrobenchmark-build-b200.outputs.test-matrix }}
|
||||
aws-role-to-assume: arn:aws:iam::308535385114:role/gha_workflow_s3_and_ecr_read_only
|
||||
|
||||
6
.github/workflows/docker-builds.yml
vendored
6
.github/workflows/docker-builds.yml
vendored
@ -52,8 +52,7 @@ jobs:
|
||||
pytorch-linux-jammy-cuda12.8-cudnn9-py3-gcc11,
|
||||
pytorch-linux-jammy-cuda13.0-cudnn9-py3-gcc11,
|
||||
pytorch-linux-jammy-cuda12.8-cudnn9-py3.12-gcc11-vllm,
|
||||
pytorch-linux-jammy-cuda12.8-cudnn9-py3-gcc9-inductor-benchmarks,
|
||||
pytorch-linux-jammy-cuda12.8-cudnn9-py3-gcc9,
|
||||
pytorch-linux-jammy-cuda12.8-cudnn9-py3-gcc11-inductor-benchmarks,
|
||||
pytorch-linux-jammy-cuda12.4-cudnn9-py3-gcc11,
|
||||
pytorch-linux-jammy-py3.10-clang12,
|
||||
pytorch-linux-jammy-py3.11-clang12,
|
||||
@ -75,7 +74,8 @@ jobs:
|
||||
pytorch-linux-jammy-py3-clang12-onnx,
|
||||
pytorch-linux-jammy-linter,
|
||||
pytorch-linux-jammy-cuda12.8-cudnn9-py3.10-linter,
|
||||
pytorch-linux-jammy-py3-clang12-executorch,
|
||||
# TODO: Re-enable me when docker pin update happens
|
||||
# pytorch-linux-jammy-py3-clang12-executorch,
|
||||
pytorch-linux-jammy-py3.12-triton-cpu,
|
||||
pytorch-linux-noble-riscv64-py3.12-gcc14
|
||||
]
|
||||
|
||||
10
.github/workflows/docker-cache-rocm.yml
vendored
10
.github/workflows/docker-cache-rocm.yml
vendored
@ -6,10 +6,9 @@ on:
|
||||
branches: [main, release]
|
||||
types:
|
||||
- completed
|
||||
workflow_dispatch:
|
||||
|
||||
concurrency:
|
||||
group: ${{ github.workflow }}-${{ github.event.pull_request.number || github.sha }}-${{ github.event_name }}
|
||||
group: ${{ github.workflow }}-${{ github.event.workflow_run.head_branch }}
|
||||
cancel-in-progress: true
|
||||
|
||||
permissions:
|
||||
@ -50,9 +49,10 @@ jobs:
|
||||
matrix:
|
||||
runner: [linux.rocm.gfx942.docker-cache]
|
||||
docker-image: [
|
||||
"${{ needs.download-docker-builds-artifacts.outputs.pytorch-linux-jammy-rocm-n-py3 }}",
|
||||
"${{ needs.download-docker-builds-artifacts.outputs.pytorch-linux-noble-rocm-n-py3 }}",
|
||||
"${{ needs.download-docker-builds-artifacts.outputs.pytorch-linux-jammy-rocm-n-py3-benchmarks }}"
|
||||
"${{ needs.download-docker-builds-artifacts.outputs.pytorch-linux-jammy-rocm-n-py3 }}"
|
||||
#"${{ needs.download-docker-builds-artifacts.outputs.pytorch-linux-jammy-rocm-n-py3 }}",
|
||||
#"${{ needs.download-docker-builds-artifacts.outputs.pytorch-linux-noble-rocm-n-py3 }}",
|
||||
#"${{ needs.download-docker-builds-artifacts.outputs.pytorch-linux-jammy-rocm-n-py3-benchmarks }}"
|
||||
]
|
||||
runs-on: "${{ matrix.runner }}"
|
||||
steps:
|
||||
|
||||
56
.github/workflows/generated-linux-aarch64-binary-manywheel-nightly.yml
generated
vendored
56
.github/workflows/generated-linux-aarch64-binary-manywheel-nightly.yml
generated
vendored
@ -132,7 +132,7 @@ jobs:
|
||||
ALPINE_IMAGE: "arm64v8/alpine"
|
||||
build_name: manywheel-py3_10-cuda-aarch64-12_6
|
||||
build_environment: linux-aarch64-binary-manywheel
|
||||
PYTORCH_EXTRA_INSTALL_REQUIREMENTS: nvidia-cuda-nvrtc-cu12==12.6.77; platform_system == 'Linux' | nvidia-cuda-runtime-cu12==12.6.77; platform_system == 'Linux' | nvidia-cuda-cupti-cu12==12.6.80; platform_system == 'Linux' | nvidia-cudnn-cu12==9.10.2.21; platform_system == 'Linux' | nvidia-cublas-cu12==12.6.4.1; platform_system == 'Linux' | nvidia-cufft-cu12==11.3.0.4; platform_system == 'Linux' | nvidia-curand-cu12==10.3.7.77; platform_system == 'Linux' | nvidia-cusolver-cu12==11.7.1.2; platform_system == 'Linux' | nvidia-cusparse-cu12==12.5.4.2; platform_system == 'Linux' | nvidia-cusparselt-cu12==0.7.1; platform_system == 'Linux' | nvidia-nccl-cu12==2.27.5; platform_system == 'Linux' | nvidia-nvshmem-cu12==3.4.5; platform_system == 'Linux' | nvidia-nvtx-cu12==12.6.77; platform_system == 'Linux' | nvidia-nvjitlink-cu12==12.6.85; platform_system == 'Linux' | nvidia-cufile-cu12==1.11.1.6; platform_system == 'Linux'
|
||||
PYTORCH_EXTRA_INSTALL_REQUIREMENTS: cuda-bindings==12.9.4; platform_system == 'Linux' | nvidia-cuda-nvrtc-cu12==12.6.77; platform_system == 'Linux' | nvidia-cuda-runtime-cu12==12.6.77; platform_system == 'Linux' | nvidia-cuda-cupti-cu12==12.6.80; platform_system == 'Linux' | nvidia-cudnn-cu12==9.10.2.21; platform_system == 'Linux' | nvidia-cublas-cu12==12.6.4.1; platform_system == 'Linux' | nvidia-cufft-cu12==11.3.0.4; platform_system == 'Linux' | nvidia-curand-cu12==10.3.7.77; platform_system == 'Linux' | nvidia-cusolver-cu12==11.7.1.2; platform_system == 'Linux' | nvidia-cusparse-cu12==12.5.4.2; platform_system == 'Linux' | nvidia-cusparselt-cu12==0.7.1; platform_system == 'Linux' | nvidia-nccl-cu12==2.27.5; platform_system == 'Linux' | nvidia-nvshmem-cu12==3.4.5; platform_system == 'Linux' | nvidia-nvtx-cu12==12.6.77; platform_system == 'Linux' | nvidia-nvjitlink-cu12==12.6.85; platform_system == 'Linux' | nvidia-cufile-cu12==1.11.1.6; platform_system == 'Linux'
|
||||
timeout-minutes: 420
|
||||
secrets:
|
||||
github-token: ${{ secrets.GITHUB_TOKEN }}
|
||||
@ -178,7 +178,7 @@ jobs:
|
||||
ALPINE_IMAGE: "arm64v8/alpine"
|
||||
build_name: manywheel-py3_10-cuda-aarch64-12_8
|
||||
build_environment: linux-aarch64-binary-manywheel
|
||||
PYTORCH_EXTRA_INSTALL_REQUIREMENTS: nvidia-cuda-nvrtc-cu12==12.8.93; platform_system == 'Linux' | nvidia-cuda-runtime-cu12==12.8.90; platform_system == 'Linux' | nvidia-cuda-cupti-cu12==12.8.90; platform_system == 'Linux' | nvidia-cudnn-cu12==9.10.2.21; platform_system == 'Linux' | nvidia-cublas-cu12==12.8.4.1; platform_system == 'Linux' | nvidia-cufft-cu12==11.3.3.83; platform_system == 'Linux' | nvidia-curand-cu12==10.3.9.90; platform_system == 'Linux' | nvidia-cusolver-cu12==11.7.3.90; platform_system == 'Linux' | nvidia-cusparse-cu12==12.5.8.93; platform_system == 'Linux' | nvidia-cusparselt-cu12==0.7.1; platform_system == 'Linux' | nvidia-nccl-cu12==2.27.5; platform_system == 'Linux' | nvidia-nvshmem-cu12==3.4.5; platform_system == 'Linux' | nvidia-nvtx-cu12==12.8.90; platform_system == 'Linux' | nvidia-nvjitlink-cu12==12.8.93; platform_system == 'Linux' | nvidia-cufile-cu12==1.13.1.3; platform_system == 'Linux'
|
||||
PYTORCH_EXTRA_INSTALL_REQUIREMENTS: cuda-bindings==12.9.4; platform_system == 'Linux' | nvidia-cuda-nvrtc-cu12==12.8.93; platform_system == 'Linux' | nvidia-cuda-runtime-cu12==12.8.90; platform_system == 'Linux' | nvidia-cuda-cupti-cu12==12.8.90; platform_system == 'Linux' | nvidia-cudnn-cu12==9.10.2.21; platform_system == 'Linux' | nvidia-cublas-cu12==12.8.4.1; platform_system == 'Linux' | nvidia-cufft-cu12==11.3.3.83; platform_system == 'Linux' | nvidia-curand-cu12==10.3.9.90; platform_system == 'Linux' | nvidia-cusolver-cu12==11.7.3.90; platform_system == 'Linux' | nvidia-cusparse-cu12==12.5.8.93; platform_system == 'Linux' | nvidia-cusparselt-cu12==0.7.1; platform_system == 'Linux' | nvidia-nccl-cu12==2.27.5; platform_system == 'Linux' | nvidia-nvshmem-cu12==3.4.5; platform_system == 'Linux' | nvidia-nvtx-cu12==12.8.90; platform_system == 'Linux' | nvidia-nvjitlink-cu12==12.8.93; platform_system == 'Linux' | nvidia-cufile-cu12==1.13.1.3; platform_system == 'Linux'
|
||||
timeout-minutes: 420
|
||||
secrets:
|
||||
github-token: ${{ secrets.GITHUB_TOKEN }}
|
||||
@ -224,7 +224,7 @@ jobs:
|
||||
ALPINE_IMAGE: "arm64v8/alpine"
|
||||
build_name: manywheel-py3_10-cuda-aarch64-12_9
|
||||
build_environment: linux-aarch64-binary-manywheel
|
||||
PYTORCH_EXTRA_INSTALL_REQUIREMENTS: nvidia-cuda-nvrtc-cu12==12.9.86; platform_system == 'Linux' | nvidia-cuda-runtime-cu12==12.9.79; platform_system == 'Linux' | nvidia-cuda-cupti-cu12==12.9.79; platform_system == 'Linux' | nvidia-cudnn-cu12==9.10.2.21; platform_system == 'Linux' | nvidia-cublas-cu12==12.9.1.4; platform_system == 'Linux' | nvidia-cufft-cu12==11.4.1.4; platform_system == 'Linux' | nvidia-curand-cu12==10.3.10.19; platform_system == 'Linux' | nvidia-cusolver-cu12==11.7.5.82; platform_system == 'Linux' | nvidia-cusparse-cu12==12.5.10.65; platform_system == 'Linux' | nvidia-cusparselt-cu12==0.7.1; platform_system == 'Linux' | nvidia-nccl-cu12==2.27.5; platform_system == 'Linux' | nvidia-nvshmem-cu12==3.4.5; platform_system == 'Linux' | nvidia-nvtx-cu12==12.9.79; platform_system == 'Linux' | nvidia-nvjitlink-cu12==12.9.86; platform_system == 'Linux' | nvidia-cufile-cu12==1.14.1.1; platform_system == 'Linux'
|
||||
PYTORCH_EXTRA_INSTALL_REQUIREMENTS: cuda-bindings==12.9.4; platform_system == 'Linux' | nvidia-cuda-nvrtc-cu12==12.9.86; platform_system == 'Linux' | nvidia-cuda-runtime-cu12==12.9.79; platform_system == 'Linux' | nvidia-cuda-cupti-cu12==12.9.79; platform_system == 'Linux' | nvidia-cudnn-cu12==9.10.2.21; platform_system == 'Linux' | nvidia-cublas-cu12==12.9.1.4; platform_system == 'Linux' | nvidia-cufft-cu12==11.4.1.4; platform_system == 'Linux' | nvidia-curand-cu12==10.3.10.19; platform_system == 'Linux' | nvidia-cusolver-cu12==11.7.5.82; platform_system == 'Linux' | nvidia-cusparse-cu12==12.5.10.65; platform_system == 'Linux' | nvidia-cusparselt-cu12==0.7.1; platform_system == 'Linux' | nvidia-nccl-cu12==2.27.5; platform_system == 'Linux' | nvidia-nvshmem-cu12==3.4.5; platform_system == 'Linux' | nvidia-nvtx-cu12==12.9.79; platform_system == 'Linux' | nvidia-nvjitlink-cu12==12.9.86; platform_system == 'Linux' | nvidia-cufile-cu12==1.14.1.1; platform_system == 'Linux'
|
||||
timeout-minutes: 420
|
||||
secrets:
|
||||
github-token: ${{ secrets.GITHUB_TOKEN }}
|
||||
@ -270,7 +270,7 @@ jobs:
|
||||
ALPINE_IMAGE: "arm64v8/alpine"
|
||||
build_name: manywheel-py3_10-cuda-aarch64-13_0
|
||||
build_environment: linux-aarch64-binary-manywheel
|
||||
PYTORCH_EXTRA_INSTALL_REQUIREMENTS: nvidia-cuda-nvrtc==13.0.88; platform_system == 'Linux' | nvidia-cuda-runtime==13.0.96; platform_system == 'Linux' | nvidia-cuda-cupti==13.0.85; platform_system == 'Linux' | nvidia-cudnn-cu13==9.13.0.50; platform_system == 'Linux' | nvidia-cublas==13.1.0.3; platform_system == 'Linux' | nvidia-cufft==12.0.0.61; platform_system == 'Linux' | nvidia-curand==10.4.0.35; platform_system == 'Linux' | nvidia-cusolver==12.0.4.66; platform_system == 'Linux' | nvidia-cusparse==12.6.3.3; platform_system == 'Linux' | nvidia-cusparselt-cu13==0.8.0; platform_system == 'Linux' | nvidia-nccl-cu13==2.27.7; platform_system == 'Linux' | nvidia-nvshmem-cu13==3.4.5; platform_system == 'Linux' | nvidia-nvtx==13.0.85; platform_system == 'Linux' | nvidia-nvjitlink==13.0.88; platform_system == 'Linux' | nvidia-cufile==1.15.1.6; platform_system == 'Linux'
|
||||
PYTORCH_EXTRA_INSTALL_REQUIREMENTS: cuda-bindings==13.0.3; platform_system == 'Linux' | nvidia-cuda-nvrtc==13.0.88; platform_system == 'Linux' | nvidia-cuda-runtime==13.0.96; platform_system == 'Linux' | nvidia-cuda-cupti==13.0.85; platform_system == 'Linux' | nvidia-cudnn-cu13==9.13.0.50; platform_system == 'Linux' | nvidia-cublas==13.1.0.3; platform_system == 'Linux' | nvidia-cufft==12.0.0.61; platform_system == 'Linux' | nvidia-curand==10.4.0.35; platform_system == 'Linux' | nvidia-cusolver==12.0.4.66; platform_system == 'Linux' | nvidia-cusparse==12.6.3.3; platform_system == 'Linux' | nvidia-cusparselt-cu13==0.8.0; platform_system == 'Linux' | nvidia-nccl-cu13==2.27.7; platform_system == 'Linux' | nvidia-nvshmem-cu13==3.4.5; platform_system == 'Linux' | nvidia-nvtx==13.0.85; platform_system == 'Linux' | nvidia-nvjitlink==13.0.88; platform_system == 'Linux' | nvidia-cufile==1.15.1.6; platform_system == 'Linux'
|
||||
timeout-minutes: 420
|
||||
secrets:
|
||||
github-token: ${{ secrets.GITHUB_TOKEN }}
|
||||
@ -381,7 +381,7 @@ jobs:
|
||||
ALPINE_IMAGE: "arm64v8/alpine"
|
||||
build_name: manywheel-py3_11-cuda-aarch64-12_6
|
||||
build_environment: linux-aarch64-binary-manywheel
|
||||
PYTORCH_EXTRA_INSTALL_REQUIREMENTS: nvidia-cuda-nvrtc-cu12==12.6.77; platform_system == 'Linux' | nvidia-cuda-runtime-cu12==12.6.77; platform_system == 'Linux' | nvidia-cuda-cupti-cu12==12.6.80; platform_system == 'Linux' | nvidia-cudnn-cu12==9.10.2.21; platform_system == 'Linux' | nvidia-cublas-cu12==12.6.4.1; platform_system == 'Linux' | nvidia-cufft-cu12==11.3.0.4; platform_system == 'Linux' | nvidia-curand-cu12==10.3.7.77; platform_system == 'Linux' | nvidia-cusolver-cu12==11.7.1.2; platform_system == 'Linux' | nvidia-cusparse-cu12==12.5.4.2; platform_system == 'Linux' | nvidia-cusparselt-cu12==0.7.1; platform_system == 'Linux' | nvidia-nccl-cu12==2.27.5; platform_system == 'Linux' | nvidia-nvshmem-cu12==3.4.5; platform_system == 'Linux' | nvidia-nvtx-cu12==12.6.77; platform_system == 'Linux' | nvidia-nvjitlink-cu12==12.6.85; platform_system == 'Linux' | nvidia-cufile-cu12==1.11.1.6; platform_system == 'Linux'
|
||||
PYTORCH_EXTRA_INSTALL_REQUIREMENTS: cuda-bindings==12.9.4; platform_system == 'Linux' | nvidia-cuda-nvrtc-cu12==12.6.77; platform_system == 'Linux' | nvidia-cuda-runtime-cu12==12.6.77; platform_system == 'Linux' | nvidia-cuda-cupti-cu12==12.6.80; platform_system == 'Linux' | nvidia-cudnn-cu12==9.10.2.21; platform_system == 'Linux' | nvidia-cublas-cu12==12.6.4.1; platform_system == 'Linux' | nvidia-cufft-cu12==11.3.0.4; platform_system == 'Linux' | nvidia-curand-cu12==10.3.7.77; platform_system == 'Linux' | nvidia-cusolver-cu12==11.7.1.2; platform_system == 'Linux' | nvidia-cusparse-cu12==12.5.4.2; platform_system == 'Linux' | nvidia-cusparselt-cu12==0.7.1; platform_system == 'Linux' | nvidia-nccl-cu12==2.27.5; platform_system == 'Linux' | nvidia-nvshmem-cu12==3.4.5; platform_system == 'Linux' | nvidia-nvtx-cu12==12.6.77; platform_system == 'Linux' | nvidia-nvjitlink-cu12==12.6.85; platform_system == 'Linux' | nvidia-cufile-cu12==1.11.1.6; platform_system == 'Linux'
|
||||
timeout-minutes: 420
|
||||
secrets:
|
||||
github-token: ${{ secrets.GITHUB_TOKEN }}
|
||||
@ -427,7 +427,7 @@ jobs:
|
||||
ALPINE_IMAGE: "arm64v8/alpine"
|
||||
build_name: manywheel-py3_11-cuda-aarch64-12_8
|
||||
build_environment: linux-aarch64-binary-manywheel
|
||||
PYTORCH_EXTRA_INSTALL_REQUIREMENTS: nvidia-cuda-nvrtc-cu12==12.8.93; platform_system == 'Linux' | nvidia-cuda-runtime-cu12==12.8.90; platform_system == 'Linux' | nvidia-cuda-cupti-cu12==12.8.90; platform_system == 'Linux' | nvidia-cudnn-cu12==9.10.2.21; platform_system == 'Linux' | nvidia-cublas-cu12==12.8.4.1; platform_system == 'Linux' | nvidia-cufft-cu12==11.3.3.83; platform_system == 'Linux' | nvidia-curand-cu12==10.3.9.90; platform_system == 'Linux' | nvidia-cusolver-cu12==11.7.3.90; platform_system == 'Linux' | nvidia-cusparse-cu12==12.5.8.93; platform_system == 'Linux' | nvidia-cusparselt-cu12==0.7.1; platform_system == 'Linux' | nvidia-nccl-cu12==2.27.5; platform_system == 'Linux' | nvidia-nvshmem-cu12==3.4.5; platform_system == 'Linux' | nvidia-nvtx-cu12==12.8.90; platform_system == 'Linux' | nvidia-nvjitlink-cu12==12.8.93; platform_system == 'Linux' | nvidia-cufile-cu12==1.13.1.3; platform_system == 'Linux'
|
||||
PYTORCH_EXTRA_INSTALL_REQUIREMENTS: cuda-bindings==12.9.4; platform_system == 'Linux' | nvidia-cuda-nvrtc-cu12==12.8.93; platform_system == 'Linux' | nvidia-cuda-runtime-cu12==12.8.90; platform_system == 'Linux' | nvidia-cuda-cupti-cu12==12.8.90; platform_system == 'Linux' | nvidia-cudnn-cu12==9.10.2.21; platform_system == 'Linux' | nvidia-cublas-cu12==12.8.4.1; platform_system == 'Linux' | nvidia-cufft-cu12==11.3.3.83; platform_system == 'Linux' | nvidia-curand-cu12==10.3.9.90; platform_system == 'Linux' | nvidia-cusolver-cu12==11.7.3.90; platform_system == 'Linux' | nvidia-cusparse-cu12==12.5.8.93; platform_system == 'Linux' | nvidia-cusparselt-cu12==0.7.1; platform_system == 'Linux' | nvidia-nccl-cu12==2.27.5; platform_system == 'Linux' | nvidia-nvshmem-cu12==3.4.5; platform_system == 'Linux' | nvidia-nvtx-cu12==12.8.90; platform_system == 'Linux' | nvidia-nvjitlink-cu12==12.8.93; platform_system == 'Linux' | nvidia-cufile-cu12==1.13.1.3; platform_system == 'Linux'
|
||||
timeout-minutes: 420
|
||||
secrets:
|
||||
github-token: ${{ secrets.GITHUB_TOKEN }}
|
||||
@ -473,7 +473,7 @@ jobs:
|
||||
ALPINE_IMAGE: "arm64v8/alpine"
|
||||
build_name: manywheel-py3_11-cuda-aarch64-12_9
|
||||
build_environment: linux-aarch64-binary-manywheel
|
||||
PYTORCH_EXTRA_INSTALL_REQUIREMENTS: nvidia-cuda-nvrtc-cu12==12.9.86; platform_system == 'Linux' | nvidia-cuda-runtime-cu12==12.9.79; platform_system == 'Linux' | nvidia-cuda-cupti-cu12==12.9.79; platform_system == 'Linux' | nvidia-cudnn-cu12==9.10.2.21; platform_system == 'Linux' | nvidia-cublas-cu12==12.9.1.4; platform_system == 'Linux' | nvidia-cufft-cu12==11.4.1.4; platform_system == 'Linux' | nvidia-curand-cu12==10.3.10.19; platform_system == 'Linux' | nvidia-cusolver-cu12==11.7.5.82; platform_system == 'Linux' | nvidia-cusparse-cu12==12.5.10.65; platform_system == 'Linux' | nvidia-cusparselt-cu12==0.7.1; platform_system == 'Linux' | nvidia-nccl-cu12==2.27.5; platform_system == 'Linux' | nvidia-nvshmem-cu12==3.4.5; platform_system == 'Linux' | nvidia-nvtx-cu12==12.9.79; platform_system == 'Linux' | nvidia-nvjitlink-cu12==12.9.86; platform_system == 'Linux' | nvidia-cufile-cu12==1.14.1.1; platform_system == 'Linux'
|
||||
PYTORCH_EXTRA_INSTALL_REQUIREMENTS: cuda-bindings==12.9.4; platform_system == 'Linux' | nvidia-cuda-nvrtc-cu12==12.9.86; platform_system == 'Linux' | nvidia-cuda-runtime-cu12==12.9.79; platform_system == 'Linux' | nvidia-cuda-cupti-cu12==12.9.79; platform_system == 'Linux' | nvidia-cudnn-cu12==9.10.2.21; platform_system == 'Linux' | nvidia-cublas-cu12==12.9.1.4; platform_system == 'Linux' | nvidia-cufft-cu12==11.4.1.4; platform_system == 'Linux' | nvidia-curand-cu12==10.3.10.19; platform_system == 'Linux' | nvidia-cusolver-cu12==11.7.5.82; platform_system == 'Linux' | nvidia-cusparse-cu12==12.5.10.65; platform_system == 'Linux' | nvidia-cusparselt-cu12==0.7.1; platform_system == 'Linux' | nvidia-nccl-cu12==2.27.5; platform_system == 'Linux' | nvidia-nvshmem-cu12==3.4.5; platform_system == 'Linux' | nvidia-nvtx-cu12==12.9.79; platform_system == 'Linux' | nvidia-nvjitlink-cu12==12.9.86; platform_system == 'Linux' | nvidia-cufile-cu12==1.14.1.1; platform_system == 'Linux'
|
||||
timeout-minutes: 420
|
||||
secrets:
|
||||
github-token: ${{ secrets.GITHUB_TOKEN }}
|
||||
@ -519,7 +519,7 @@ jobs:
|
||||
ALPINE_IMAGE: "arm64v8/alpine"
|
||||
build_name: manywheel-py3_11-cuda-aarch64-13_0
|
||||
build_environment: linux-aarch64-binary-manywheel
|
||||
PYTORCH_EXTRA_INSTALL_REQUIREMENTS: nvidia-cuda-nvrtc==13.0.88; platform_system == 'Linux' | nvidia-cuda-runtime==13.0.96; platform_system == 'Linux' | nvidia-cuda-cupti==13.0.85; platform_system == 'Linux' | nvidia-cudnn-cu13==9.13.0.50; platform_system == 'Linux' | nvidia-cublas==13.1.0.3; platform_system == 'Linux' | nvidia-cufft==12.0.0.61; platform_system == 'Linux' | nvidia-curand==10.4.0.35; platform_system == 'Linux' | nvidia-cusolver==12.0.4.66; platform_system == 'Linux' | nvidia-cusparse==12.6.3.3; platform_system == 'Linux' | nvidia-cusparselt-cu13==0.8.0; platform_system == 'Linux' | nvidia-nccl-cu13==2.27.7; platform_system == 'Linux' | nvidia-nvshmem-cu13==3.4.5; platform_system == 'Linux' | nvidia-nvtx==13.0.85; platform_system == 'Linux' | nvidia-nvjitlink==13.0.88; platform_system == 'Linux' | nvidia-cufile==1.15.1.6; platform_system == 'Linux'
|
||||
PYTORCH_EXTRA_INSTALL_REQUIREMENTS: cuda-bindings==13.0.3; platform_system == 'Linux' | nvidia-cuda-nvrtc==13.0.88; platform_system == 'Linux' | nvidia-cuda-runtime==13.0.96; platform_system == 'Linux' | nvidia-cuda-cupti==13.0.85; platform_system == 'Linux' | nvidia-cudnn-cu13==9.13.0.50; platform_system == 'Linux' | nvidia-cublas==13.1.0.3; platform_system == 'Linux' | nvidia-cufft==12.0.0.61; platform_system == 'Linux' | nvidia-curand==10.4.0.35; platform_system == 'Linux' | nvidia-cusolver==12.0.4.66; platform_system == 'Linux' | nvidia-cusparse==12.6.3.3; platform_system == 'Linux' | nvidia-cusparselt-cu13==0.8.0; platform_system == 'Linux' | nvidia-nccl-cu13==2.27.7; platform_system == 'Linux' | nvidia-nvshmem-cu13==3.4.5; platform_system == 'Linux' | nvidia-nvtx==13.0.85; platform_system == 'Linux' | nvidia-nvjitlink==13.0.88; platform_system == 'Linux' | nvidia-cufile==1.15.1.6; platform_system == 'Linux'
|
||||
timeout-minutes: 420
|
||||
secrets:
|
||||
github-token: ${{ secrets.GITHUB_TOKEN }}
|
||||
@ -630,7 +630,7 @@ jobs:
|
||||
ALPINE_IMAGE: "arm64v8/alpine"
|
||||
build_name: manywheel-py3_12-cuda-aarch64-12_6
|
||||
build_environment: linux-aarch64-binary-manywheel
|
||||
PYTORCH_EXTRA_INSTALL_REQUIREMENTS: nvidia-cuda-nvrtc-cu12==12.6.77; platform_system == 'Linux' | nvidia-cuda-runtime-cu12==12.6.77; platform_system == 'Linux' | nvidia-cuda-cupti-cu12==12.6.80; platform_system == 'Linux' | nvidia-cudnn-cu12==9.10.2.21; platform_system == 'Linux' | nvidia-cublas-cu12==12.6.4.1; platform_system == 'Linux' | nvidia-cufft-cu12==11.3.0.4; platform_system == 'Linux' | nvidia-curand-cu12==10.3.7.77; platform_system == 'Linux' | nvidia-cusolver-cu12==11.7.1.2; platform_system == 'Linux' | nvidia-cusparse-cu12==12.5.4.2; platform_system == 'Linux' | nvidia-cusparselt-cu12==0.7.1; platform_system == 'Linux' | nvidia-nccl-cu12==2.27.5; platform_system == 'Linux' | nvidia-nvshmem-cu12==3.4.5; platform_system == 'Linux' | nvidia-nvtx-cu12==12.6.77; platform_system == 'Linux' | nvidia-nvjitlink-cu12==12.6.85; platform_system == 'Linux' | nvidia-cufile-cu12==1.11.1.6; platform_system == 'Linux'
|
||||
PYTORCH_EXTRA_INSTALL_REQUIREMENTS: cuda-bindings==12.9.4; platform_system == 'Linux' | nvidia-cuda-nvrtc-cu12==12.6.77; platform_system == 'Linux' | nvidia-cuda-runtime-cu12==12.6.77; platform_system == 'Linux' | nvidia-cuda-cupti-cu12==12.6.80; platform_system == 'Linux' | nvidia-cudnn-cu12==9.10.2.21; platform_system == 'Linux' | nvidia-cublas-cu12==12.6.4.1; platform_system == 'Linux' | nvidia-cufft-cu12==11.3.0.4; platform_system == 'Linux' | nvidia-curand-cu12==10.3.7.77; platform_system == 'Linux' | nvidia-cusolver-cu12==11.7.1.2; platform_system == 'Linux' | nvidia-cusparse-cu12==12.5.4.2; platform_system == 'Linux' | nvidia-cusparselt-cu12==0.7.1; platform_system == 'Linux' | nvidia-nccl-cu12==2.27.5; platform_system == 'Linux' | nvidia-nvshmem-cu12==3.4.5; platform_system == 'Linux' | nvidia-nvtx-cu12==12.6.77; platform_system == 'Linux' | nvidia-nvjitlink-cu12==12.6.85; platform_system == 'Linux' | nvidia-cufile-cu12==1.11.1.6; platform_system == 'Linux'
|
||||
timeout-minutes: 420
|
||||
secrets:
|
||||
github-token: ${{ secrets.GITHUB_TOKEN }}
|
||||
@ -676,7 +676,7 @@ jobs:
|
||||
ALPINE_IMAGE: "arm64v8/alpine"
|
||||
build_name: manywheel-py3_12-cuda-aarch64-12_8
|
||||
build_environment: linux-aarch64-binary-manywheel
|
||||
PYTORCH_EXTRA_INSTALL_REQUIREMENTS: nvidia-cuda-nvrtc-cu12==12.8.93; platform_system == 'Linux' | nvidia-cuda-runtime-cu12==12.8.90; platform_system == 'Linux' | nvidia-cuda-cupti-cu12==12.8.90; platform_system == 'Linux' | nvidia-cudnn-cu12==9.10.2.21; platform_system == 'Linux' | nvidia-cublas-cu12==12.8.4.1; platform_system == 'Linux' | nvidia-cufft-cu12==11.3.3.83; platform_system == 'Linux' | nvidia-curand-cu12==10.3.9.90; platform_system == 'Linux' | nvidia-cusolver-cu12==11.7.3.90; platform_system == 'Linux' | nvidia-cusparse-cu12==12.5.8.93; platform_system == 'Linux' | nvidia-cusparselt-cu12==0.7.1; platform_system == 'Linux' | nvidia-nccl-cu12==2.27.5; platform_system == 'Linux' | nvidia-nvshmem-cu12==3.4.5; platform_system == 'Linux' | nvidia-nvtx-cu12==12.8.90; platform_system == 'Linux' | nvidia-nvjitlink-cu12==12.8.93; platform_system == 'Linux' | nvidia-cufile-cu12==1.13.1.3; platform_system == 'Linux'
|
||||
PYTORCH_EXTRA_INSTALL_REQUIREMENTS: cuda-bindings==12.9.4; platform_system == 'Linux' | nvidia-cuda-nvrtc-cu12==12.8.93; platform_system == 'Linux' | nvidia-cuda-runtime-cu12==12.8.90; platform_system == 'Linux' | nvidia-cuda-cupti-cu12==12.8.90; platform_system == 'Linux' | nvidia-cudnn-cu12==9.10.2.21; platform_system == 'Linux' | nvidia-cublas-cu12==12.8.4.1; platform_system == 'Linux' | nvidia-cufft-cu12==11.3.3.83; platform_system == 'Linux' | nvidia-curand-cu12==10.3.9.90; platform_system == 'Linux' | nvidia-cusolver-cu12==11.7.3.90; platform_system == 'Linux' | nvidia-cusparse-cu12==12.5.8.93; platform_system == 'Linux' | nvidia-cusparselt-cu12==0.7.1; platform_system == 'Linux' | nvidia-nccl-cu12==2.27.5; platform_system == 'Linux' | nvidia-nvshmem-cu12==3.4.5; platform_system == 'Linux' | nvidia-nvtx-cu12==12.8.90; platform_system == 'Linux' | nvidia-nvjitlink-cu12==12.8.93; platform_system == 'Linux' | nvidia-cufile-cu12==1.13.1.3; platform_system == 'Linux'
|
||||
timeout-minutes: 420
|
||||
secrets:
|
||||
github-token: ${{ secrets.GITHUB_TOKEN }}
|
||||
@ -722,7 +722,7 @@ jobs:
|
||||
ALPINE_IMAGE: "arm64v8/alpine"
|
||||
build_name: manywheel-py3_12-cuda-aarch64-12_9
|
||||
build_environment: linux-aarch64-binary-manywheel
|
||||
PYTORCH_EXTRA_INSTALL_REQUIREMENTS: nvidia-cuda-nvrtc-cu12==12.9.86; platform_system == 'Linux' | nvidia-cuda-runtime-cu12==12.9.79; platform_system == 'Linux' | nvidia-cuda-cupti-cu12==12.9.79; platform_system == 'Linux' | nvidia-cudnn-cu12==9.10.2.21; platform_system == 'Linux' | nvidia-cublas-cu12==12.9.1.4; platform_system == 'Linux' | nvidia-cufft-cu12==11.4.1.4; platform_system == 'Linux' | nvidia-curand-cu12==10.3.10.19; platform_system == 'Linux' | nvidia-cusolver-cu12==11.7.5.82; platform_system == 'Linux' | nvidia-cusparse-cu12==12.5.10.65; platform_system == 'Linux' | nvidia-cusparselt-cu12==0.7.1; platform_system == 'Linux' | nvidia-nccl-cu12==2.27.5; platform_system == 'Linux' | nvidia-nvshmem-cu12==3.4.5; platform_system == 'Linux' | nvidia-nvtx-cu12==12.9.79; platform_system == 'Linux' | nvidia-nvjitlink-cu12==12.9.86; platform_system == 'Linux' | nvidia-cufile-cu12==1.14.1.1; platform_system == 'Linux'
|
||||
PYTORCH_EXTRA_INSTALL_REQUIREMENTS: cuda-bindings==12.9.4; platform_system == 'Linux' | nvidia-cuda-nvrtc-cu12==12.9.86; platform_system == 'Linux' | nvidia-cuda-runtime-cu12==12.9.79; platform_system == 'Linux' | nvidia-cuda-cupti-cu12==12.9.79; platform_system == 'Linux' | nvidia-cudnn-cu12==9.10.2.21; platform_system == 'Linux' | nvidia-cublas-cu12==12.9.1.4; platform_system == 'Linux' | nvidia-cufft-cu12==11.4.1.4; platform_system == 'Linux' | nvidia-curand-cu12==10.3.10.19; platform_system == 'Linux' | nvidia-cusolver-cu12==11.7.5.82; platform_system == 'Linux' | nvidia-cusparse-cu12==12.5.10.65; platform_system == 'Linux' | nvidia-cusparselt-cu12==0.7.1; platform_system == 'Linux' | nvidia-nccl-cu12==2.27.5; platform_system == 'Linux' | nvidia-nvshmem-cu12==3.4.5; platform_system == 'Linux' | nvidia-nvtx-cu12==12.9.79; platform_system == 'Linux' | nvidia-nvjitlink-cu12==12.9.86; platform_system == 'Linux' | nvidia-cufile-cu12==1.14.1.1; platform_system == 'Linux'
|
||||
timeout-minutes: 420
|
||||
secrets:
|
||||
github-token: ${{ secrets.GITHUB_TOKEN }}
|
||||
@ -768,7 +768,7 @@ jobs:
|
||||
ALPINE_IMAGE: "arm64v8/alpine"
|
||||
build_name: manywheel-py3_12-cuda-aarch64-13_0
|
||||
build_environment: linux-aarch64-binary-manywheel
|
||||
PYTORCH_EXTRA_INSTALL_REQUIREMENTS: nvidia-cuda-nvrtc==13.0.88; platform_system == 'Linux' | nvidia-cuda-runtime==13.0.96; platform_system == 'Linux' | nvidia-cuda-cupti==13.0.85; platform_system == 'Linux' | nvidia-cudnn-cu13==9.13.0.50; platform_system == 'Linux' | nvidia-cublas==13.1.0.3; platform_system == 'Linux' | nvidia-cufft==12.0.0.61; platform_system == 'Linux' | nvidia-curand==10.4.0.35; platform_system == 'Linux' | nvidia-cusolver==12.0.4.66; platform_system == 'Linux' | nvidia-cusparse==12.6.3.3; platform_system == 'Linux' | nvidia-cusparselt-cu13==0.8.0; platform_system == 'Linux' | nvidia-nccl-cu13==2.27.7; platform_system == 'Linux' | nvidia-nvshmem-cu13==3.4.5; platform_system == 'Linux' | nvidia-nvtx==13.0.85; platform_system == 'Linux' | nvidia-nvjitlink==13.0.88; platform_system == 'Linux' | nvidia-cufile==1.15.1.6; platform_system == 'Linux'
|
||||
PYTORCH_EXTRA_INSTALL_REQUIREMENTS: cuda-bindings==13.0.3; platform_system == 'Linux' | nvidia-cuda-nvrtc==13.0.88; platform_system == 'Linux' | nvidia-cuda-runtime==13.0.96; platform_system == 'Linux' | nvidia-cuda-cupti==13.0.85; platform_system == 'Linux' | nvidia-cudnn-cu13==9.13.0.50; platform_system == 'Linux' | nvidia-cublas==13.1.0.3; platform_system == 'Linux' | nvidia-cufft==12.0.0.61; platform_system == 'Linux' | nvidia-curand==10.4.0.35; platform_system == 'Linux' | nvidia-cusolver==12.0.4.66; platform_system == 'Linux' | nvidia-cusparse==12.6.3.3; platform_system == 'Linux' | nvidia-cusparselt-cu13==0.8.0; platform_system == 'Linux' | nvidia-nccl-cu13==2.27.7; platform_system == 'Linux' | nvidia-nvshmem-cu13==3.4.5; platform_system == 'Linux' | nvidia-nvtx==13.0.85; platform_system == 'Linux' | nvidia-nvjitlink==13.0.88; platform_system == 'Linux' | nvidia-cufile==1.15.1.6; platform_system == 'Linux'
|
||||
timeout-minutes: 420
|
||||
secrets:
|
||||
github-token: ${{ secrets.GITHUB_TOKEN }}
|
||||
@ -879,7 +879,7 @@ jobs:
|
||||
ALPINE_IMAGE: "arm64v8/alpine"
|
||||
build_name: manywheel-py3_13-cuda-aarch64-12_6
|
||||
build_environment: linux-aarch64-binary-manywheel
|
||||
PYTORCH_EXTRA_INSTALL_REQUIREMENTS: nvidia-cuda-nvrtc-cu12==12.6.77; platform_system == 'Linux' | nvidia-cuda-runtime-cu12==12.6.77; platform_system == 'Linux' | nvidia-cuda-cupti-cu12==12.6.80; platform_system == 'Linux' | nvidia-cudnn-cu12==9.10.2.21; platform_system == 'Linux' | nvidia-cublas-cu12==12.6.4.1; platform_system == 'Linux' | nvidia-cufft-cu12==11.3.0.4; platform_system == 'Linux' | nvidia-curand-cu12==10.3.7.77; platform_system == 'Linux' | nvidia-cusolver-cu12==11.7.1.2; platform_system == 'Linux' | nvidia-cusparse-cu12==12.5.4.2; platform_system == 'Linux' | nvidia-cusparselt-cu12==0.7.1; platform_system == 'Linux' | nvidia-nccl-cu12==2.27.5; platform_system == 'Linux' | nvidia-nvshmem-cu12==3.4.5; platform_system == 'Linux' | nvidia-nvtx-cu12==12.6.77; platform_system == 'Linux' | nvidia-nvjitlink-cu12==12.6.85; platform_system == 'Linux' | nvidia-cufile-cu12==1.11.1.6; platform_system == 'Linux'
|
||||
PYTORCH_EXTRA_INSTALL_REQUIREMENTS: cuda-bindings==12.9.4; platform_system == 'Linux' | nvidia-cuda-nvrtc-cu12==12.6.77; platform_system == 'Linux' | nvidia-cuda-runtime-cu12==12.6.77; platform_system == 'Linux' | nvidia-cuda-cupti-cu12==12.6.80; platform_system == 'Linux' | nvidia-cudnn-cu12==9.10.2.21; platform_system == 'Linux' | nvidia-cublas-cu12==12.6.4.1; platform_system == 'Linux' | nvidia-cufft-cu12==11.3.0.4; platform_system == 'Linux' | nvidia-curand-cu12==10.3.7.77; platform_system == 'Linux' | nvidia-cusolver-cu12==11.7.1.2; platform_system == 'Linux' | nvidia-cusparse-cu12==12.5.4.2; platform_system == 'Linux' | nvidia-cusparselt-cu12==0.7.1; platform_system == 'Linux' | nvidia-nccl-cu12==2.27.5; platform_system == 'Linux' | nvidia-nvshmem-cu12==3.4.5; platform_system == 'Linux' | nvidia-nvtx-cu12==12.6.77; platform_system == 'Linux' | nvidia-nvjitlink-cu12==12.6.85; platform_system == 'Linux' | nvidia-cufile-cu12==1.11.1.6; platform_system == 'Linux'
|
||||
timeout-minutes: 420
|
||||
secrets:
|
||||
github-token: ${{ secrets.GITHUB_TOKEN }}
|
||||
@ -925,7 +925,7 @@ jobs:
|
||||
ALPINE_IMAGE: "arm64v8/alpine"
|
||||
build_name: manywheel-py3_13-cuda-aarch64-12_8
|
||||
build_environment: linux-aarch64-binary-manywheel
|
||||
PYTORCH_EXTRA_INSTALL_REQUIREMENTS: nvidia-cuda-nvrtc-cu12==12.8.93; platform_system == 'Linux' | nvidia-cuda-runtime-cu12==12.8.90; platform_system == 'Linux' | nvidia-cuda-cupti-cu12==12.8.90; platform_system == 'Linux' | nvidia-cudnn-cu12==9.10.2.21; platform_system == 'Linux' | nvidia-cublas-cu12==12.8.4.1; platform_system == 'Linux' | nvidia-cufft-cu12==11.3.3.83; platform_system == 'Linux' | nvidia-curand-cu12==10.3.9.90; platform_system == 'Linux' | nvidia-cusolver-cu12==11.7.3.90; platform_system == 'Linux' | nvidia-cusparse-cu12==12.5.8.93; platform_system == 'Linux' | nvidia-cusparselt-cu12==0.7.1; platform_system == 'Linux' | nvidia-nccl-cu12==2.27.5; platform_system == 'Linux' | nvidia-nvshmem-cu12==3.4.5; platform_system == 'Linux' | nvidia-nvtx-cu12==12.8.90; platform_system == 'Linux' | nvidia-nvjitlink-cu12==12.8.93; platform_system == 'Linux' | nvidia-cufile-cu12==1.13.1.3; platform_system == 'Linux'
|
||||
PYTORCH_EXTRA_INSTALL_REQUIREMENTS: cuda-bindings==12.9.4; platform_system == 'Linux' | nvidia-cuda-nvrtc-cu12==12.8.93; platform_system == 'Linux' | nvidia-cuda-runtime-cu12==12.8.90; platform_system == 'Linux' | nvidia-cuda-cupti-cu12==12.8.90; platform_system == 'Linux' | nvidia-cudnn-cu12==9.10.2.21; platform_system == 'Linux' | nvidia-cublas-cu12==12.8.4.1; platform_system == 'Linux' | nvidia-cufft-cu12==11.3.3.83; platform_system == 'Linux' | nvidia-curand-cu12==10.3.9.90; platform_system == 'Linux' | nvidia-cusolver-cu12==11.7.3.90; platform_system == 'Linux' | nvidia-cusparse-cu12==12.5.8.93; platform_system == 'Linux' | nvidia-cusparselt-cu12==0.7.1; platform_system == 'Linux' | nvidia-nccl-cu12==2.27.5; platform_system == 'Linux' | nvidia-nvshmem-cu12==3.4.5; platform_system == 'Linux' | nvidia-nvtx-cu12==12.8.90; platform_system == 'Linux' | nvidia-nvjitlink-cu12==12.8.93; platform_system == 'Linux' | nvidia-cufile-cu12==1.13.1.3; platform_system == 'Linux'
|
||||
timeout-minutes: 420
|
||||
secrets:
|
||||
github-token: ${{ secrets.GITHUB_TOKEN }}
|
||||
@ -971,7 +971,7 @@ jobs:
|
||||
ALPINE_IMAGE: "arm64v8/alpine"
|
||||
build_name: manywheel-py3_13-cuda-aarch64-12_9
|
||||
build_environment: linux-aarch64-binary-manywheel
|
||||
PYTORCH_EXTRA_INSTALL_REQUIREMENTS: nvidia-cuda-nvrtc-cu12==12.9.86; platform_system == 'Linux' | nvidia-cuda-runtime-cu12==12.9.79; platform_system == 'Linux' | nvidia-cuda-cupti-cu12==12.9.79; platform_system == 'Linux' | nvidia-cudnn-cu12==9.10.2.21; platform_system == 'Linux' | nvidia-cublas-cu12==12.9.1.4; platform_system == 'Linux' | nvidia-cufft-cu12==11.4.1.4; platform_system == 'Linux' | nvidia-curand-cu12==10.3.10.19; platform_system == 'Linux' | nvidia-cusolver-cu12==11.7.5.82; platform_system == 'Linux' | nvidia-cusparse-cu12==12.5.10.65; platform_system == 'Linux' | nvidia-cusparselt-cu12==0.7.1; platform_system == 'Linux' | nvidia-nccl-cu12==2.27.5; platform_system == 'Linux' | nvidia-nvshmem-cu12==3.4.5; platform_system == 'Linux' | nvidia-nvtx-cu12==12.9.79; platform_system == 'Linux' | nvidia-nvjitlink-cu12==12.9.86; platform_system == 'Linux' | nvidia-cufile-cu12==1.14.1.1; platform_system == 'Linux'
|
||||
PYTORCH_EXTRA_INSTALL_REQUIREMENTS: cuda-bindings==12.9.4; platform_system == 'Linux' | nvidia-cuda-nvrtc-cu12==12.9.86; platform_system == 'Linux' | nvidia-cuda-runtime-cu12==12.9.79; platform_system == 'Linux' | nvidia-cuda-cupti-cu12==12.9.79; platform_system == 'Linux' | nvidia-cudnn-cu12==9.10.2.21; platform_system == 'Linux' | nvidia-cublas-cu12==12.9.1.4; platform_system == 'Linux' | nvidia-cufft-cu12==11.4.1.4; platform_system == 'Linux' | nvidia-curand-cu12==10.3.10.19; platform_system == 'Linux' | nvidia-cusolver-cu12==11.7.5.82; platform_system == 'Linux' | nvidia-cusparse-cu12==12.5.10.65; platform_system == 'Linux' | nvidia-cusparselt-cu12==0.7.1; platform_system == 'Linux' | nvidia-nccl-cu12==2.27.5; platform_system == 'Linux' | nvidia-nvshmem-cu12==3.4.5; platform_system == 'Linux' | nvidia-nvtx-cu12==12.9.79; platform_system == 'Linux' | nvidia-nvjitlink-cu12==12.9.86; platform_system == 'Linux' | nvidia-cufile-cu12==1.14.1.1; platform_system == 'Linux'
|
||||
timeout-minutes: 420
|
||||
secrets:
|
||||
github-token: ${{ secrets.GITHUB_TOKEN }}
|
||||
@ -1017,7 +1017,7 @@ jobs:
|
||||
ALPINE_IMAGE: "arm64v8/alpine"
|
||||
build_name: manywheel-py3_13-cuda-aarch64-13_0
|
||||
build_environment: linux-aarch64-binary-manywheel
|
||||
PYTORCH_EXTRA_INSTALL_REQUIREMENTS: nvidia-cuda-nvrtc==13.0.88; platform_system == 'Linux' | nvidia-cuda-runtime==13.0.96; platform_system == 'Linux' | nvidia-cuda-cupti==13.0.85; platform_system == 'Linux' | nvidia-cudnn-cu13==9.13.0.50; platform_system == 'Linux' | nvidia-cublas==13.1.0.3; platform_system == 'Linux' | nvidia-cufft==12.0.0.61; platform_system == 'Linux' | nvidia-curand==10.4.0.35; platform_system == 'Linux' | nvidia-cusolver==12.0.4.66; platform_system == 'Linux' | nvidia-cusparse==12.6.3.3; platform_system == 'Linux' | nvidia-cusparselt-cu13==0.8.0; platform_system == 'Linux' | nvidia-nccl-cu13==2.27.7; platform_system == 'Linux' | nvidia-nvshmem-cu13==3.4.5; platform_system == 'Linux' | nvidia-nvtx==13.0.85; platform_system == 'Linux' | nvidia-nvjitlink==13.0.88; platform_system == 'Linux' | nvidia-cufile==1.15.1.6; platform_system == 'Linux'
|
||||
PYTORCH_EXTRA_INSTALL_REQUIREMENTS: cuda-bindings==13.0.3; platform_system == 'Linux' | nvidia-cuda-nvrtc==13.0.88; platform_system == 'Linux' | nvidia-cuda-runtime==13.0.96; platform_system == 'Linux' | nvidia-cuda-cupti==13.0.85; platform_system == 'Linux' | nvidia-cudnn-cu13==9.13.0.50; platform_system == 'Linux' | nvidia-cublas==13.1.0.3; platform_system == 'Linux' | nvidia-cufft==12.0.0.61; platform_system == 'Linux' | nvidia-curand==10.4.0.35; platform_system == 'Linux' | nvidia-cusolver==12.0.4.66; platform_system == 'Linux' | nvidia-cusparse==12.6.3.3; platform_system == 'Linux' | nvidia-cusparselt-cu13==0.8.0; platform_system == 'Linux' | nvidia-nccl-cu13==2.27.7; platform_system == 'Linux' | nvidia-nvshmem-cu13==3.4.5; platform_system == 'Linux' | nvidia-nvtx==13.0.85; platform_system == 'Linux' | nvidia-nvjitlink==13.0.88; platform_system == 'Linux' | nvidia-cufile==1.15.1.6; platform_system == 'Linux'
|
||||
timeout-minutes: 420
|
||||
secrets:
|
||||
github-token: ${{ secrets.GITHUB_TOKEN }}
|
||||
@ -1128,7 +1128,7 @@ jobs:
|
||||
ALPINE_IMAGE: "arm64v8/alpine"
|
||||
build_name: manywheel-py3_13t-cuda-aarch64-12_6
|
||||
build_environment: linux-aarch64-binary-manywheel
|
||||
PYTORCH_EXTRA_INSTALL_REQUIREMENTS: nvidia-cuda-nvrtc-cu12==12.6.77; platform_system == 'Linux' | nvidia-cuda-runtime-cu12==12.6.77; platform_system == 'Linux' | nvidia-cuda-cupti-cu12==12.6.80; platform_system == 'Linux' | nvidia-cudnn-cu12==9.10.2.21; platform_system == 'Linux' | nvidia-cublas-cu12==12.6.4.1; platform_system == 'Linux' | nvidia-cufft-cu12==11.3.0.4; platform_system == 'Linux' | nvidia-curand-cu12==10.3.7.77; platform_system == 'Linux' | nvidia-cusolver-cu12==11.7.1.2; platform_system == 'Linux' | nvidia-cusparse-cu12==12.5.4.2; platform_system == 'Linux' | nvidia-cusparselt-cu12==0.7.1; platform_system == 'Linux' | nvidia-nccl-cu12==2.27.5; platform_system == 'Linux' | nvidia-nvshmem-cu12==3.4.5; platform_system == 'Linux' | nvidia-nvtx-cu12==12.6.77; platform_system == 'Linux' | nvidia-nvjitlink-cu12==12.6.85; platform_system == 'Linux' | nvidia-cufile-cu12==1.11.1.6; platform_system == 'Linux'
|
||||
PYTORCH_EXTRA_INSTALL_REQUIREMENTS: cuda-bindings==12.9.4; platform_system == 'Linux' | nvidia-cuda-nvrtc-cu12==12.6.77; platform_system == 'Linux' | nvidia-cuda-runtime-cu12==12.6.77; platform_system == 'Linux' | nvidia-cuda-cupti-cu12==12.6.80; platform_system == 'Linux' | nvidia-cudnn-cu12==9.10.2.21; platform_system == 'Linux' | nvidia-cublas-cu12==12.6.4.1; platform_system == 'Linux' | nvidia-cufft-cu12==11.3.0.4; platform_system == 'Linux' | nvidia-curand-cu12==10.3.7.77; platform_system == 'Linux' | nvidia-cusolver-cu12==11.7.1.2; platform_system == 'Linux' | nvidia-cusparse-cu12==12.5.4.2; platform_system == 'Linux' | nvidia-cusparselt-cu12==0.7.1; platform_system == 'Linux' | nvidia-nccl-cu12==2.27.5; platform_system == 'Linux' | nvidia-nvshmem-cu12==3.4.5; platform_system == 'Linux' | nvidia-nvtx-cu12==12.6.77; platform_system == 'Linux' | nvidia-nvjitlink-cu12==12.6.85; platform_system == 'Linux' | nvidia-cufile-cu12==1.11.1.6; platform_system == 'Linux'
|
||||
timeout-minutes: 420
|
||||
secrets:
|
||||
github-token: ${{ secrets.GITHUB_TOKEN }}
|
||||
@ -1174,7 +1174,7 @@ jobs:
|
||||
ALPINE_IMAGE: "arm64v8/alpine"
|
||||
build_name: manywheel-py3_13t-cuda-aarch64-12_8
|
||||
build_environment: linux-aarch64-binary-manywheel
|
||||
PYTORCH_EXTRA_INSTALL_REQUIREMENTS: nvidia-cuda-nvrtc-cu12==12.8.93; platform_system == 'Linux' | nvidia-cuda-runtime-cu12==12.8.90; platform_system == 'Linux' | nvidia-cuda-cupti-cu12==12.8.90; platform_system == 'Linux' | nvidia-cudnn-cu12==9.10.2.21; platform_system == 'Linux' | nvidia-cublas-cu12==12.8.4.1; platform_system == 'Linux' | nvidia-cufft-cu12==11.3.3.83; platform_system == 'Linux' | nvidia-curand-cu12==10.3.9.90; platform_system == 'Linux' | nvidia-cusolver-cu12==11.7.3.90; platform_system == 'Linux' | nvidia-cusparse-cu12==12.5.8.93; platform_system == 'Linux' | nvidia-cusparselt-cu12==0.7.1; platform_system == 'Linux' | nvidia-nccl-cu12==2.27.5; platform_system == 'Linux' | nvidia-nvshmem-cu12==3.4.5; platform_system == 'Linux' | nvidia-nvtx-cu12==12.8.90; platform_system == 'Linux' | nvidia-nvjitlink-cu12==12.8.93; platform_system == 'Linux' | nvidia-cufile-cu12==1.13.1.3; platform_system == 'Linux'
|
||||
PYTORCH_EXTRA_INSTALL_REQUIREMENTS: cuda-bindings==12.9.4; platform_system == 'Linux' | nvidia-cuda-nvrtc-cu12==12.8.93; platform_system == 'Linux' | nvidia-cuda-runtime-cu12==12.8.90; platform_system == 'Linux' | nvidia-cuda-cupti-cu12==12.8.90; platform_system == 'Linux' | nvidia-cudnn-cu12==9.10.2.21; platform_system == 'Linux' | nvidia-cublas-cu12==12.8.4.1; platform_system == 'Linux' | nvidia-cufft-cu12==11.3.3.83; platform_system == 'Linux' | nvidia-curand-cu12==10.3.9.90; platform_system == 'Linux' | nvidia-cusolver-cu12==11.7.3.90; platform_system == 'Linux' | nvidia-cusparse-cu12==12.5.8.93; platform_system == 'Linux' | nvidia-cusparselt-cu12==0.7.1; platform_system == 'Linux' | nvidia-nccl-cu12==2.27.5; platform_system == 'Linux' | nvidia-nvshmem-cu12==3.4.5; platform_system == 'Linux' | nvidia-nvtx-cu12==12.8.90; platform_system == 'Linux' | nvidia-nvjitlink-cu12==12.8.93; platform_system == 'Linux' | nvidia-cufile-cu12==1.13.1.3; platform_system == 'Linux'
|
||||
timeout-minutes: 420
|
||||
secrets:
|
||||
github-token: ${{ secrets.GITHUB_TOKEN }}
|
||||
@ -1220,7 +1220,7 @@ jobs:
|
||||
ALPINE_IMAGE: "arm64v8/alpine"
|
||||
build_name: manywheel-py3_13t-cuda-aarch64-12_9
|
||||
build_environment: linux-aarch64-binary-manywheel
|
||||
PYTORCH_EXTRA_INSTALL_REQUIREMENTS: nvidia-cuda-nvrtc-cu12==12.9.86; platform_system == 'Linux' | nvidia-cuda-runtime-cu12==12.9.79; platform_system == 'Linux' | nvidia-cuda-cupti-cu12==12.9.79; platform_system == 'Linux' | nvidia-cudnn-cu12==9.10.2.21; platform_system == 'Linux' | nvidia-cublas-cu12==12.9.1.4; platform_system == 'Linux' | nvidia-cufft-cu12==11.4.1.4; platform_system == 'Linux' | nvidia-curand-cu12==10.3.10.19; platform_system == 'Linux' | nvidia-cusolver-cu12==11.7.5.82; platform_system == 'Linux' | nvidia-cusparse-cu12==12.5.10.65; platform_system == 'Linux' | nvidia-cusparselt-cu12==0.7.1; platform_system == 'Linux' | nvidia-nccl-cu12==2.27.5; platform_system == 'Linux' | nvidia-nvshmem-cu12==3.4.5; platform_system == 'Linux' | nvidia-nvtx-cu12==12.9.79; platform_system == 'Linux' | nvidia-nvjitlink-cu12==12.9.86; platform_system == 'Linux' | nvidia-cufile-cu12==1.14.1.1; platform_system == 'Linux'
|
||||
PYTORCH_EXTRA_INSTALL_REQUIREMENTS: cuda-bindings==12.9.4; platform_system == 'Linux' | nvidia-cuda-nvrtc-cu12==12.9.86; platform_system == 'Linux' | nvidia-cuda-runtime-cu12==12.9.79; platform_system == 'Linux' | nvidia-cuda-cupti-cu12==12.9.79; platform_system == 'Linux' | nvidia-cudnn-cu12==9.10.2.21; platform_system == 'Linux' | nvidia-cublas-cu12==12.9.1.4; platform_system == 'Linux' | nvidia-cufft-cu12==11.4.1.4; platform_system == 'Linux' | nvidia-curand-cu12==10.3.10.19; platform_system == 'Linux' | nvidia-cusolver-cu12==11.7.5.82; platform_system == 'Linux' | nvidia-cusparse-cu12==12.5.10.65; platform_system == 'Linux' | nvidia-cusparselt-cu12==0.7.1; platform_system == 'Linux' | nvidia-nccl-cu12==2.27.5; platform_system == 'Linux' | nvidia-nvshmem-cu12==3.4.5; platform_system == 'Linux' | nvidia-nvtx-cu12==12.9.79; platform_system == 'Linux' | nvidia-nvjitlink-cu12==12.9.86; platform_system == 'Linux' | nvidia-cufile-cu12==1.14.1.1; platform_system == 'Linux'
|
||||
timeout-minutes: 420
|
||||
secrets:
|
||||
github-token: ${{ secrets.GITHUB_TOKEN }}
|
||||
@ -1266,7 +1266,7 @@ jobs:
|
||||
ALPINE_IMAGE: "arm64v8/alpine"
|
||||
build_name: manywheel-py3_13t-cuda-aarch64-13_0
|
||||
build_environment: linux-aarch64-binary-manywheel
|
||||
PYTORCH_EXTRA_INSTALL_REQUIREMENTS: nvidia-cuda-nvrtc==13.0.88; platform_system == 'Linux' | nvidia-cuda-runtime==13.0.96; platform_system == 'Linux' | nvidia-cuda-cupti==13.0.85; platform_system == 'Linux' | nvidia-cudnn-cu13==9.13.0.50; platform_system == 'Linux' | nvidia-cublas==13.1.0.3; platform_system == 'Linux' | nvidia-cufft==12.0.0.61; platform_system == 'Linux' | nvidia-curand==10.4.0.35; platform_system == 'Linux' | nvidia-cusolver==12.0.4.66; platform_system == 'Linux' | nvidia-cusparse==12.6.3.3; platform_system == 'Linux' | nvidia-cusparselt-cu13==0.8.0; platform_system == 'Linux' | nvidia-nccl-cu13==2.27.7; platform_system == 'Linux' | nvidia-nvshmem-cu13==3.4.5; platform_system == 'Linux' | nvidia-nvtx==13.0.85; platform_system == 'Linux' | nvidia-nvjitlink==13.0.88; platform_system == 'Linux' | nvidia-cufile==1.15.1.6; platform_system == 'Linux'
|
||||
PYTORCH_EXTRA_INSTALL_REQUIREMENTS: cuda-bindings==13.0.3; platform_system == 'Linux' | nvidia-cuda-nvrtc==13.0.88; platform_system == 'Linux' | nvidia-cuda-runtime==13.0.96; platform_system == 'Linux' | nvidia-cuda-cupti==13.0.85; platform_system == 'Linux' | nvidia-cudnn-cu13==9.13.0.50; platform_system == 'Linux' | nvidia-cublas==13.1.0.3; platform_system == 'Linux' | nvidia-cufft==12.0.0.61; platform_system == 'Linux' | nvidia-curand==10.4.0.35; platform_system == 'Linux' | nvidia-cusolver==12.0.4.66; platform_system == 'Linux' | nvidia-cusparse==12.6.3.3; platform_system == 'Linux' | nvidia-cusparselt-cu13==0.8.0; platform_system == 'Linux' | nvidia-nccl-cu13==2.27.7; platform_system == 'Linux' | nvidia-nvshmem-cu13==3.4.5; platform_system == 'Linux' | nvidia-nvtx==13.0.85; platform_system == 'Linux' | nvidia-nvjitlink==13.0.88; platform_system == 'Linux' | nvidia-cufile==1.15.1.6; platform_system == 'Linux'
|
||||
timeout-minutes: 420
|
||||
secrets:
|
||||
github-token: ${{ secrets.GITHUB_TOKEN }}
|
||||
@ -1377,7 +1377,7 @@ jobs:
|
||||
ALPINE_IMAGE: "arm64v8/alpine"
|
||||
build_name: manywheel-py3_14-cuda-aarch64-12_6
|
||||
build_environment: linux-aarch64-binary-manywheel
|
||||
PYTORCH_EXTRA_INSTALL_REQUIREMENTS: nvidia-cuda-nvrtc-cu12==12.6.77; platform_system == 'Linux' | nvidia-cuda-runtime-cu12==12.6.77; platform_system == 'Linux' | nvidia-cuda-cupti-cu12==12.6.80; platform_system == 'Linux' | nvidia-cudnn-cu12==9.10.2.21; platform_system == 'Linux' | nvidia-cublas-cu12==12.6.4.1; platform_system == 'Linux' | nvidia-cufft-cu12==11.3.0.4; platform_system == 'Linux' | nvidia-curand-cu12==10.3.7.77; platform_system == 'Linux' | nvidia-cusolver-cu12==11.7.1.2; platform_system == 'Linux' | nvidia-cusparse-cu12==12.5.4.2; platform_system == 'Linux' | nvidia-cusparselt-cu12==0.7.1; platform_system == 'Linux' | nvidia-nccl-cu12==2.27.5; platform_system == 'Linux' | nvidia-nvshmem-cu12==3.4.5; platform_system == 'Linux' | nvidia-nvtx-cu12==12.6.77; platform_system == 'Linux' | nvidia-nvjitlink-cu12==12.6.85; platform_system == 'Linux' | nvidia-cufile-cu12==1.11.1.6; platform_system == 'Linux'
|
||||
PYTORCH_EXTRA_INSTALL_REQUIREMENTS: cuda-bindings==12.9.4; platform_system == 'Linux' | nvidia-cuda-nvrtc-cu12==12.6.77; platform_system == 'Linux' | nvidia-cuda-runtime-cu12==12.6.77; platform_system == 'Linux' | nvidia-cuda-cupti-cu12==12.6.80; platform_system == 'Linux' | nvidia-cudnn-cu12==9.10.2.21; platform_system == 'Linux' | nvidia-cublas-cu12==12.6.4.1; platform_system == 'Linux' | nvidia-cufft-cu12==11.3.0.4; platform_system == 'Linux' | nvidia-curand-cu12==10.3.7.77; platform_system == 'Linux' | nvidia-cusolver-cu12==11.7.1.2; platform_system == 'Linux' | nvidia-cusparse-cu12==12.5.4.2; platform_system == 'Linux' | nvidia-cusparselt-cu12==0.7.1; platform_system == 'Linux' | nvidia-nccl-cu12==2.27.5; platform_system == 'Linux' | nvidia-nvshmem-cu12==3.4.5; platform_system == 'Linux' | nvidia-nvtx-cu12==12.6.77; platform_system == 'Linux' | nvidia-nvjitlink-cu12==12.6.85; platform_system == 'Linux' | nvidia-cufile-cu12==1.11.1.6; platform_system == 'Linux'
|
||||
timeout-minutes: 420
|
||||
secrets:
|
||||
github-token: ${{ secrets.GITHUB_TOKEN }}
|
||||
@ -1423,7 +1423,7 @@ jobs:
|
||||
ALPINE_IMAGE: "arm64v8/alpine"
|
||||
build_name: manywheel-py3_14-cuda-aarch64-12_8
|
||||
build_environment: linux-aarch64-binary-manywheel
|
||||
PYTORCH_EXTRA_INSTALL_REQUIREMENTS: nvidia-cuda-nvrtc-cu12==12.8.93; platform_system == 'Linux' | nvidia-cuda-runtime-cu12==12.8.90; platform_system == 'Linux' | nvidia-cuda-cupti-cu12==12.8.90; platform_system == 'Linux' | nvidia-cudnn-cu12==9.10.2.21; platform_system == 'Linux' | nvidia-cublas-cu12==12.8.4.1; platform_system == 'Linux' | nvidia-cufft-cu12==11.3.3.83; platform_system == 'Linux' | nvidia-curand-cu12==10.3.9.90; platform_system == 'Linux' | nvidia-cusolver-cu12==11.7.3.90; platform_system == 'Linux' | nvidia-cusparse-cu12==12.5.8.93; platform_system == 'Linux' | nvidia-cusparselt-cu12==0.7.1; platform_system == 'Linux' | nvidia-nccl-cu12==2.27.5; platform_system == 'Linux' | nvidia-nvshmem-cu12==3.4.5; platform_system == 'Linux' | nvidia-nvtx-cu12==12.8.90; platform_system == 'Linux' | nvidia-nvjitlink-cu12==12.8.93; platform_system == 'Linux' | nvidia-cufile-cu12==1.13.1.3; platform_system == 'Linux'
|
||||
PYTORCH_EXTRA_INSTALL_REQUIREMENTS: cuda-bindings==12.9.4; platform_system == 'Linux' | nvidia-cuda-nvrtc-cu12==12.8.93; platform_system == 'Linux' | nvidia-cuda-runtime-cu12==12.8.90; platform_system == 'Linux' | nvidia-cuda-cupti-cu12==12.8.90; platform_system == 'Linux' | nvidia-cudnn-cu12==9.10.2.21; platform_system == 'Linux' | nvidia-cublas-cu12==12.8.4.1; platform_system == 'Linux' | nvidia-cufft-cu12==11.3.3.83; platform_system == 'Linux' | nvidia-curand-cu12==10.3.9.90; platform_system == 'Linux' | nvidia-cusolver-cu12==11.7.3.90; platform_system == 'Linux' | nvidia-cusparse-cu12==12.5.8.93; platform_system == 'Linux' | nvidia-cusparselt-cu12==0.7.1; platform_system == 'Linux' | nvidia-nccl-cu12==2.27.5; platform_system == 'Linux' | nvidia-nvshmem-cu12==3.4.5; platform_system == 'Linux' | nvidia-nvtx-cu12==12.8.90; platform_system == 'Linux' | nvidia-nvjitlink-cu12==12.8.93; platform_system == 'Linux' | nvidia-cufile-cu12==1.13.1.3; platform_system == 'Linux'
|
||||
timeout-minutes: 420
|
||||
secrets:
|
||||
github-token: ${{ secrets.GITHUB_TOKEN }}
|
||||
@ -1469,7 +1469,7 @@ jobs:
|
||||
ALPINE_IMAGE: "arm64v8/alpine"
|
||||
build_name: manywheel-py3_14-cuda-aarch64-12_9
|
||||
build_environment: linux-aarch64-binary-manywheel
|
||||
PYTORCH_EXTRA_INSTALL_REQUIREMENTS: nvidia-cuda-nvrtc-cu12==12.9.86; platform_system == 'Linux' | nvidia-cuda-runtime-cu12==12.9.79; platform_system == 'Linux' | nvidia-cuda-cupti-cu12==12.9.79; platform_system == 'Linux' | nvidia-cudnn-cu12==9.10.2.21; platform_system == 'Linux' | nvidia-cublas-cu12==12.9.1.4; platform_system == 'Linux' | nvidia-cufft-cu12==11.4.1.4; platform_system == 'Linux' | nvidia-curand-cu12==10.3.10.19; platform_system == 'Linux' | nvidia-cusolver-cu12==11.7.5.82; platform_system == 'Linux' | nvidia-cusparse-cu12==12.5.10.65; platform_system == 'Linux' | nvidia-cusparselt-cu12==0.7.1; platform_system == 'Linux' | nvidia-nccl-cu12==2.27.5; platform_system == 'Linux' | nvidia-nvshmem-cu12==3.4.5; platform_system == 'Linux' | nvidia-nvtx-cu12==12.9.79; platform_system == 'Linux' | nvidia-nvjitlink-cu12==12.9.86; platform_system == 'Linux' | nvidia-cufile-cu12==1.14.1.1; platform_system == 'Linux'
|
||||
PYTORCH_EXTRA_INSTALL_REQUIREMENTS: cuda-bindings==12.9.4; platform_system == 'Linux' | nvidia-cuda-nvrtc-cu12==12.9.86; platform_system == 'Linux' | nvidia-cuda-runtime-cu12==12.9.79; platform_system == 'Linux' | nvidia-cuda-cupti-cu12==12.9.79; platform_system == 'Linux' | nvidia-cudnn-cu12==9.10.2.21; platform_system == 'Linux' | nvidia-cublas-cu12==12.9.1.4; platform_system == 'Linux' | nvidia-cufft-cu12==11.4.1.4; platform_system == 'Linux' | nvidia-curand-cu12==10.3.10.19; platform_system == 'Linux' | nvidia-cusolver-cu12==11.7.5.82; platform_system == 'Linux' | nvidia-cusparse-cu12==12.5.10.65; platform_system == 'Linux' | nvidia-cusparselt-cu12==0.7.1; platform_system == 'Linux' | nvidia-nccl-cu12==2.27.5; platform_system == 'Linux' | nvidia-nvshmem-cu12==3.4.5; platform_system == 'Linux' | nvidia-nvtx-cu12==12.9.79; platform_system == 'Linux' | nvidia-nvjitlink-cu12==12.9.86; platform_system == 'Linux' | nvidia-cufile-cu12==1.14.1.1; platform_system == 'Linux'
|
||||
timeout-minutes: 420
|
||||
secrets:
|
||||
github-token: ${{ secrets.GITHUB_TOKEN }}
|
||||
@ -1515,7 +1515,7 @@ jobs:
|
||||
ALPINE_IMAGE: "arm64v8/alpine"
|
||||
build_name: manywheel-py3_14-cuda-aarch64-13_0
|
||||
build_environment: linux-aarch64-binary-manywheel
|
||||
PYTORCH_EXTRA_INSTALL_REQUIREMENTS: nvidia-cuda-nvrtc==13.0.88; platform_system == 'Linux' | nvidia-cuda-runtime==13.0.96; platform_system == 'Linux' | nvidia-cuda-cupti==13.0.85; platform_system == 'Linux' | nvidia-cudnn-cu13==9.13.0.50; platform_system == 'Linux' | nvidia-cublas==13.1.0.3; platform_system == 'Linux' | nvidia-cufft==12.0.0.61; platform_system == 'Linux' | nvidia-curand==10.4.0.35; platform_system == 'Linux' | nvidia-cusolver==12.0.4.66; platform_system == 'Linux' | nvidia-cusparse==12.6.3.3; platform_system == 'Linux' | nvidia-cusparselt-cu13==0.8.0; platform_system == 'Linux' | nvidia-nccl-cu13==2.27.7; platform_system == 'Linux' | nvidia-nvshmem-cu13==3.4.5; platform_system == 'Linux' | nvidia-nvtx==13.0.85; platform_system == 'Linux' | nvidia-nvjitlink==13.0.88; platform_system == 'Linux' | nvidia-cufile==1.15.1.6; platform_system == 'Linux'
|
||||
PYTORCH_EXTRA_INSTALL_REQUIREMENTS: cuda-bindings==13.0.3; platform_system == 'Linux' | nvidia-cuda-nvrtc==13.0.88; platform_system == 'Linux' | nvidia-cuda-runtime==13.0.96; platform_system == 'Linux' | nvidia-cuda-cupti==13.0.85; platform_system == 'Linux' | nvidia-cudnn-cu13==9.13.0.50; platform_system == 'Linux' | nvidia-cublas==13.1.0.3; platform_system == 'Linux' | nvidia-cufft==12.0.0.61; platform_system == 'Linux' | nvidia-curand==10.4.0.35; platform_system == 'Linux' | nvidia-cusolver==12.0.4.66; platform_system == 'Linux' | nvidia-cusparse==12.6.3.3; platform_system == 'Linux' | nvidia-cusparselt-cu13==0.8.0; platform_system == 'Linux' | nvidia-nccl-cu13==2.27.7; platform_system == 'Linux' | nvidia-nvshmem-cu13==3.4.5; platform_system == 'Linux' | nvidia-nvtx==13.0.85; platform_system == 'Linux' | nvidia-nvjitlink==13.0.88; platform_system == 'Linux' | nvidia-cufile==1.15.1.6; platform_system == 'Linux'
|
||||
timeout-minutes: 420
|
||||
secrets:
|
||||
github-token: ${{ secrets.GITHUB_TOKEN }}
|
||||
@ -1626,7 +1626,7 @@ jobs:
|
||||
ALPINE_IMAGE: "arm64v8/alpine"
|
||||
build_name: manywheel-py3_14t-cuda-aarch64-12_6
|
||||
build_environment: linux-aarch64-binary-manywheel
|
||||
PYTORCH_EXTRA_INSTALL_REQUIREMENTS: nvidia-cuda-nvrtc-cu12==12.6.77; platform_system == 'Linux' | nvidia-cuda-runtime-cu12==12.6.77; platform_system == 'Linux' | nvidia-cuda-cupti-cu12==12.6.80; platform_system == 'Linux' | nvidia-cudnn-cu12==9.10.2.21; platform_system == 'Linux' | nvidia-cublas-cu12==12.6.4.1; platform_system == 'Linux' | nvidia-cufft-cu12==11.3.0.4; platform_system == 'Linux' | nvidia-curand-cu12==10.3.7.77; platform_system == 'Linux' | nvidia-cusolver-cu12==11.7.1.2; platform_system == 'Linux' | nvidia-cusparse-cu12==12.5.4.2; platform_system == 'Linux' | nvidia-cusparselt-cu12==0.7.1; platform_system == 'Linux' | nvidia-nccl-cu12==2.27.5; platform_system == 'Linux' | nvidia-nvshmem-cu12==3.4.5; platform_system == 'Linux' | nvidia-nvtx-cu12==12.6.77; platform_system == 'Linux' | nvidia-nvjitlink-cu12==12.6.85; platform_system == 'Linux' | nvidia-cufile-cu12==1.11.1.6; platform_system == 'Linux'
|
||||
PYTORCH_EXTRA_INSTALL_REQUIREMENTS: cuda-bindings==12.9.4; platform_system == 'Linux' | nvidia-cuda-nvrtc-cu12==12.6.77; platform_system == 'Linux' | nvidia-cuda-runtime-cu12==12.6.77; platform_system == 'Linux' | nvidia-cuda-cupti-cu12==12.6.80; platform_system == 'Linux' | nvidia-cudnn-cu12==9.10.2.21; platform_system == 'Linux' | nvidia-cublas-cu12==12.6.4.1; platform_system == 'Linux' | nvidia-cufft-cu12==11.3.0.4; platform_system == 'Linux' | nvidia-curand-cu12==10.3.7.77; platform_system == 'Linux' | nvidia-cusolver-cu12==11.7.1.2; platform_system == 'Linux' | nvidia-cusparse-cu12==12.5.4.2; platform_system == 'Linux' | nvidia-cusparselt-cu12==0.7.1; platform_system == 'Linux' | nvidia-nccl-cu12==2.27.5; platform_system == 'Linux' | nvidia-nvshmem-cu12==3.4.5; platform_system == 'Linux' | nvidia-nvtx-cu12==12.6.77; platform_system == 'Linux' | nvidia-nvjitlink-cu12==12.6.85; platform_system == 'Linux' | nvidia-cufile-cu12==1.11.1.6; platform_system == 'Linux'
|
||||
timeout-minutes: 420
|
||||
secrets:
|
||||
github-token: ${{ secrets.GITHUB_TOKEN }}
|
||||
@ -1672,7 +1672,7 @@ jobs:
|
||||
ALPINE_IMAGE: "arm64v8/alpine"
|
||||
build_name: manywheel-py3_14t-cuda-aarch64-12_8
|
||||
build_environment: linux-aarch64-binary-manywheel
|
||||
PYTORCH_EXTRA_INSTALL_REQUIREMENTS: nvidia-cuda-nvrtc-cu12==12.8.93; platform_system == 'Linux' | nvidia-cuda-runtime-cu12==12.8.90; platform_system == 'Linux' | nvidia-cuda-cupti-cu12==12.8.90; platform_system == 'Linux' | nvidia-cudnn-cu12==9.10.2.21; platform_system == 'Linux' | nvidia-cublas-cu12==12.8.4.1; platform_system == 'Linux' | nvidia-cufft-cu12==11.3.3.83; platform_system == 'Linux' | nvidia-curand-cu12==10.3.9.90; platform_system == 'Linux' | nvidia-cusolver-cu12==11.7.3.90; platform_system == 'Linux' | nvidia-cusparse-cu12==12.5.8.93; platform_system == 'Linux' | nvidia-cusparselt-cu12==0.7.1; platform_system == 'Linux' | nvidia-nccl-cu12==2.27.5; platform_system == 'Linux' | nvidia-nvshmem-cu12==3.4.5; platform_system == 'Linux' | nvidia-nvtx-cu12==12.8.90; platform_system == 'Linux' | nvidia-nvjitlink-cu12==12.8.93; platform_system == 'Linux' | nvidia-cufile-cu12==1.13.1.3; platform_system == 'Linux'
|
||||
PYTORCH_EXTRA_INSTALL_REQUIREMENTS: cuda-bindings==12.9.4; platform_system == 'Linux' | nvidia-cuda-nvrtc-cu12==12.8.93; platform_system == 'Linux' | nvidia-cuda-runtime-cu12==12.8.90; platform_system == 'Linux' | nvidia-cuda-cupti-cu12==12.8.90; platform_system == 'Linux' | nvidia-cudnn-cu12==9.10.2.21; platform_system == 'Linux' | nvidia-cublas-cu12==12.8.4.1; platform_system == 'Linux' | nvidia-cufft-cu12==11.3.3.83; platform_system == 'Linux' | nvidia-curand-cu12==10.3.9.90; platform_system == 'Linux' | nvidia-cusolver-cu12==11.7.3.90; platform_system == 'Linux' | nvidia-cusparse-cu12==12.5.8.93; platform_system == 'Linux' | nvidia-cusparselt-cu12==0.7.1; platform_system == 'Linux' | nvidia-nccl-cu12==2.27.5; platform_system == 'Linux' | nvidia-nvshmem-cu12==3.4.5; platform_system == 'Linux' | nvidia-nvtx-cu12==12.8.90; platform_system == 'Linux' | nvidia-nvjitlink-cu12==12.8.93; platform_system == 'Linux' | nvidia-cufile-cu12==1.13.1.3; platform_system == 'Linux'
|
||||
timeout-minutes: 420
|
||||
secrets:
|
||||
github-token: ${{ secrets.GITHUB_TOKEN }}
|
||||
@ -1718,7 +1718,7 @@ jobs:
|
||||
ALPINE_IMAGE: "arm64v8/alpine"
|
||||
build_name: manywheel-py3_14t-cuda-aarch64-12_9
|
||||
build_environment: linux-aarch64-binary-manywheel
|
||||
PYTORCH_EXTRA_INSTALL_REQUIREMENTS: nvidia-cuda-nvrtc-cu12==12.9.86; platform_system == 'Linux' | nvidia-cuda-runtime-cu12==12.9.79; platform_system == 'Linux' | nvidia-cuda-cupti-cu12==12.9.79; platform_system == 'Linux' | nvidia-cudnn-cu12==9.10.2.21; platform_system == 'Linux' | nvidia-cublas-cu12==12.9.1.4; platform_system == 'Linux' | nvidia-cufft-cu12==11.4.1.4; platform_system == 'Linux' | nvidia-curand-cu12==10.3.10.19; platform_system == 'Linux' | nvidia-cusolver-cu12==11.7.5.82; platform_system == 'Linux' | nvidia-cusparse-cu12==12.5.10.65; platform_system == 'Linux' | nvidia-cusparselt-cu12==0.7.1; platform_system == 'Linux' | nvidia-nccl-cu12==2.27.5; platform_system == 'Linux' | nvidia-nvshmem-cu12==3.4.5; platform_system == 'Linux' | nvidia-nvtx-cu12==12.9.79; platform_system == 'Linux' | nvidia-nvjitlink-cu12==12.9.86; platform_system == 'Linux' | nvidia-cufile-cu12==1.14.1.1; platform_system == 'Linux'
|
||||
PYTORCH_EXTRA_INSTALL_REQUIREMENTS: cuda-bindings==12.9.4; platform_system == 'Linux' | nvidia-cuda-nvrtc-cu12==12.9.86; platform_system == 'Linux' | nvidia-cuda-runtime-cu12==12.9.79; platform_system == 'Linux' | nvidia-cuda-cupti-cu12==12.9.79; platform_system == 'Linux' | nvidia-cudnn-cu12==9.10.2.21; platform_system == 'Linux' | nvidia-cublas-cu12==12.9.1.4; platform_system == 'Linux' | nvidia-cufft-cu12==11.4.1.4; platform_system == 'Linux' | nvidia-curand-cu12==10.3.10.19; platform_system == 'Linux' | nvidia-cusolver-cu12==11.7.5.82; platform_system == 'Linux' | nvidia-cusparse-cu12==12.5.10.65; platform_system == 'Linux' | nvidia-cusparselt-cu12==0.7.1; platform_system == 'Linux' | nvidia-nccl-cu12==2.27.5; platform_system == 'Linux' | nvidia-nvshmem-cu12==3.4.5; platform_system == 'Linux' | nvidia-nvtx-cu12==12.9.79; platform_system == 'Linux' | nvidia-nvjitlink-cu12==12.9.86; platform_system == 'Linux' | nvidia-cufile-cu12==1.14.1.1; platform_system == 'Linux'
|
||||
timeout-minutes: 420
|
||||
secrets:
|
||||
github-token: ${{ secrets.GITHUB_TOKEN }}
|
||||
@ -1764,7 +1764,7 @@ jobs:
|
||||
ALPINE_IMAGE: "arm64v8/alpine"
|
||||
build_name: manywheel-py3_14t-cuda-aarch64-13_0
|
||||
build_environment: linux-aarch64-binary-manywheel
|
||||
PYTORCH_EXTRA_INSTALL_REQUIREMENTS: nvidia-cuda-nvrtc==13.0.88; platform_system == 'Linux' | nvidia-cuda-runtime==13.0.96; platform_system == 'Linux' | nvidia-cuda-cupti==13.0.85; platform_system == 'Linux' | nvidia-cudnn-cu13==9.13.0.50; platform_system == 'Linux' | nvidia-cublas==13.1.0.3; platform_system == 'Linux' | nvidia-cufft==12.0.0.61; platform_system == 'Linux' | nvidia-curand==10.4.0.35; platform_system == 'Linux' | nvidia-cusolver==12.0.4.66; platform_system == 'Linux' | nvidia-cusparse==12.6.3.3; platform_system == 'Linux' | nvidia-cusparselt-cu13==0.8.0; platform_system == 'Linux' | nvidia-nccl-cu13==2.27.7; platform_system == 'Linux' | nvidia-nvshmem-cu13==3.4.5; platform_system == 'Linux' | nvidia-nvtx==13.0.85; platform_system == 'Linux' | nvidia-nvjitlink==13.0.88; platform_system == 'Linux' | nvidia-cufile==1.15.1.6; platform_system == 'Linux'
|
||||
PYTORCH_EXTRA_INSTALL_REQUIREMENTS: cuda-bindings==13.0.3; platform_system == 'Linux' | nvidia-cuda-nvrtc==13.0.88; platform_system == 'Linux' | nvidia-cuda-runtime==13.0.96; platform_system == 'Linux' | nvidia-cuda-cupti==13.0.85; platform_system == 'Linux' | nvidia-cudnn-cu13==9.13.0.50; platform_system == 'Linux' | nvidia-cublas==13.1.0.3; platform_system == 'Linux' | nvidia-cufft==12.0.0.61; platform_system == 'Linux' | nvidia-curand==10.4.0.35; platform_system == 'Linux' | nvidia-cusolver==12.0.4.66; platform_system == 'Linux' | nvidia-cusparse==12.6.3.3; platform_system == 'Linux' | nvidia-cusparselt-cu13==0.8.0; platform_system == 'Linux' | nvidia-nccl-cu13==2.27.7; platform_system == 'Linux' | nvidia-nvshmem-cu13==3.4.5; platform_system == 'Linux' | nvidia-nvtx==13.0.85; platform_system == 'Linux' | nvidia-nvjitlink==13.0.88; platform_system == 'Linux' | nvidia-cufile==1.15.1.6; platform_system == 'Linux'
|
||||
timeout-minutes: 420
|
||||
secrets:
|
||||
github-token: ${{ secrets.GITHUB_TOKEN }}
|
||||
|
||||
56
.github/workflows/generated-linux-binary-manywheel-nightly.yml
generated
vendored
56
.github/workflows/generated-linux-binary-manywheel-nightly.yml
generated
vendored
@ -127,7 +127,7 @@ jobs:
|
||||
runner_prefix: "${{ needs.get-label-type.outputs.label-type }}"
|
||||
build_name: manywheel-py3_10-cuda12_6
|
||||
build_environment: linux-binary-manywheel
|
||||
PYTORCH_EXTRA_INSTALL_REQUIREMENTS: nvidia-cuda-nvrtc-cu12==12.6.77; platform_system == 'Linux' | nvidia-cuda-runtime-cu12==12.6.77; platform_system == 'Linux' | nvidia-cuda-cupti-cu12==12.6.80; platform_system == 'Linux' | nvidia-cudnn-cu12==9.10.2.21; platform_system == 'Linux' | nvidia-cublas-cu12==12.6.4.1; platform_system == 'Linux' | nvidia-cufft-cu12==11.3.0.4; platform_system == 'Linux' | nvidia-curand-cu12==10.3.7.77; platform_system == 'Linux' | nvidia-cusolver-cu12==11.7.1.2; platform_system == 'Linux' | nvidia-cusparse-cu12==12.5.4.2; platform_system == 'Linux' | nvidia-cusparselt-cu12==0.7.1; platform_system == 'Linux' | nvidia-nccl-cu12==2.27.5; platform_system == 'Linux' | nvidia-nvshmem-cu12==3.4.5; platform_system == 'Linux' | nvidia-nvtx-cu12==12.6.77; platform_system == 'Linux' | nvidia-nvjitlink-cu12==12.6.85; platform_system == 'Linux' | nvidia-cufile-cu12==1.11.1.6; platform_system == 'Linux'
|
||||
PYTORCH_EXTRA_INSTALL_REQUIREMENTS: cuda-bindings==12.9.4; platform_system == 'Linux' | nvidia-cuda-nvrtc-cu12==12.6.77; platform_system == 'Linux' | nvidia-cuda-runtime-cu12==12.6.77; platform_system == 'Linux' | nvidia-cuda-cupti-cu12==12.6.80; platform_system == 'Linux' | nvidia-cudnn-cu12==9.10.2.21; platform_system == 'Linux' | nvidia-cublas-cu12==12.6.4.1; platform_system == 'Linux' | nvidia-cufft-cu12==11.3.0.4; platform_system == 'Linux' | nvidia-curand-cu12==10.3.7.77; platform_system == 'Linux' | nvidia-cusolver-cu12==11.7.1.2; platform_system == 'Linux' | nvidia-cusparse-cu12==12.5.4.2; platform_system == 'Linux' | nvidia-cusparselt-cu12==0.7.1; platform_system == 'Linux' | nvidia-nccl-cu12==2.27.5; platform_system == 'Linux' | nvidia-nvshmem-cu12==3.4.5; platform_system == 'Linux' | nvidia-nvtx-cu12==12.6.77; platform_system == 'Linux' | nvidia-nvjitlink-cu12==12.6.85; platform_system == 'Linux' | nvidia-cufile-cu12==1.11.1.6; platform_system == 'Linux'
|
||||
secrets:
|
||||
github-token: ${{ secrets.GITHUB_TOKEN }}
|
||||
manywheel-py3_10-cuda12_6-test: # Testing
|
||||
@ -193,7 +193,7 @@ jobs:
|
||||
runner_prefix: "${{ needs.get-label-type.outputs.label-type }}"
|
||||
build_name: manywheel-py3_10-cuda12_8
|
||||
build_environment: linux-binary-manywheel
|
||||
PYTORCH_EXTRA_INSTALL_REQUIREMENTS: nvidia-cuda-nvrtc-cu12==12.8.93; platform_system == 'Linux' | nvidia-cuda-runtime-cu12==12.8.90; platform_system == 'Linux' | nvidia-cuda-cupti-cu12==12.8.90; platform_system == 'Linux' | nvidia-cudnn-cu12==9.10.2.21; platform_system == 'Linux' | nvidia-cublas-cu12==12.8.4.1; platform_system == 'Linux' | nvidia-cufft-cu12==11.3.3.83; platform_system == 'Linux' | nvidia-curand-cu12==10.3.9.90; platform_system == 'Linux' | nvidia-cusolver-cu12==11.7.3.90; platform_system == 'Linux' | nvidia-cusparse-cu12==12.5.8.93; platform_system == 'Linux' | nvidia-cusparselt-cu12==0.7.1; platform_system == 'Linux' | nvidia-nccl-cu12==2.27.5; platform_system == 'Linux' | nvidia-nvshmem-cu12==3.4.5; platform_system == 'Linux' | nvidia-nvtx-cu12==12.8.90; platform_system == 'Linux' | nvidia-nvjitlink-cu12==12.8.93; platform_system == 'Linux' | nvidia-cufile-cu12==1.13.1.3; platform_system == 'Linux'
|
||||
PYTORCH_EXTRA_INSTALL_REQUIREMENTS: cuda-bindings==12.9.4; platform_system == 'Linux' | nvidia-cuda-nvrtc-cu12==12.8.93; platform_system == 'Linux' | nvidia-cuda-runtime-cu12==12.8.90; platform_system == 'Linux' | nvidia-cuda-cupti-cu12==12.8.90; platform_system == 'Linux' | nvidia-cudnn-cu12==9.10.2.21; platform_system == 'Linux' | nvidia-cublas-cu12==12.8.4.1; platform_system == 'Linux' | nvidia-cufft-cu12==11.3.3.83; platform_system == 'Linux' | nvidia-curand-cu12==10.3.9.90; platform_system == 'Linux' | nvidia-cusolver-cu12==11.7.3.90; platform_system == 'Linux' | nvidia-cusparse-cu12==12.5.8.93; platform_system == 'Linux' | nvidia-cusparselt-cu12==0.7.1; platform_system == 'Linux' | nvidia-nccl-cu12==2.27.5; platform_system == 'Linux' | nvidia-nvshmem-cu12==3.4.5; platform_system == 'Linux' | nvidia-nvtx-cu12==12.8.90; platform_system == 'Linux' | nvidia-nvjitlink-cu12==12.8.93; platform_system == 'Linux' | nvidia-cufile-cu12==1.13.1.3; platform_system == 'Linux'
|
||||
secrets:
|
||||
github-token: ${{ secrets.GITHUB_TOKEN }}
|
||||
manywheel-py3_10-cuda12_8-test: # Testing
|
||||
@ -259,7 +259,7 @@ jobs:
|
||||
runner_prefix: "${{ needs.get-label-type.outputs.label-type }}"
|
||||
build_name: manywheel-py3_10-cuda12_9
|
||||
build_environment: linux-binary-manywheel
|
||||
PYTORCH_EXTRA_INSTALL_REQUIREMENTS: nvidia-cuda-nvrtc-cu12==12.9.86; platform_system == 'Linux' | nvidia-cuda-runtime-cu12==12.9.79; platform_system == 'Linux' | nvidia-cuda-cupti-cu12==12.9.79; platform_system == 'Linux' | nvidia-cudnn-cu12==9.10.2.21; platform_system == 'Linux' | nvidia-cublas-cu12==12.9.1.4; platform_system == 'Linux' | nvidia-cufft-cu12==11.4.1.4; platform_system == 'Linux' | nvidia-curand-cu12==10.3.10.19; platform_system == 'Linux' | nvidia-cusolver-cu12==11.7.5.82; platform_system == 'Linux' | nvidia-cusparse-cu12==12.5.10.65; platform_system == 'Linux' | nvidia-cusparselt-cu12==0.7.1; platform_system == 'Linux' | nvidia-nccl-cu12==2.27.5; platform_system == 'Linux' | nvidia-nvshmem-cu12==3.4.5; platform_system == 'Linux' | nvidia-nvtx-cu12==12.9.79; platform_system == 'Linux' | nvidia-nvjitlink-cu12==12.9.86; platform_system == 'Linux' | nvidia-cufile-cu12==1.14.1.1; platform_system == 'Linux'
|
||||
PYTORCH_EXTRA_INSTALL_REQUIREMENTS: cuda-bindings==12.9.4; platform_system == 'Linux' | nvidia-cuda-nvrtc-cu12==12.9.86; platform_system == 'Linux' | nvidia-cuda-runtime-cu12==12.9.79; platform_system == 'Linux' | nvidia-cuda-cupti-cu12==12.9.79; platform_system == 'Linux' | nvidia-cudnn-cu12==9.10.2.21; platform_system == 'Linux' | nvidia-cublas-cu12==12.9.1.4; platform_system == 'Linux' | nvidia-cufft-cu12==11.4.1.4; platform_system == 'Linux' | nvidia-curand-cu12==10.3.10.19; platform_system == 'Linux' | nvidia-cusolver-cu12==11.7.5.82; platform_system == 'Linux' | nvidia-cusparse-cu12==12.5.10.65; platform_system == 'Linux' | nvidia-cusparselt-cu12==0.7.1; platform_system == 'Linux' | nvidia-nccl-cu12==2.27.5; platform_system == 'Linux' | nvidia-nvshmem-cu12==3.4.5; platform_system == 'Linux' | nvidia-nvtx-cu12==12.9.79; platform_system == 'Linux' | nvidia-nvjitlink-cu12==12.9.86; platform_system == 'Linux' | nvidia-cufile-cu12==1.14.1.1; platform_system == 'Linux'
|
||||
secrets:
|
||||
github-token: ${{ secrets.GITHUB_TOKEN }}
|
||||
manywheel-py3_10-cuda12_9-test: # Testing
|
||||
@ -325,7 +325,7 @@ jobs:
|
||||
runner_prefix: "${{ needs.get-label-type.outputs.label-type }}"
|
||||
build_name: manywheel-py3_10-cuda13_0
|
||||
build_environment: linux-binary-manywheel
|
||||
PYTORCH_EXTRA_INSTALL_REQUIREMENTS: nvidia-cuda-nvrtc==13.0.88; platform_system == 'Linux' | nvidia-cuda-runtime==13.0.96; platform_system == 'Linux' | nvidia-cuda-cupti==13.0.85; platform_system == 'Linux' | nvidia-cudnn-cu13==9.13.0.50; platform_system == 'Linux' | nvidia-cublas==13.1.0.3; platform_system == 'Linux' | nvidia-cufft==12.0.0.61; platform_system == 'Linux' | nvidia-curand==10.4.0.35; platform_system == 'Linux' | nvidia-cusolver==12.0.4.66; platform_system == 'Linux' | nvidia-cusparse==12.6.3.3; platform_system == 'Linux' | nvidia-cusparselt-cu13==0.8.0; platform_system == 'Linux' | nvidia-nccl-cu13==2.27.7; platform_system == 'Linux' | nvidia-nvshmem-cu13==3.4.5; platform_system == 'Linux' | nvidia-nvtx==13.0.85; platform_system == 'Linux' | nvidia-nvjitlink==13.0.88; platform_system == 'Linux' | nvidia-cufile==1.15.1.6; platform_system == 'Linux'
|
||||
PYTORCH_EXTRA_INSTALL_REQUIREMENTS: cuda-bindings==13.0.3; platform_system == 'Linux' | nvidia-cuda-nvrtc==13.0.88; platform_system == 'Linux' | nvidia-cuda-runtime==13.0.96; platform_system == 'Linux' | nvidia-cuda-cupti==13.0.85; platform_system == 'Linux' | nvidia-cudnn-cu13==9.13.0.50; platform_system == 'Linux' | nvidia-cublas==13.1.0.3; platform_system == 'Linux' | nvidia-cufft==12.0.0.61; platform_system == 'Linux' | nvidia-curand==10.4.0.35; platform_system == 'Linux' | nvidia-cusolver==12.0.4.66; platform_system == 'Linux' | nvidia-cusparse==12.6.3.3; platform_system == 'Linux' | nvidia-cusparselt-cu13==0.8.0; platform_system == 'Linux' | nvidia-nccl-cu13==2.27.7; platform_system == 'Linux' | nvidia-nvshmem-cu13==3.4.5; platform_system == 'Linux' | nvidia-nvtx==13.0.85; platform_system == 'Linux' | nvidia-nvjitlink==13.0.88; platform_system == 'Linux' | nvidia-cufile==1.15.1.6; platform_system == 'Linux'
|
||||
secrets:
|
||||
github-token: ${{ secrets.GITHUB_TOKEN }}
|
||||
manywheel-py3_10-cuda13_0-test: # Testing
|
||||
@ -793,7 +793,7 @@ jobs:
|
||||
runner_prefix: "${{ needs.get-label-type.outputs.label-type }}"
|
||||
build_name: manywheel-py3_11-cuda12_6
|
||||
build_environment: linux-binary-manywheel
|
||||
PYTORCH_EXTRA_INSTALL_REQUIREMENTS: nvidia-cuda-nvrtc-cu12==12.6.77; platform_system == 'Linux' | nvidia-cuda-runtime-cu12==12.6.77; platform_system == 'Linux' | nvidia-cuda-cupti-cu12==12.6.80; platform_system == 'Linux' | nvidia-cudnn-cu12==9.10.2.21; platform_system == 'Linux' | nvidia-cublas-cu12==12.6.4.1; platform_system == 'Linux' | nvidia-cufft-cu12==11.3.0.4; platform_system == 'Linux' | nvidia-curand-cu12==10.3.7.77; platform_system == 'Linux' | nvidia-cusolver-cu12==11.7.1.2; platform_system == 'Linux' | nvidia-cusparse-cu12==12.5.4.2; platform_system == 'Linux' | nvidia-cusparselt-cu12==0.7.1; platform_system == 'Linux' | nvidia-nccl-cu12==2.27.5; platform_system == 'Linux' | nvidia-nvshmem-cu12==3.4.5; platform_system == 'Linux' | nvidia-nvtx-cu12==12.6.77; platform_system == 'Linux' | nvidia-nvjitlink-cu12==12.6.85; platform_system == 'Linux' | nvidia-cufile-cu12==1.11.1.6; platform_system == 'Linux'
|
||||
PYTORCH_EXTRA_INSTALL_REQUIREMENTS: cuda-bindings==12.9.4; platform_system == 'Linux' | nvidia-cuda-nvrtc-cu12==12.6.77; platform_system == 'Linux' | nvidia-cuda-runtime-cu12==12.6.77; platform_system == 'Linux' | nvidia-cuda-cupti-cu12==12.6.80; platform_system == 'Linux' | nvidia-cudnn-cu12==9.10.2.21; platform_system == 'Linux' | nvidia-cublas-cu12==12.6.4.1; platform_system == 'Linux' | nvidia-cufft-cu12==11.3.0.4; platform_system == 'Linux' | nvidia-curand-cu12==10.3.7.77; platform_system == 'Linux' | nvidia-cusolver-cu12==11.7.1.2; platform_system == 'Linux' | nvidia-cusparse-cu12==12.5.4.2; platform_system == 'Linux' | nvidia-cusparselt-cu12==0.7.1; platform_system == 'Linux' | nvidia-nccl-cu12==2.27.5; platform_system == 'Linux' | nvidia-nvshmem-cu12==3.4.5; platform_system == 'Linux' | nvidia-nvtx-cu12==12.6.77; platform_system == 'Linux' | nvidia-nvjitlink-cu12==12.6.85; platform_system == 'Linux' | nvidia-cufile-cu12==1.11.1.6; platform_system == 'Linux'
|
||||
secrets:
|
||||
github-token: ${{ secrets.GITHUB_TOKEN }}
|
||||
manywheel-py3_11-cuda12_6-test: # Testing
|
||||
@ -859,7 +859,7 @@ jobs:
|
||||
runner_prefix: "${{ needs.get-label-type.outputs.label-type }}"
|
||||
build_name: manywheel-py3_11-cuda12_8
|
||||
build_environment: linux-binary-manywheel
|
||||
PYTORCH_EXTRA_INSTALL_REQUIREMENTS: nvidia-cuda-nvrtc-cu12==12.8.93; platform_system == 'Linux' | nvidia-cuda-runtime-cu12==12.8.90; platform_system == 'Linux' | nvidia-cuda-cupti-cu12==12.8.90; platform_system == 'Linux' | nvidia-cudnn-cu12==9.10.2.21; platform_system == 'Linux' | nvidia-cublas-cu12==12.8.4.1; platform_system == 'Linux' | nvidia-cufft-cu12==11.3.3.83; platform_system == 'Linux' | nvidia-curand-cu12==10.3.9.90; platform_system == 'Linux' | nvidia-cusolver-cu12==11.7.3.90; platform_system == 'Linux' | nvidia-cusparse-cu12==12.5.8.93; platform_system == 'Linux' | nvidia-cusparselt-cu12==0.7.1; platform_system == 'Linux' | nvidia-nccl-cu12==2.27.5; platform_system == 'Linux' | nvidia-nvshmem-cu12==3.4.5; platform_system == 'Linux' | nvidia-nvtx-cu12==12.8.90; platform_system == 'Linux' | nvidia-nvjitlink-cu12==12.8.93; platform_system == 'Linux' | nvidia-cufile-cu12==1.13.1.3; platform_system == 'Linux'
|
||||
PYTORCH_EXTRA_INSTALL_REQUIREMENTS: cuda-bindings==12.9.4; platform_system == 'Linux' | nvidia-cuda-nvrtc-cu12==12.8.93; platform_system == 'Linux' | nvidia-cuda-runtime-cu12==12.8.90; platform_system == 'Linux' | nvidia-cuda-cupti-cu12==12.8.90; platform_system == 'Linux' | nvidia-cudnn-cu12==9.10.2.21; platform_system == 'Linux' | nvidia-cublas-cu12==12.8.4.1; platform_system == 'Linux' | nvidia-cufft-cu12==11.3.3.83; platform_system == 'Linux' | nvidia-curand-cu12==10.3.9.90; platform_system == 'Linux' | nvidia-cusolver-cu12==11.7.3.90; platform_system == 'Linux' | nvidia-cusparse-cu12==12.5.8.93; platform_system == 'Linux' | nvidia-cusparselt-cu12==0.7.1; platform_system == 'Linux' | nvidia-nccl-cu12==2.27.5; platform_system == 'Linux' | nvidia-nvshmem-cu12==3.4.5; platform_system == 'Linux' | nvidia-nvtx-cu12==12.8.90; platform_system == 'Linux' | nvidia-nvjitlink-cu12==12.8.93; platform_system == 'Linux' | nvidia-cufile-cu12==1.13.1.3; platform_system == 'Linux'
|
||||
secrets:
|
||||
github-token: ${{ secrets.GITHUB_TOKEN }}
|
||||
manywheel-py3_11-cuda12_8-test: # Testing
|
||||
@ -925,7 +925,7 @@ jobs:
|
||||
runner_prefix: "${{ needs.get-label-type.outputs.label-type }}"
|
||||
build_name: manywheel-py3_11-cuda12_9
|
||||
build_environment: linux-binary-manywheel
|
||||
PYTORCH_EXTRA_INSTALL_REQUIREMENTS: nvidia-cuda-nvrtc-cu12==12.9.86; platform_system == 'Linux' | nvidia-cuda-runtime-cu12==12.9.79; platform_system == 'Linux' | nvidia-cuda-cupti-cu12==12.9.79; platform_system == 'Linux' | nvidia-cudnn-cu12==9.10.2.21; platform_system == 'Linux' | nvidia-cublas-cu12==12.9.1.4; platform_system == 'Linux' | nvidia-cufft-cu12==11.4.1.4; platform_system == 'Linux' | nvidia-curand-cu12==10.3.10.19; platform_system == 'Linux' | nvidia-cusolver-cu12==11.7.5.82; platform_system == 'Linux' | nvidia-cusparse-cu12==12.5.10.65; platform_system == 'Linux' | nvidia-cusparselt-cu12==0.7.1; platform_system == 'Linux' | nvidia-nccl-cu12==2.27.5; platform_system == 'Linux' | nvidia-nvshmem-cu12==3.4.5; platform_system == 'Linux' | nvidia-nvtx-cu12==12.9.79; platform_system == 'Linux' | nvidia-nvjitlink-cu12==12.9.86; platform_system == 'Linux' | nvidia-cufile-cu12==1.14.1.1; platform_system == 'Linux'
|
||||
PYTORCH_EXTRA_INSTALL_REQUIREMENTS: cuda-bindings==12.9.4; platform_system == 'Linux' | nvidia-cuda-nvrtc-cu12==12.9.86; platform_system == 'Linux' | nvidia-cuda-runtime-cu12==12.9.79; platform_system == 'Linux' | nvidia-cuda-cupti-cu12==12.9.79; platform_system == 'Linux' | nvidia-cudnn-cu12==9.10.2.21; platform_system == 'Linux' | nvidia-cublas-cu12==12.9.1.4; platform_system == 'Linux' | nvidia-cufft-cu12==11.4.1.4; platform_system == 'Linux' | nvidia-curand-cu12==10.3.10.19; platform_system == 'Linux' | nvidia-cusolver-cu12==11.7.5.82; platform_system == 'Linux' | nvidia-cusparse-cu12==12.5.10.65; platform_system == 'Linux' | nvidia-cusparselt-cu12==0.7.1; platform_system == 'Linux' | nvidia-nccl-cu12==2.27.5; platform_system == 'Linux' | nvidia-nvshmem-cu12==3.4.5; platform_system == 'Linux' | nvidia-nvtx-cu12==12.9.79; platform_system == 'Linux' | nvidia-nvjitlink-cu12==12.9.86; platform_system == 'Linux' | nvidia-cufile-cu12==1.14.1.1; platform_system == 'Linux'
|
||||
secrets:
|
||||
github-token: ${{ secrets.GITHUB_TOKEN }}
|
||||
manywheel-py3_11-cuda12_9-test: # Testing
|
||||
@ -991,7 +991,7 @@ jobs:
|
||||
runner_prefix: "${{ needs.get-label-type.outputs.label-type }}"
|
||||
build_name: manywheel-py3_11-cuda13_0
|
||||
build_environment: linux-binary-manywheel
|
||||
PYTORCH_EXTRA_INSTALL_REQUIREMENTS: nvidia-cuda-nvrtc==13.0.88; platform_system == 'Linux' | nvidia-cuda-runtime==13.0.96; platform_system == 'Linux' | nvidia-cuda-cupti==13.0.85; platform_system == 'Linux' | nvidia-cudnn-cu13==9.13.0.50; platform_system == 'Linux' | nvidia-cublas==13.1.0.3; platform_system == 'Linux' | nvidia-cufft==12.0.0.61; platform_system == 'Linux' | nvidia-curand==10.4.0.35; platform_system == 'Linux' | nvidia-cusolver==12.0.4.66; platform_system == 'Linux' | nvidia-cusparse==12.6.3.3; platform_system == 'Linux' | nvidia-cusparselt-cu13==0.8.0; platform_system == 'Linux' | nvidia-nccl-cu13==2.27.7; platform_system == 'Linux' | nvidia-nvshmem-cu13==3.4.5; platform_system == 'Linux' | nvidia-nvtx==13.0.85; platform_system == 'Linux' | nvidia-nvjitlink==13.0.88; platform_system == 'Linux' | nvidia-cufile==1.15.1.6; platform_system == 'Linux'
|
||||
PYTORCH_EXTRA_INSTALL_REQUIREMENTS: cuda-bindings==13.0.3; platform_system == 'Linux' | nvidia-cuda-nvrtc==13.0.88; platform_system == 'Linux' | nvidia-cuda-runtime==13.0.96; platform_system == 'Linux' | nvidia-cuda-cupti==13.0.85; platform_system == 'Linux' | nvidia-cudnn-cu13==9.13.0.50; platform_system == 'Linux' | nvidia-cublas==13.1.0.3; platform_system == 'Linux' | nvidia-cufft==12.0.0.61; platform_system == 'Linux' | nvidia-curand==10.4.0.35; platform_system == 'Linux' | nvidia-cusolver==12.0.4.66; platform_system == 'Linux' | nvidia-cusparse==12.6.3.3; platform_system == 'Linux' | nvidia-cusparselt-cu13==0.8.0; platform_system == 'Linux' | nvidia-nccl-cu13==2.27.7; platform_system == 'Linux' | nvidia-nvshmem-cu13==3.4.5; platform_system == 'Linux' | nvidia-nvtx==13.0.85; platform_system == 'Linux' | nvidia-nvjitlink==13.0.88; platform_system == 'Linux' | nvidia-cufile==1.15.1.6; platform_system == 'Linux'
|
||||
secrets:
|
||||
github-token: ${{ secrets.GITHUB_TOKEN }}
|
||||
manywheel-py3_11-cuda13_0-test: # Testing
|
||||
@ -1459,7 +1459,7 @@ jobs:
|
||||
runner_prefix: "${{ needs.get-label-type.outputs.label-type }}"
|
||||
build_name: manywheel-py3_12-cuda12_6
|
||||
build_environment: linux-binary-manywheel
|
||||
PYTORCH_EXTRA_INSTALL_REQUIREMENTS: nvidia-cuda-nvrtc-cu12==12.6.77; platform_system == 'Linux' | nvidia-cuda-runtime-cu12==12.6.77; platform_system == 'Linux' | nvidia-cuda-cupti-cu12==12.6.80; platform_system == 'Linux' | nvidia-cudnn-cu12==9.10.2.21; platform_system == 'Linux' | nvidia-cublas-cu12==12.6.4.1; platform_system == 'Linux' | nvidia-cufft-cu12==11.3.0.4; platform_system == 'Linux' | nvidia-curand-cu12==10.3.7.77; platform_system == 'Linux' | nvidia-cusolver-cu12==11.7.1.2; platform_system == 'Linux' | nvidia-cusparse-cu12==12.5.4.2; platform_system == 'Linux' | nvidia-cusparselt-cu12==0.7.1; platform_system == 'Linux' | nvidia-nccl-cu12==2.27.5; platform_system == 'Linux' | nvidia-nvshmem-cu12==3.4.5; platform_system == 'Linux' | nvidia-nvtx-cu12==12.6.77; platform_system == 'Linux' | nvidia-nvjitlink-cu12==12.6.85; platform_system == 'Linux' | nvidia-cufile-cu12==1.11.1.6; platform_system == 'Linux'
|
||||
PYTORCH_EXTRA_INSTALL_REQUIREMENTS: cuda-bindings==12.9.4; platform_system == 'Linux' | nvidia-cuda-nvrtc-cu12==12.6.77; platform_system == 'Linux' | nvidia-cuda-runtime-cu12==12.6.77; platform_system == 'Linux' | nvidia-cuda-cupti-cu12==12.6.80; platform_system == 'Linux' | nvidia-cudnn-cu12==9.10.2.21; platform_system == 'Linux' | nvidia-cublas-cu12==12.6.4.1; platform_system == 'Linux' | nvidia-cufft-cu12==11.3.0.4; platform_system == 'Linux' | nvidia-curand-cu12==10.3.7.77; platform_system == 'Linux' | nvidia-cusolver-cu12==11.7.1.2; platform_system == 'Linux' | nvidia-cusparse-cu12==12.5.4.2; platform_system == 'Linux' | nvidia-cusparselt-cu12==0.7.1; platform_system == 'Linux' | nvidia-nccl-cu12==2.27.5; platform_system == 'Linux' | nvidia-nvshmem-cu12==3.4.5; platform_system == 'Linux' | nvidia-nvtx-cu12==12.6.77; platform_system == 'Linux' | nvidia-nvjitlink-cu12==12.6.85; platform_system == 'Linux' | nvidia-cufile-cu12==1.11.1.6; platform_system == 'Linux'
|
||||
secrets:
|
||||
github-token: ${{ secrets.GITHUB_TOKEN }}
|
||||
manywheel-py3_12-cuda12_6-test: # Testing
|
||||
@ -1525,7 +1525,7 @@ jobs:
|
||||
runner_prefix: "${{ needs.get-label-type.outputs.label-type }}"
|
||||
build_name: manywheel-py3_12-cuda12_8
|
||||
build_environment: linux-binary-manywheel
|
||||
PYTORCH_EXTRA_INSTALL_REQUIREMENTS: nvidia-cuda-nvrtc-cu12==12.8.93; platform_system == 'Linux' | nvidia-cuda-runtime-cu12==12.8.90; platform_system == 'Linux' | nvidia-cuda-cupti-cu12==12.8.90; platform_system == 'Linux' | nvidia-cudnn-cu12==9.10.2.21; platform_system == 'Linux' | nvidia-cublas-cu12==12.8.4.1; platform_system == 'Linux' | nvidia-cufft-cu12==11.3.3.83; platform_system == 'Linux' | nvidia-curand-cu12==10.3.9.90; platform_system == 'Linux' | nvidia-cusolver-cu12==11.7.3.90; platform_system == 'Linux' | nvidia-cusparse-cu12==12.5.8.93; platform_system == 'Linux' | nvidia-cusparselt-cu12==0.7.1; platform_system == 'Linux' | nvidia-nccl-cu12==2.27.5; platform_system == 'Linux' | nvidia-nvshmem-cu12==3.4.5; platform_system == 'Linux' | nvidia-nvtx-cu12==12.8.90; platform_system == 'Linux' | nvidia-nvjitlink-cu12==12.8.93; platform_system == 'Linux' | nvidia-cufile-cu12==1.13.1.3; platform_system == 'Linux'
|
||||
PYTORCH_EXTRA_INSTALL_REQUIREMENTS: cuda-bindings==12.9.4; platform_system == 'Linux' | nvidia-cuda-nvrtc-cu12==12.8.93; platform_system == 'Linux' | nvidia-cuda-runtime-cu12==12.8.90; platform_system == 'Linux' | nvidia-cuda-cupti-cu12==12.8.90; platform_system == 'Linux' | nvidia-cudnn-cu12==9.10.2.21; platform_system == 'Linux' | nvidia-cublas-cu12==12.8.4.1; platform_system == 'Linux' | nvidia-cufft-cu12==11.3.3.83; platform_system == 'Linux' | nvidia-curand-cu12==10.3.9.90; platform_system == 'Linux' | nvidia-cusolver-cu12==11.7.3.90; platform_system == 'Linux' | nvidia-cusparse-cu12==12.5.8.93; platform_system == 'Linux' | nvidia-cusparselt-cu12==0.7.1; platform_system == 'Linux' | nvidia-nccl-cu12==2.27.5; platform_system == 'Linux' | nvidia-nvshmem-cu12==3.4.5; platform_system == 'Linux' | nvidia-nvtx-cu12==12.8.90; platform_system == 'Linux' | nvidia-nvjitlink-cu12==12.8.93; platform_system == 'Linux' | nvidia-cufile-cu12==1.13.1.3; platform_system == 'Linux'
|
||||
secrets:
|
||||
github-token: ${{ secrets.GITHUB_TOKEN }}
|
||||
manywheel-py3_12-cuda12_8-test: # Testing
|
||||
@ -1591,7 +1591,7 @@ jobs:
|
||||
runner_prefix: "${{ needs.get-label-type.outputs.label-type }}"
|
||||
build_name: manywheel-py3_12-cuda12_9
|
||||
build_environment: linux-binary-manywheel
|
||||
PYTORCH_EXTRA_INSTALL_REQUIREMENTS: nvidia-cuda-nvrtc-cu12==12.9.86; platform_system == 'Linux' | nvidia-cuda-runtime-cu12==12.9.79; platform_system == 'Linux' | nvidia-cuda-cupti-cu12==12.9.79; platform_system == 'Linux' | nvidia-cudnn-cu12==9.10.2.21; platform_system == 'Linux' | nvidia-cublas-cu12==12.9.1.4; platform_system == 'Linux' | nvidia-cufft-cu12==11.4.1.4; platform_system == 'Linux' | nvidia-curand-cu12==10.3.10.19; platform_system == 'Linux' | nvidia-cusolver-cu12==11.7.5.82; platform_system == 'Linux' | nvidia-cusparse-cu12==12.5.10.65; platform_system == 'Linux' | nvidia-cusparselt-cu12==0.7.1; platform_system == 'Linux' | nvidia-nccl-cu12==2.27.5; platform_system == 'Linux' | nvidia-nvshmem-cu12==3.4.5; platform_system == 'Linux' | nvidia-nvtx-cu12==12.9.79; platform_system == 'Linux' | nvidia-nvjitlink-cu12==12.9.86; platform_system == 'Linux' | nvidia-cufile-cu12==1.14.1.1; platform_system == 'Linux'
|
||||
PYTORCH_EXTRA_INSTALL_REQUIREMENTS: cuda-bindings==12.9.4; platform_system == 'Linux' | nvidia-cuda-nvrtc-cu12==12.9.86; platform_system == 'Linux' | nvidia-cuda-runtime-cu12==12.9.79; platform_system == 'Linux' | nvidia-cuda-cupti-cu12==12.9.79; platform_system == 'Linux' | nvidia-cudnn-cu12==9.10.2.21; platform_system == 'Linux' | nvidia-cublas-cu12==12.9.1.4; platform_system == 'Linux' | nvidia-cufft-cu12==11.4.1.4; platform_system == 'Linux' | nvidia-curand-cu12==10.3.10.19; platform_system == 'Linux' | nvidia-cusolver-cu12==11.7.5.82; platform_system == 'Linux' | nvidia-cusparse-cu12==12.5.10.65; platform_system == 'Linux' | nvidia-cusparselt-cu12==0.7.1; platform_system == 'Linux' | nvidia-nccl-cu12==2.27.5; platform_system == 'Linux' | nvidia-nvshmem-cu12==3.4.5; platform_system == 'Linux' | nvidia-nvtx-cu12==12.9.79; platform_system == 'Linux' | nvidia-nvjitlink-cu12==12.9.86; platform_system == 'Linux' | nvidia-cufile-cu12==1.14.1.1; platform_system == 'Linux'
|
||||
secrets:
|
||||
github-token: ${{ secrets.GITHUB_TOKEN }}
|
||||
manywheel-py3_12-cuda12_9-test: # Testing
|
||||
@ -1657,7 +1657,7 @@ jobs:
|
||||
runner_prefix: "${{ needs.get-label-type.outputs.label-type }}"
|
||||
build_name: manywheel-py3_12-cuda13_0
|
||||
build_environment: linux-binary-manywheel
|
||||
PYTORCH_EXTRA_INSTALL_REQUIREMENTS: nvidia-cuda-nvrtc==13.0.88; platform_system == 'Linux' | nvidia-cuda-runtime==13.0.96; platform_system == 'Linux' | nvidia-cuda-cupti==13.0.85; platform_system == 'Linux' | nvidia-cudnn-cu13==9.13.0.50; platform_system == 'Linux' | nvidia-cublas==13.1.0.3; platform_system == 'Linux' | nvidia-cufft==12.0.0.61; platform_system == 'Linux' | nvidia-curand==10.4.0.35; platform_system == 'Linux' | nvidia-cusolver==12.0.4.66; platform_system == 'Linux' | nvidia-cusparse==12.6.3.3; platform_system == 'Linux' | nvidia-cusparselt-cu13==0.8.0; platform_system == 'Linux' | nvidia-nccl-cu13==2.27.7; platform_system == 'Linux' | nvidia-nvshmem-cu13==3.4.5; platform_system == 'Linux' | nvidia-nvtx==13.0.85; platform_system == 'Linux' | nvidia-nvjitlink==13.0.88; platform_system == 'Linux' | nvidia-cufile==1.15.1.6; platform_system == 'Linux'
|
||||
PYTORCH_EXTRA_INSTALL_REQUIREMENTS: cuda-bindings==13.0.3; platform_system == 'Linux' | nvidia-cuda-nvrtc==13.0.88; platform_system == 'Linux' | nvidia-cuda-runtime==13.0.96; platform_system == 'Linux' | nvidia-cuda-cupti==13.0.85; platform_system == 'Linux' | nvidia-cudnn-cu13==9.13.0.50; platform_system == 'Linux' | nvidia-cublas==13.1.0.3; platform_system == 'Linux' | nvidia-cufft==12.0.0.61; platform_system == 'Linux' | nvidia-curand==10.4.0.35; platform_system == 'Linux' | nvidia-cusolver==12.0.4.66; platform_system == 'Linux' | nvidia-cusparse==12.6.3.3; platform_system == 'Linux' | nvidia-cusparselt-cu13==0.8.0; platform_system == 'Linux' | nvidia-nccl-cu13==2.27.7; platform_system == 'Linux' | nvidia-nvshmem-cu13==3.4.5; platform_system == 'Linux' | nvidia-nvtx==13.0.85; platform_system == 'Linux' | nvidia-nvjitlink==13.0.88; platform_system == 'Linux' | nvidia-cufile==1.15.1.6; platform_system == 'Linux'
|
||||
secrets:
|
||||
github-token: ${{ secrets.GITHUB_TOKEN }}
|
||||
manywheel-py3_12-cuda13_0-test: # Testing
|
||||
@ -2125,7 +2125,7 @@ jobs:
|
||||
runner_prefix: "${{ needs.get-label-type.outputs.label-type }}"
|
||||
build_name: manywheel-py3_13-cuda12_6
|
||||
build_environment: linux-binary-manywheel
|
||||
PYTORCH_EXTRA_INSTALL_REQUIREMENTS: nvidia-cuda-nvrtc-cu12==12.6.77; platform_system == 'Linux' | nvidia-cuda-runtime-cu12==12.6.77; platform_system == 'Linux' | nvidia-cuda-cupti-cu12==12.6.80; platform_system == 'Linux' | nvidia-cudnn-cu12==9.10.2.21; platform_system == 'Linux' | nvidia-cublas-cu12==12.6.4.1; platform_system == 'Linux' | nvidia-cufft-cu12==11.3.0.4; platform_system == 'Linux' | nvidia-curand-cu12==10.3.7.77; platform_system == 'Linux' | nvidia-cusolver-cu12==11.7.1.2; platform_system == 'Linux' | nvidia-cusparse-cu12==12.5.4.2; platform_system == 'Linux' | nvidia-cusparselt-cu12==0.7.1; platform_system == 'Linux' | nvidia-nccl-cu12==2.27.5; platform_system == 'Linux' | nvidia-nvshmem-cu12==3.4.5; platform_system == 'Linux' | nvidia-nvtx-cu12==12.6.77; platform_system == 'Linux' | nvidia-nvjitlink-cu12==12.6.85; platform_system == 'Linux' | nvidia-cufile-cu12==1.11.1.6; platform_system == 'Linux'
|
||||
PYTORCH_EXTRA_INSTALL_REQUIREMENTS: cuda-bindings==12.9.4; platform_system == 'Linux' | nvidia-cuda-nvrtc-cu12==12.6.77; platform_system == 'Linux' | nvidia-cuda-runtime-cu12==12.6.77; platform_system == 'Linux' | nvidia-cuda-cupti-cu12==12.6.80; platform_system == 'Linux' | nvidia-cudnn-cu12==9.10.2.21; platform_system == 'Linux' | nvidia-cublas-cu12==12.6.4.1; platform_system == 'Linux' | nvidia-cufft-cu12==11.3.0.4; platform_system == 'Linux' | nvidia-curand-cu12==10.3.7.77; platform_system == 'Linux' | nvidia-cusolver-cu12==11.7.1.2; platform_system == 'Linux' | nvidia-cusparse-cu12==12.5.4.2; platform_system == 'Linux' | nvidia-cusparselt-cu12==0.7.1; platform_system == 'Linux' | nvidia-nccl-cu12==2.27.5; platform_system == 'Linux' | nvidia-nvshmem-cu12==3.4.5; platform_system == 'Linux' | nvidia-nvtx-cu12==12.6.77; platform_system == 'Linux' | nvidia-nvjitlink-cu12==12.6.85; platform_system == 'Linux' | nvidia-cufile-cu12==1.11.1.6; platform_system == 'Linux'
|
||||
secrets:
|
||||
github-token: ${{ secrets.GITHUB_TOKEN }}
|
||||
manywheel-py3_13-cuda12_6-test: # Testing
|
||||
@ -2191,7 +2191,7 @@ jobs:
|
||||
runner_prefix: "${{ needs.get-label-type.outputs.label-type }}"
|
||||
build_name: manywheel-py3_13-cuda12_8
|
||||
build_environment: linux-binary-manywheel
|
||||
PYTORCH_EXTRA_INSTALL_REQUIREMENTS: nvidia-cuda-nvrtc-cu12==12.8.93; platform_system == 'Linux' | nvidia-cuda-runtime-cu12==12.8.90; platform_system == 'Linux' | nvidia-cuda-cupti-cu12==12.8.90; platform_system == 'Linux' | nvidia-cudnn-cu12==9.10.2.21; platform_system == 'Linux' | nvidia-cublas-cu12==12.8.4.1; platform_system == 'Linux' | nvidia-cufft-cu12==11.3.3.83; platform_system == 'Linux' | nvidia-curand-cu12==10.3.9.90; platform_system == 'Linux' | nvidia-cusolver-cu12==11.7.3.90; platform_system == 'Linux' | nvidia-cusparse-cu12==12.5.8.93; platform_system == 'Linux' | nvidia-cusparselt-cu12==0.7.1; platform_system == 'Linux' | nvidia-nccl-cu12==2.27.5; platform_system == 'Linux' | nvidia-nvshmem-cu12==3.4.5; platform_system == 'Linux' | nvidia-nvtx-cu12==12.8.90; platform_system == 'Linux' | nvidia-nvjitlink-cu12==12.8.93; platform_system == 'Linux' | nvidia-cufile-cu12==1.13.1.3; platform_system == 'Linux'
|
||||
PYTORCH_EXTRA_INSTALL_REQUIREMENTS: cuda-bindings==12.9.4; platform_system == 'Linux' | nvidia-cuda-nvrtc-cu12==12.8.93; platform_system == 'Linux' | nvidia-cuda-runtime-cu12==12.8.90; platform_system == 'Linux' | nvidia-cuda-cupti-cu12==12.8.90; platform_system == 'Linux' | nvidia-cudnn-cu12==9.10.2.21; platform_system == 'Linux' | nvidia-cublas-cu12==12.8.4.1; platform_system == 'Linux' | nvidia-cufft-cu12==11.3.3.83; platform_system == 'Linux' | nvidia-curand-cu12==10.3.9.90; platform_system == 'Linux' | nvidia-cusolver-cu12==11.7.3.90; platform_system == 'Linux' | nvidia-cusparse-cu12==12.5.8.93; platform_system == 'Linux' | nvidia-cusparselt-cu12==0.7.1; platform_system == 'Linux' | nvidia-nccl-cu12==2.27.5; platform_system == 'Linux' | nvidia-nvshmem-cu12==3.4.5; platform_system == 'Linux' | nvidia-nvtx-cu12==12.8.90; platform_system == 'Linux' | nvidia-nvjitlink-cu12==12.8.93; platform_system == 'Linux' | nvidia-cufile-cu12==1.13.1.3; platform_system == 'Linux'
|
||||
secrets:
|
||||
github-token: ${{ secrets.GITHUB_TOKEN }}
|
||||
manywheel-py3_13-cuda12_8-test: # Testing
|
||||
@ -2257,7 +2257,7 @@ jobs:
|
||||
runner_prefix: "${{ needs.get-label-type.outputs.label-type }}"
|
||||
build_name: manywheel-py3_13-cuda12_9
|
||||
build_environment: linux-binary-manywheel
|
||||
PYTORCH_EXTRA_INSTALL_REQUIREMENTS: nvidia-cuda-nvrtc-cu12==12.9.86; platform_system == 'Linux' | nvidia-cuda-runtime-cu12==12.9.79; platform_system == 'Linux' | nvidia-cuda-cupti-cu12==12.9.79; platform_system == 'Linux' | nvidia-cudnn-cu12==9.10.2.21; platform_system == 'Linux' | nvidia-cublas-cu12==12.9.1.4; platform_system == 'Linux' | nvidia-cufft-cu12==11.4.1.4; platform_system == 'Linux' | nvidia-curand-cu12==10.3.10.19; platform_system == 'Linux' | nvidia-cusolver-cu12==11.7.5.82; platform_system == 'Linux' | nvidia-cusparse-cu12==12.5.10.65; platform_system == 'Linux' | nvidia-cusparselt-cu12==0.7.1; platform_system == 'Linux' | nvidia-nccl-cu12==2.27.5; platform_system == 'Linux' | nvidia-nvshmem-cu12==3.4.5; platform_system == 'Linux' | nvidia-nvtx-cu12==12.9.79; platform_system == 'Linux' | nvidia-nvjitlink-cu12==12.9.86; platform_system == 'Linux' | nvidia-cufile-cu12==1.14.1.1; platform_system == 'Linux'
|
||||
PYTORCH_EXTRA_INSTALL_REQUIREMENTS: cuda-bindings==12.9.4; platform_system == 'Linux' | nvidia-cuda-nvrtc-cu12==12.9.86; platform_system == 'Linux' | nvidia-cuda-runtime-cu12==12.9.79; platform_system == 'Linux' | nvidia-cuda-cupti-cu12==12.9.79; platform_system == 'Linux' | nvidia-cudnn-cu12==9.10.2.21; platform_system == 'Linux' | nvidia-cublas-cu12==12.9.1.4; platform_system == 'Linux' | nvidia-cufft-cu12==11.4.1.4; platform_system == 'Linux' | nvidia-curand-cu12==10.3.10.19; platform_system == 'Linux' | nvidia-cusolver-cu12==11.7.5.82; platform_system == 'Linux' | nvidia-cusparse-cu12==12.5.10.65; platform_system == 'Linux' | nvidia-cusparselt-cu12==0.7.1; platform_system == 'Linux' | nvidia-nccl-cu12==2.27.5; platform_system == 'Linux' | nvidia-nvshmem-cu12==3.4.5; platform_system == 'Linux' | nvidia-nvtx-cu12==12.9.79; platform_system == 'Linux' | nvidia-nvjitlink-cu12==12.9.86; platform_system == 'Linux' | nvidia-cufile-cu12==1.14.1.1; platform_system == 'Linux'
|
||||
secrets:
|
||||
github-token: ${{ secrets.GITHUB_TOKEN }}
|
||||
manywheel-py3_13-cuda12_9-test: # Testing
|
||||
@ -2323,7 +2323,7 @@ jobs:
|
||||
runner_prefix: "${{ needs.get-label-type.outputs.label-type }}"
|
||||
build_name: manywheel-py3_13-cuda13_0
|
||||
build_environment: linux-binary-manywheel
|
||||
PYTORCH_EXTRA_INSTALL_REQUIREMENTS: nvidia-cuda-nvrtc==13.0.88; platform_system == 'Linux' | nvidia-cuda-runtime==13.0.96; platform_system == 'Linux' | nvidia-cuda-cupti==13.0.85; platform_system == 'Linux' | nvidia-cudnn-cu13==9.13.0.50; platform_system == 'Linux' | nvidia-cublas==13.1.0.3; platform_system == 'Linux' | nvidia-cufft==12.0.0.61; platform_system == 'Linux' | nvidia-curand==10.4.0.35; platform_system == 'Linux' | nvidia-cusolver==12.0.4.66; platform_system == 'Linux' | nvidia-cusparse==12.6.3.3; platform_system == 'Linux' | nvidia-cusparselt-cu13==0.8.0; platform_system == 'Linux' | nvidia-nccl-cu13==2.27.7; platform_system == 'Linux' | nvidia-nvshmem-cu13==3.4.5; platform_system == 'Linux' | nvidia-nvtx==13.0.85; platform_system == 'Linux' | nvidia-nvjitlink==13.0.88; platform_system == 'Linux' | nvidia-cufile==1.15.1.6; platform_system == 'Linux'
|
||||
PYTORCH_EXTRA_INSTALL_REQUIREMENTS: cuda-bindings==13.0.3; platform_system == 'Linux' | nvidia-cuda-nvrtc==13.0.88; platform_system == 'Linux' | nvidia-cuda-runtime==13.0.96; platform_system == 'Linux' | nvidia-cuda-cupti==13.0.85; platform_system == 'Linux' | nvidia-cudnn-cu13==9.13.0.50; platform_system == 'Linux' | nvidia-cublas==13.1.0.3; platform_system == 'Linux' | nvidia-cufft==12.0.0.61; platform_system == 'Linux' | nvidia-curand==10.4.0.35; platform_system == 'Linux' | nvidia-cusolver==12.0.4.66; platform_system == 'Linux' | nvidia-cusparse==12.6.3.3; platform_system == 'Linux' | nvidia-cusparselt-cu13==0.8.0; platform_system == 'Linux' | nvidia-nccl-cu13==2.27.7; platform_system == 'Linux' | nvidia-nvshmem-cu13==3.4.5; platform_system == 'Linux' | nvidia-nvtx==13.0.85; platform_system == 'Linux' | nvidia-nvjitlink==13.0.88; platform_system == 'Linux' | nvidia-cufile==1.15.1.6; platform_system == 'Linux'
|
||||
secrets:
|
||||
github-token: ${{ secrets.GITHUB_TOKEN }}
|
||||
manywheel-py3_13-cuda13_0-test: # Testing
|
||||
@ -2791,7 +2791,7 @@ jobs:
|
||||
runner_prefix: "${{ needs.get-label-type.outputs.label-type }}"
|
||||
build_name: manywheel-py3_13t-cuda12_6
|
||||
build_environment: linux-binary-manywheel
|
||||
PYTORCH_EXTRA_INSTALL_REQUIREMENTS: nvidia-cuda-nvrtc-cu12==12.6.77; platform_system == 'Linux' | nvidia-cuda-runtime-cu12==12.6.77; platform_system == 'Linux' | nvidia-cuda-cupti-cu12==12.6.80; platform_system == 'Linux' | nvidia-cudnn-cu12==9.10.2.21; platform_system == 'Linux' | nvidia-cublas-cu12==12.6.4.1; platform_system == 'Linux' | nvidia-cufft-cu12==11.3.0.4; platform_system == 'Linux' | nvidia-curand-cu12==10.3.7.77; platform_system == 'Linux' | nvidia-cusolver-cu12==11.7.1.2; platform_system == 'Linux' | nvidia-cusparse-cu12==12.5.4.2; platform_system == 'Linux' | nvidia-cusparselt-cu12==0.7.1; platform_system == 'Linux' | nvidia-nccl-cu12==2.27.5; platform_system == 'Linux' | nvidia-nvshmem-cu12==3.4.5; platform_system == 'Linux' | nvidia-nvtx-cu12==12.6.77; platform_system == 'Linux' | nvidia-nvjitlink-cu12==12.6.85; platform_system == 'Linux' | nvidia-cufile-cu12==1.11.1.6; platform_system == 'Linux'
|
||||
PYTORCH_EXTRA_INSTALL_REQUIREMENTS: cuda-bindings==12.9.4; platform_system == 'Linux' | nvidia-cuda-nvrtc-cu12==12.6.77; platform_system == 'Linux' | nvidia-cuda-runtime-cu12==12.6.77; platform_system == 'Linux' | nvidia-cuda-cupti-cu12==12.6.80; platform_system == 'Linux' | nvidia-cudnn-cu12==9.10.2.21; platform_system == 'Linux' | nvidia-cublas-cu12==12.6.4.1; platform_system == 'Linux' | nvidia-cufft-cu12==11.3.0.4; platform_system == 'Linux' | nvidia-curand-cu12==10.3.7.77; platform_system == 'Linux' | nvidia-cusolver-cu12==11.7.1.2; platform_system == 'Linux' | nvidia-cusparse-cu12==12.5.4.2; platform_system == 'Linux' | nvidia-cusparselt-cu12==0.7.1; platform_system == 'Linux' | nvidia-nccl-cu12==2.27.5; platform_system == 'Linux' | nvidia-nvshmem-cu12==3.4.5; platform_system == 'Linux' | nvidia-nvtx-cu12==12.6.77; platform_system == 'Linux' | nvidia-nvjitlink-cu12==12.6.85; platform_system == 'Linux' | nvidia-cufile-cu12==1.11.1.6; platform_system == 'Linux'
|
||||
secrets:
|
||||
github-token: ${{ secrets.GITHUB_TOKEN }}
|
||||
manywheel-py3_13t-cuda12_6-test: # Testing
|
||||
@ -2857,7 +2857,7 @@ jobs:
|
||||
runner_prefix: "${{ needs.get-label-type.outputs.label-type }}"
|
||||
build_name: manywheel-py3_13t-cuda12_8
|
||||
build_environment: linux-binary-manywheel
|
||||
PYTORCH_EXTRA_INSTALL_REQUIREMENTS: nvidia-cuda-nvrtc-cu12==12.8.93; platform_system == 'Linux' | nvidia-cuda-runtime-cu12==12.8.90; platform_system == 'Linux' | nvidia-cuda-cupti-cu12==12.8.90; platform_system == 'Linux' | nvidia-cudnn-cu12==9.10.2.21; platform_system == 'Linux' | nvidia-cublas-cu12==12.8.4.1; platform_system == 'Linux' | nvidia-cufft-cu12==11.3.3.83; platform_system == 'Linux' | nvidia-curand-cu12==10.3.9.90; platform_system == 'Linux' | nvidia-cusolver-cu12==11.7.3.90; platform_system == 'Linux' | nvidia-cusparse-cu12==12.5.8.93; platform_system == 'Linux' | nvidia-cusparselt-cu12==0.7.1; platform_system == 'Linux' | nvidia-nccl-cu12==2.27.5; platform_system == 'Linux' | nvidia-nvshmem-cu12==3.4.5; platform_system == 'Linux' | nvidia-nvtx-cu12==12.8.90; platform_system == 'Linux' | nvidia-nvjitlink-cu12==12.8.93; platform_system == 'Linux' | nvidia-cufile-cu12==1.13.1.3; platform_system == 'Linux'
|
||||
PYTORCH_EXTRA_INSTALL_REQUIREMENTS: cuda-bindings==12.9.4; platform_system == 'Linux' | nvidia-cuda-nvrtc-cu12==12.8.93; platform_system == 'Linux' | nvidia-cuda-runtime-cu12==12.8.90; platform_system == 'Linux' | nvidia-cuda-cupti-cu12==12.8.90; platform_system == 'Linux' | nvidia-cudnn-cu12==9.10.2.21; platform_system == 'Linux' | nvidia-cublas-cu12==12.8.4.1; platform_system == 'Linux' | nvidia-cufft-cu12==11.3.3.83; platform_system == 'Linux' | nvidia-curand-cu12==10.3.9.90; platform_system == 'Linux' | nvidia-cusolver-cu12==11.7.3.90; platform_system == 'Linux' | nvidia-cusparse-cu12==12.5.8.93; platform_system == 'Linux' | nvidia-cusparselt-cu12==0.7.1; platform_system == 'Linux' | nvidia-nccl-cu12==2.27.5; platform_system == 'Linux' | nvidia-nvshmem-cu12==3.4.5; platform_system == 'Linux' | nvidia-nvtx-cu12==12.8.90; platform_system == 'Linux' | nvidia-nvjitlink-cu12==12.8.93; platform_system == 'Linux' | nvidia-cufile-cu12==1.13.1.3; platform_system == 'Linux'
|
||||
secrets:
|
||||
github-token: ${{ secrets.GITHUB_TOKEN }}
|
||||
manywheel-py3_13t-cuda12_8-test: # Testing
|
||||
@ -2923,7 +2923,7 @@ jobs:
|
||||
runner_prefix: "${{ needs.get-label-type.outputs.label-type }}"
|
||||
build_name: manywheel-py3_13t-cuda12_9
|
||||
build_environment: linux-binary-manywheel
|
||||
PYTORCH_EXTRA_INSTALL_REQUIREMENTS: nvidia-cuda-nvrtc-cu12==12.9.86; platform_system == 'Linux' | nvidia-cuda-runtime-cu12==12.9.79; platform_system == 'Linux' | nvidia-cuda-cupti-cu12==12.9.79; platform_system == 'Linux' | nvidia-cudnn-cu12==9.10.2.21; platform_system == 'Linux' | nvidia-cublas-cu12==12.9.1.4; platform_system == 'Linux' | nvidia-cufft-cu12==11.4.1.4; platform_system == 'Linux' | nvidia-curand-cu12==10.3.10.19; platform_system == 'Linux' | nvidia-cusolver-cu12==11.7.5.82; platform_system == 'Linux' | nvidia-cusparse-cu12==12.5.10.65; platform_system == 'Linux' | nvidia-cusparselt-cu12==0.7.1; platform_system == 'Linux' | nvidia-nccl-cu12==2.27.5; platform_system == 'Linux' | nvidia-nvshmem-cu12==3.4.5; platform_system == 'Linux' | nvidia-nvtx-cu12==12.9.79; platform_system == 'Linux' | nvidia-nvjitlink-cu12==12.9.86; platform_system == 'Linux' | nvidia-cufile-cu12==1.14.1.1; platform_system == 'Linux'
|
||||
PYTORCH_EXTRA_INSTALL_REQUIREMENTS: cuda-bindings==12.9.4; platform_system == 'Linux' | nvidia-cuda-nvrtc-cu12==12.9.86; platform_system == 'Linux' | nvidia-cuda-runtime-cu12==12.9.79; platform_system == 'Linux' | nvidia-cuda-cupti-cu12==12.9.79; platform_system == 'Linux' | nvidia-cudnn-cu12==9.10.2.21; platform_system == 'Linux' | nvidia-cublas-cu12==12.9.1.4; platform_system == 'Linux' | nvidia-cufft-cu12==11.4.1.4; platform_system == 'Linux' | nvidia-curand-cu12==10.3.10.19; platform_system == 'Linux' | nvidia-cusolver-cu12==11.7.5.82; platform_system == 'Linux' | nvidia-cusparse-cu12==12.5.10.65; platform_system == 'Linux' | nvidia-cusparselt-cu12==0.7.1; platform_system == 'Linux' | nvidia-nccl-cu12==2.27.5; platform_system == 'Linux' | nvidia-nvshmem-cu12==3.4.5; platform_system == 'Linux' | nvidia-nvtx-cu12==12.9.79; platform_system == 'Linux' | nvidia-nvjitlink-cu12==12.9.86; platform_system == 'Linux' | nvidia-cufile-cu12==1.14.1.1; platform_system == 'Linux'
|
||||
secrets:
|
||||
github-token: ${{ secrets.GITHUB_TOKEN }}
|
||||
manywheel-py3_13t-cuda12_9-test: # Testing
|
||||
@ -2989,7 +2989,7 @@ jobs:
|
||||
runner_prefix: "${{ needs.get-label-type.outputs.label-type }}"
|
||||
build_name: manywheel-py3_13t-cuda13_0
|
||||
build_environment: linux-binary-manywheel
|
||||
PYTORCH_EXTRA_INSTALL_REQUIREMENTS: nvidia-cuda-nvrtc==13.0.88; platform_system == 'Linux' | nvidia-cuda-runtime==13.0.96; platform_system == 'Linux' | nvidia-cuda-cupti==13.0.85; platform_system == 'Linux' | nvidia-cudnn-cu13==9.13.0.50; platform_system == 'Linux' | nvidia-cublas==13.1.0.3; platform_system == 'Linux' | nvidia-cufft==12.0.0.61; platform_system == 'Linux' | nvidia-curand==10.4.0.35; platform_system == 'Linux' | nvidia-cusolver==12.0.4.66; platform_system == 'Linux' | nvidia-cusparse==12.6.3.3; platform_system == 'Linux' | nvidia-cusparselt-cu13==0.8.0; platform_system == 'Linux' | nvidia-nccl-cu13==2.27.7; platform_system == 'Linux' | nvidia-nvshmem-cu13==3.4.5; platform_system == 'Linux' | nvidia-nvtx==13.0.85; platform_system == 'Linux' | nvidia-nvjitlink==13.0.88; platform_system == 'Linux' | nvidia-cufile==1.15.1.6; platform_system == 'Linux'
|
||||
PYTORCH_EXTRA_INSTALL_REQUIREMENTS: cuda-bindings==13.0.3; platform_system == 'Linux' | nvidia-cuda-nvrtc==13.0.88; platform_system == 'Linux' | nvidia-cuda-runtime==13.0.96; platform_system == 'Linux' | nvidia-cuda-cupti==13.0.85; platform_system == 'Linux' | nvidia-cudnn-cu13==9.13.0.50; platform_system == 'Linux' | nvidia-cublas==13.1.0.3; platform_system == 'Linux' | nvidia-cufft==12.0.0.61; platform_system == 'Linux' | nvidia-curand==10.4.0.35; platform_system == 'Linux' | nvidia-cusolver==12.0.4.66; platform_system == 'Linux' | nvidia-cusparse==12.6.3.3; platform_system == 'Linux' | nvidia-cusparselt-cu13==0.8.0; platform_system == 'Linux' | nvidia-nccl-cu13==2.27.7; platform_system == 'Linux' | nvidia-nvshmem-cu13==3.4.5; platform_system == 'Linux' | nvidia-nvtx==13.0.85; platform_system == 'Linux' | nvidia-nvjitlink==13.0.88; platform_system == 'Linux' | nvidia-cufile==1.15.1.6; platform_system == 'Linux'
|
||||
secrets:
|
||||
github-token: ${{ secrets.GITHUB_TOKEN }}
|
||||
manywheel-py3_13t-cuda13_0-test: # Testing
|
||||
@ -3457,7 +3457,7 @@ jobs:
|
||||
runner_prefix: "${{ needs.get-label-type.outputs.label-type }}"
|
||||
build_name: manywheel-py3_14-cuda12_6
|
||||
build_environment: linux-binary-manywheel
|
||||
PYTORCH_EXTRA_INSTALL_REQUIREMENTS: nvidia-cuda-nvrtc-cu12==12.6.77; platform_system == 'Linux' | nvidia-cuda-runtime-cu12==12.6.77; platform_system == 'Linux' | nvidia-cuda-cupti-cu12==12.6.80; platform_system == 'Linux' | nvidia-cudnn-cu12==9.10.2.21; platform_system == 'Linux' | nvidia-cublas-cu12==12.6.4.1; platform_system == 'Linux' | nvidia-cufft-cu12==11.3.0.4; platform_system == 'Linux' | nvidia-curand-cu12==10.3.7.77; platform_system == 'Linux' | nvidia-cusolver-cu12==11.7.1.2; platform_system == 'Linux' | nvidia-cusparse-cu12==12.5.4.2; platform_system == 'Linux' | nvidia-cusparselt-cu12==0.7.1; platform_system == 'Linux' | nvidia-nccl-cu12==2.27.5; platform_system == 'Linux' | nvidia-nvshmem-cu12==3.4.5; platform_system == 'Linux' | nvidia-nvtx-cu12==12.6.77; platform_system == 'Linux' | nvidia-nvjitlink-cu12==12.6.85; platform_system == 'Linux' | nvidia-cufile-cu12==1.11.1.6; platform_system == 'Linux'
|
||||
PYTORCH_EXTRA_INSTALL_REQUIREMENTS: cuda-bindings==12.9.4; platform_system == 'Linux' | nvidia-cuda-nvrtc-cu12==12.6.77; platform_system == 'Linux' | nvidia-cuda-runtime-cu12==12.6.77; platform_system == 'Linux' | nvidia-cuda-cupti-cu12==12.6.80; platform_system == 'Linux' | nvidia-cudnn-cu12==9.10.2.21; platform_system == 'Linux' | nvidia-cublas-cu12==12.6.4.1; platform_system == 'Linux' | nvidia-cufft-cu12==11.3.0.4; platform_system == 'Linux' | nvidia-curand-cu12==10.3.7.77; platform_system == 'Linux' | nvidia-cusolver-cu12==11.7.1.2; platform_system == 'Linux' | nvidia-cusparse-cu12==12.5.4.2; platform_system == 'Linux' | nvidia-cusparselt-cu12==0.7.1; platform_system == 'Linux' | nvidia-nccl-cu12==2.27.5; platform_system == 'Linux' | nvidia-nvshmem-cu12==3.4.5; platform_system == 'Linux' | nvidia-nvtx-cu12==12.6.77; platform_system == 'Linux' | nvidia-nvjitlink-cu12==12.6.85; platform_system == 'Linux' | nvidia-cufile-cu12==1.11.1.6; platform_system == 'Linux'
|
||||
secrets:
|
||||
github-token: ${{ secrets.GITHUB_TOKEN }}
|
||||
manywheel-py3_14-cuda12_6-test: # Testing
|
||||
@ -3523,7 +3523,7 @@ jobs:
|
||||
runner_prefix: "${{ needs.get-label-type.outputs.label-type }}"
|
||||
build_name: manywheel-py3_14-cuda12_8
|
||||
build_environment: linux-binary-manywheel
|
||||
PYTORCH_EXTRA_INSTALL_REQUIREMENTS: nvidia-cuda-nvrtc-cu12==12.8.93; platform_system == 'Linux' | nvidia-cuda-runtime-cu12==12.8.90; platform_system == 'Linux' | nvidia-cuda-cupti-cu12==12.8.90; platform_system == 'Linux' | nvidia-cudnn-cu12==9.10.2.21; platform_system == 'Linux' | nvidia-cublas-cu12==12.8.4.1; platform_system == 'Linux' | nvidia-cufft-cu12==11.3.3.83; platform_system == 'Linux' | nvidia-curand-cu12==10.3.9.90; platform_system == 'Linux' | nvidia-cusolver-cu12==11.7.3.90; platform_system == 'Linux' | nvidia-cusparse-cu12==12.5.8.93; platform_system == 'Linux' | nvidia-cusparselt-cu12==0.7.1; platform_system == 'Linux' | nvidia-nccl-cu12==2.27.5; platform_system == 'Linux' | nvidia-nvshmem-cu12==3.4.5; platform_system == 'Linux' | nvidia-nvtx-cu12==12.8.90; platform_system == 'Linux' | nvidia-nvjitlink-cu12==12.8.93; platform_system == 'Linux' | nvidia-cufile-cu12==1.13.1.3; platform_system == 'Linux'
|
||||
PYTORCH_EXTRA_INSTALL_REQUIREMENTS: cuda-bindings==12.9.4; platform_system == 'Linux' | nvidia-cuda-nvrtc-cu12==12.8.93; platform_system == 'Linux' | nvidia-cuda-runtime-cu12==12.8.90; platform_system == 'Linux' | nvidia-cuda-cupti-cu12==12.8.90; platform_system == 'Linux' | nvidia-cudnn-cu12==9.10.2.21; platform_system == 'Linux' | nvidia-cublas-cu12==12.8.4.1; platform_system == 'Linux' | nvidia-cufft-cu12==11.3.3.83; platform_system == 'Linux' | nvidia-curand-cu12==10.3.9.90; platform_system == 'Linux' | nvidia-cusolver-cu12==11.7.3.90; platform_system == 'Linux' | nvidia-cusparse-cu12==12.5.8.93; platform_system == 'Linux' | nvidia-cusparselt-cu12==0.7.1; platform_system == 'Linux' | nvidia-nccl-cu12==2.27.5; platform_system == 'Linux' | nvidia-nvshmem-cu12==3.4.5; platform_system == 'Linux' | nvidia-nvtx-cu12==12.8.90; platform_system == 'Linux' | nvidia-nvjitlink-cu12==12.8.93; platform_system == 'Linux' | nvidia-cufile-cu12==1.13.1.3; platform_system == 'Linux'
|
||||
secrets:
|
||||
github-token: ${{ secrets.GITHUB_TOKEN }}
|
||||
manywheel-py3_14-cuda12_8-test: # Testing
|
||||
@ -3589,7 +3589,7 @@ jobs:
|
||||
runner_prefix: "${{ needs.get-label-type.outputs.label-type }}"
|
||||
build_name: manywheel-py3_14-cuda12_9
|
||||
build_environment: linux-binary-manywheel
|
||||
PYTORCH_EXTRA_INSTALL_REQUIREMENTS: nvidia-cuda-nvrtc-cu12==12.9.86; platform_system == 'Linux' | nvidia-cuda-runtime-cu12==12.9.79; platform_system == 'Linux' | nvidia-cuda-cupti-cu12==12.9.79; platform_system == 'Linux' | nvidia-cudnn-cu12==9.10.2.21; platform_system == 'Linux' | nvidia-cublas-cu12==12.9.1.4; platform_system == 'Linux' | nvidia-cufft-cu12==11.4.1.4; platform_system == 'Linux' | nvidia-curand-cu12==10.3.10.19; platform_system == 'Linux' | nvidia-cusolver-cu12==11.7.5.82; platform_system == 'Linux' | nvidia-cusparse-cu12==12.5.10.65; platform_system == 'Linux' | nvidia-cusparselt-cu12==0.7.1; platform_system == 'Linux' | nvidia-nccl-cu12==2.27.5; platform_system == 'Linux' | nvidia-nvshmem-cu12==3.4.5; platform_system == 'Linux' | nvidia-nvtx-cu12==12.9.79; platform_system == 'Linux' | nvidia-nvjitlink-cu12==12.9.86; platform_system == 'Linux' | nvidia-cufile-cu12==1.14.1.1; platform_system == 'Linux'
|
||||
PYTORCH_EXTRA_INSTALL_REQUIREMENTS: cuda-bindings==12.9.4; platform_system == 'Linux' | nvidia-cuda-nvrtc-cu12==12.9.86; platform_system == 'Linux' | nvidia-cuda-runtime-cu12==12.9.79; platform_system == 'Linux' | nvidia-cuda-cupti-cu12==12.9.79; platform_system == 'Linux' | nvidia-cudnn-cu12==9.10.2.21; platform_system == 'Linux' | nvidia-cublas-cu12==12.9.1.4; platform_system == 'Linux' | nvidia-cufft-cu12==11.4.1.4; platform_system == 'Linux' | nvidia-curand-cu12==10.3.10.19; platform_system == 'Linux' | nvidia-cusolver-cu12==11.7.5.82; platform_system == 'Linux' | nvidia-cusparse-cu12==12.5.10.65; platform_system == 'Linux' | nvidia-cusparselt-cu12==0.7.1; platform_system == 'Linux' | nvidia-nccl-cu12==2.27.5; platform_system == 'Linux' | nvidia-nvshmem-cu12==3.4.5; platform_system == 'Linux' | nvidia-nvtx-cu12==12.9.79; platform_system == 'Linux' | nvidia-nvjitlink-cu12==12.9.86; platform_system == 'Linux' | nvidia-cufile-cu12==1.14.1.1; platform_system == 'Linux'
|
||||
secrets:
|
||||
github-token: ${{ secrets.GITHUB_TOKEN }}
|
||||
manywheel-py3_14-cuda12_9-test: # Testing
|
||||
@ -3655,7 +3655,7 @@ jobs:
|
||||
runner_prefix: "${{ needs.get-label-type.outputs.label-type }}"
|
||||
build_name: manywheel-py3_14-cuda13_0
|
||||
build_environment: linux-binary-manywheel
|
||||
PYTORCH_EXTRA_INSTALL_REQUIREMENTS: nvidia-cuda-nvrtc==13.0.88; platform_system == 'Linux' | nvidia-cuda-runtime==13.0.96; platform_system == 'Linux' | nvidia-cuda-cupti==13.0.85; platform_system == 'Linux' | nvidia-cudnn-cu13==9.13.0.50; platform_system == 'Linux' | nvidia-cublas==13.1.0.3; platform_system == 'Linux' | nvidia-cufft==12.0.0.61; platform_system == 'Linux' | nvidia-curand==10.4.0.35; platform_system == 'Linux' | nvidia-cusolver==12.0.4.66; platform_system == 'Linux' | nvidia-cusparse==12.6.3.3; platform_system == 'Linux' | nvidia-cusparselt-cu13==0.8.0; platform_system == 'Linux' | nvidia-nccl-cu13==2.27.7; platform_system == 'Linux' | nvidia-nvshmem-cu13==3.4.5; platform_system == 'Linux' | nvidia-nvtx==13.0.85; platform_system == 'Linux' | nvidia-nvjitlink==13.0.88; platform_system == 'Linux' | nvidia-cufile==1.15.1.6; platform_system == 'Linux'
|
||||
PYTORCH_EXTRA_INSTALL_REQUIREMENTS: cuda-bindings==13.0.3; platform_system == 'Linux' | nvidia-cuda-nvrtc==13.0.88; platform_system == 'Linux' | nvidia-cuda-runtime==13.0.96; platform_system == 'Linux' | nvidia-cuda-cupti==13.0.85; platform_system == 'Linux' | nvidia-cudnn-cu13==9.13.0.50; platform_system == 'Linux' | nvidia-cublas==13.1.0.3; platform_system == 'Linux' | nvidia-cufft==12.0.0.61; platform_system == 'Linux' | nvidia-curand==10.4.0.35; platform_system == 'Linux' | nvidia-cusolver==12.0.4.66; platform_system == 'Linux' | nvidia-cusparse==12.6.3.3; platform_system == 'Linux' | nvidia-cusparselt-cu13==0.8.0; platform_system == 'Linux' | nvidia-nccl-cu13==2.27.7; platform_system == 'Linux' | nvidia-nvshmem-cu13==3.4.5; platform_system == 'Linux' | nvidia-nvtx==13.0.85; platform_system == 'Linux' | nvidia-nvjitlink==13.0.88; platform_system == 'Linux' | nvidia-cufile==1.15.1.6; platform_system == 'Linux'
|
||||
secrets:
|
||||
github-token: ${{ secrets.GITHUB_TOKEN }}
|
||||
manywheel-py3_14-cuda13_0-test: # Testing
|
||||
@ -4123,7 +4123,7 @@ jobs:
|
||||
runner_prefix: "${{ needs.get-label-type.outputs.label-type }}"
|
||||
build_name: manywheel-py3_14t-cuda12_6
|
||||
build_environment: linux-binary-manywheel
|
||||
PYTORCH_EXTRA_INSTALL_REQUIREMENTS: nvidia-cuda-nvrtc-cu12==12.6.77; platform_system == 'Linux' | nvidia-cuda-runtime-cu12==12.6.77; platform_system == 'Linux' | nvidia-cuda-cupti-cu12==12.6.80; platform_system == 'Linux' | nvidia-cudnn-cu12==9.10.2.21; platform_system == 'Linux' | nvidia-cublas-cu12==12.6.4.1; platform_system == 'Linux' | nvidia-cufft-cu12==11.3.0.4; platform_system == 'Linux' | nvidia-curand-cu12==10.3.7.77; platform_system == 'Linux' | nvidia-cusolver-cu12==11.7.1.2; platform_system == 'Linux' | nvidia-cusparse-cu12==12.5.4.2; platform_system == 'Linux' | nvidia-cusparselt-cu12==0.7.1; platform_system == 'Linux' | nvidia-nccl-cu12==2.27.5; platform_system == 'Linux' | nvidia-nvshmem-cu12==3.4.5; platform_system == 'Linux' | nvidia-nvtx-cu12==12.6.77; platform_system == 'Linux' | nvidia-nvjitlink-cu12==12.6.85; platform_system == 'Linux' | nvidia-cufile-cu12==1.11.1.6; platform_system == 'Linux'
|
||||
PYTORCH_EXTRA_INSTALL_REQUIREMENTS: cuda-bindings==12.9.4; platform_system == 'Linux' | nvidia-cuda-nvrtc-cu12==12.6.77; platform_system == 'Linux' | nvidia-cuda-runtime-cu12==12.6.77; platform_system == 'Linux' | nvidia-cuda-cupti-cu12==12.6.80; platform_system == 'Linux' | nvidia-cudnn-cu12==9.10.2.21; platform_system == 'Linux' | nvidia-cublas-cu12==12.6.4.1; platform_system == 'Linux' | nvidia-cufft-cu12==11.3.0.4; platform_system == 'Linux' | nvidia-curand-cu12==10.3.7.77; platform_system == 'Linux' | nvidia-cusolver-cu12==11.7.1.2; platform_system == 'Linux' | nvidia-cusparse-cu12==12.5.4.2; platform_system == 'Linux' | nvidia-cusparselt-cu12==0.7.1; platform_system == 'Linux' | nvidia-nccl-cu12==2.27.5; platform_system == 'Linux' | nvidia-nvshmem-cu12==3.4.5; platform_system == 'Linux' | nvidia-nvtx-cu12==12.6.77; platform_system == 'Linux' | nvidia-nvjitlink-cu12==12.6.85; platform_system == 'Linux' | nvidia-cufile-cu12==1.11.1.6; platform_system == 'Linux'
|
||||
secrets:
|
||||
github-token: ${{ secrets.GITHUB_TOKEN }}
|
||||
manywheel-py3_14t-cuda12_6-test: # Testing
|
||||
@ -4189,7 +4189,7 @@ jobs:
|
||||
runner_prefix: "${{ needs.get-label-type.outputs.label-type }}"
|
||||
build_name: manywheel-py3_14t-cuda12_8
|
||||
build_environment: linux-binary-manywheel
|
||||
PYTORCH_EXTRA_INSTALL_REQUIREMENTS: nvidia-cuda-nvrtc-cu12==12.8.93; platform_system == 'Linux' | nvidia-cuda-runtime-cu12==12.8.90; platform_system == 'Linux' | nvidia-cuda-cupti-cu12==12.8.90; platform_system == 'Linux' | nvidia-cudnn-cu12==9.10.2.21; platform_system == 'Linux' | nvidia-cublas-cu12==12.8.4.1; platform_system == 'Linux' | nvidia-cufft-cu12==11.3.3.83; platform_system == 'Linux' | nvidia-curand-cu12==10.3.9.90; platform_system == 'Linux' | nvidia-cusolver-cu12==11.7.3.90; platform_system == 'Linux' | nvidia-cusparse-cu12==12.5.8.93; platform_system == 'Linux' | nvidia-cusparselt-cu12==0.7.1; platform_system == 'Linux' | nvidia-nccl-cu12==2.27.5; platform_system == 'Linux' | nvidia-nvshmem-cu12==3.4.5; platform_system == 'Linux' | nvidia-nvtx-cu12==12.8.90; platform_system == 'Linux' | nvidia-nvjitlink-cu12==12.8.93; platform_system == 'Linux' | nvidia-cufile-cu12==1.13.1.3; platform_system == 'Linux'
|
||||
PYTORCH_EXTRA_INSTALL_REQUIREMENTS: cuda-bindings==12.9.4; platform_system == 'Linux' | nvidia-cuda-nvrtc-cu12==12.8.93; platform_system == 'Linux' | nvidia-cuda-runtime-cu12==12.8.90; platform_system == 'Linux' | nvidia-cuda-cupti-cu12==12.8.90; platform_system == 'Linux' | nvidia-cudnn-cu12==9.10.2.21; platform_system == 'Linux' | nvidia-cublas-cu12==12.8.4.1; platform_system == 'Linux' | nvidia-cufft-cu12==11.3.3.83; platform_system == 'Linux' | nvidia-curand-cu12==10.3.9.90; platform_system == 'Linux' | nvidia-cusolver-cu12==11.7.3.90; platform_system == 'Linux' | nvidia-cusparse-cu12==12.5.8.93; platform_system == 'Linux' | nvidia-cusparselt-cu12==0.7.1; platform_system == 'Linux' | nvidia-nccl-cu12==2.27.5; platform_system == 'Linux' | nvidia-nvshmem-cu12==3.4.5; platform_system == 'Linux' | nvidia-nvtx-cu12==12.8.90; platform_system == 'Linux' | nvidia-nvjitlink-cu12==12.8.93; platform_system == 'Linux' | nvidia-cufile-cu12==1.13.1.3; platform_system == 'Linux'
|
||||
secrets:
|
||||
github-token: ${{ secrets.GITHUB_TOKEN }}
|
||||
manywheel-py3_14t-cuda12_8-test: # Testing
|
||||
@ -4255,7 +4255,7 @@ jobs:
|
||||
runner_prefix: "${{ needs.get-label-type.outputs.label-type }}"
|
||||
build_name: manywheel-py3_14t-cuda12_9
|
||||
build_environment: linux-binary-manywheel
|
||||
PYTORCH_EXTRA_INSTALL_REQUIREMENTS: nvidia-cuda-nvrtc-cu12==12.9.86; platform_system == 'Linux' | nvidia-cuda-runtime-cu12==12.9.79; platform_system == 'Linux' | nvidia-cuda-cupti-cu12==12.9.79; platform_system == 'Linux' | nvidia-cudnn-cu12==9.10.2.21; platform_system == 'Linux' | nvidia-cublas-cu12==12.9.1.4; platform_system == 'Linux' | nvidia-cufft-cu12==11.4.1.4; platform_system == 'Linux' | nvidia-curand-cu12==10.3.10.19; platform_system == 'Linux' | nvidia-cusolver-cu12==11.7.5.82; platform_system == 'Linux' | nvidia-cusparse-cu12==12.5.10.65; platform_system == 'Linux' | nvidia-cusparselt-cu12==0.7.1; platform_system == 'Linux' | nvidia-nccl-cu12==2.27.5; platform_system == 'Linux' | nvidia-nvshmem-cu12==3.4.5; platform_system == 'Linux' | nvidia-nvtx-cu12==12.9.79; platform_system == 'Linux' | nvidia-nvjitlink-cu12==12.9.86; platform_system == 'Linux' | nvidia-cufile-cu12==1.14.1.1; platform_system == 'Linux'
|
||||
PYTORCH_EXTRA_INSTALL_REQUIREMENTS: cuda-bindings==12.9.4; platform_system == 'Linux' | nvidia-cuda-nvrtc-cu12==12.9.86; platform_system == 'Linux' | nvidia-cuda-runtime-cu12==12.9.79; platform_system == 'Linux' | nvidia-cuda-cupti-cu12==12.9.79; platform_system == 'Linux' | nvidia-cudnn-cu12==9.10.2.21; platform_system == 'Linux' | nvidia-cublas-cu12==12.9.1.4; platform_system == 'Linux' | nvidia-cufft-cu12==11.4.1.4; platform_system == 'Linux' | nvidia-curand-cu12==10.3.10.19; platform_system == 'Linux' | nvidia-cusolver-cu12==11.7.5.82; platform_system == 'Linux' | nvidia-cusparse-cu12==12.5.10.65; platform_system == 'Linux' | nvidia-cusparselt-cu12==0.7.1; platform_system == 'Linux' | nvidia-nccl-cu12==2.27.5; platform_system == 'Linux' | nvidia-nvshmem-cu12==3.4.5; platform_system == 'Linux' | nvidia-nvtx-cu12==12.9.79; platform_system == 'Linux' | nvidia-nvjitlink-cu12==12.9.86; platform_system == 'Linux' | nvidia-cufile-cu12==1.14.1.1; platform_system == 'Linux'
|
||||
secrets:
|
||||
github-token: ${{ secrets.GITHUB_TOKEN }}
|
||||
manywheel-py3_14t-cuda12_9-test: # Testing
|
||||
@ -4321,7 +4321,7 @@ jobs:
|
||||
runner_prefix: "${{ needs.get-label-type.outputs.label-type }}"
|
||||
build_name: manywheel-py3_14t-cuda13_0
|
||||
build_environment: linux-binary-manywheel
|
||||
PYTORCH_EXTRA_INSTALL_REQUIREMENTS: nvidia-cuda-nvrtc==13.0.88; platform_system == 'Linux' | nvidia-cuda-runtime==13.0.96; platform_system == 'Linux' | nvidia-cuda-cupti==13.0.85; platform_system == 'Linux' | nvidia-cudnn-cu13==9.13.0.50; platform_system == 'Linux' | nvidia-cublas==13.1.0.3; platform_system == 'Linux' | nvidia-cufft==12.0.0.61; platform_system == 'Linux' | nvidia-curand==10.4.0.35; platform_system == 'Linux' | nvidia-cusolver==12.0.4.66; platform_system == 'Linux' | nvidia-cusparse==12.6.3.3; platform_system == 'Linux' | nvidia-cusparselt-cu13==0.8.0; platform_system == 'Linux' | nvidia-nccl-cu13==2.27.7; platform_system == 'Linux' | nvidia-nvshmem-cu13==3.4.5; platform_system == 'Linux' | nvidia-nvtx==13.0.85; platform_system == 'Linux' | nvidia-nvjitlink==13.0.88; platform_system == 'Linux' | nvidia-cufile==1.15.1.6; platform_system == 'Linux'
|
||||
PYTORCH_EXTRA_INSTALL_REQUIREMENTS: cuda-bindings==13.0.3; platform_system == 'Linux' | nvidia-cuda-nvrtc==13.0.88; platform_system == 'Linux' | nvidia-cuda-runtime==13.0.96; platform_system == 'Linux' | nvidia-cuda-cupti==13.0.85; platform_system == 'Linux' | nvidia-cudnn-cu13==9.13.0.50; platform_system == 'Linux' | nvidia-cublas==13.1.0.3; platform_system == 'Linux' | nvidia-cufft==12.0.0.61; platform_system == 'Linux' | nvidia-curand==10.4.0.35; platform_system == 'Linux' | nvidia-cusolver==12.0.4.66; platform_system == 'Linux' | nvidia-cusparse==12.6.3.3; platform_system == 'Linux' | nvidia-cusparselt-cu13==0.8.0; platform_system == 'Linux' | nvidia-nccl-cu13==2.27.7; platform_system == 'Linux' | nvidia-nvshmem-cu13==3.4.5; platform_system == 'Linux' | nvidia-nvtx==13.0.85; platform_system == 'Linux' | nvidia-nvjitlink==13.0.88; platform_system == 'Linux' | nvidia-cufile==1.15.1.6; platform_system == 'Linux'
|
||||
secrets:
|
||||
github-token: ${{ secrets.GITHUB_TOKEN }}
|
||||
manywheel-py3_14t-cuda13_0-test: # Testing
|
||||
|
||||
10
.github/workflows/inductor-micro-benchmark.yml
vendored
10
.github/workflows/inductor-micro-benchmark.yml
vendored
@ -30,14 +30,14 @@ jobs:
|
||||
opt_out_experiments: lf
|
||||
|
||||
build:
|
||||
name: cuda12.8-py3.10-gcc9-sm80
|
||||
name: cuda12.8-py3.10-gcc11-sm80
|
||||
uses: ./.github/workflows/_linux-build.yml
|
||||
needs:
|
||||
- get-default-label-prefix
|
||||
with:
|
||||
runner_prefix: "${{ needs.get-default-label-prefix.outputs.label-type }}"
|
||||
build-environment: linux-jammy-cuda12.8-py3.10-gcc9-sm80
|
||||
docker-image-name: ci-image:pytorch-linux-jammy-cuda12.8-cudnn9-py3-gcc9-inductor-benchmarks
|
||||
build-environment: linux-jammy-cuda12.8-py3.10-gcc11-sm80
|
||||
docker-image-name: ci-image:pytorch-linux-jammy-cuda12.8-cudnn9-py3-gcc11-inductor-benchmarks
|
||||
cuda-arch-list: '8.0'
|
||||
test-matrix: |
|
||||
{ include: [
|
||||
@ -46,11 +46,11 @@ jobs:
|
||||
secrets: inherit
|
||||
|
||||
test:
|
||||
name: cuda12.8-py3.10-gcc9-sm80
|
||||
name: cuda12.8-py3.10-gcc11-sm80
|
||||
uses: ./.github/workflows/_linux-test.yml
|
||||
needs: build
|
||||
with:
|
||||
build-environment: linux-jammy-cuda12.8-py3.10-gcc9-sm80
|
||||
build-environment: linux-jammy-cuda12.8-py3.10-gcc11-sm80
|
||||
docker-image: ${{ needs.build.outputs.docker-image }}
|
||||
test-matrix: ${{ needs.build.outputs.test-matrix }}
|
||||
timeout-minutes: 720
|
||||
|
||||
10
.github/workflows/inductor-perf-compare.yml
vendored
10
.github/workflows/inductor-perf-compare.yml
vendored
@ -27,14 +27,14 @@ jobs:
|
||||
opt_out_experiments: lf
|
||||
|
||||
build:
|
||||
name: cuda12.8-py3.10-gcc9-sm80
|
||||
name: cuda12.8-py3.10-gcc11-sm80
|
||||
uses: ./.github/workflows/_linux-build.yml
|
||||
needs:
|
||||
- get-default-label-prefix
|
||||
with:
|
||||
runner_prefix: "${{ needs.get-default-label-prefix.outputs.label-type }}"
|
||||
build-environment: linux-jammy-cuda12.8-py3.10-gcc9-sm80
|
||||
docker-image-name: ci-image:pytorch-linux-jammy-cuda12.8-cudnn9-py3-gcc9-inductor-benchmarks
|
||||
build-environment: linux-jammy-cuda12.8-py3.10-gcc11-sm80
|
||||
docker-image-name: ci-image:pytorch-linux-jammy-cuda12.8-cudnn9-py3-gcc11-inductor-benchmarks
|
||||
cuda-arch-list: '8.0'
|
||||
test-matrix: |
|
||||
{ include: [
|
||||
@ -47,11 +47,11 @@ jobs:
|
||||
secrets: inherit
|
||||
|
||||
test:
|
||||
name: cuda12.8-py3.10-gcc9-sm80
|
||||
name: cuda12.8-py3.10-gcc11-sm80
|
||||
uses: ./.github/workflows/_linux-test.yml
|
||||
needs: build
|
||||
with:
|
||||
build-environment: linux-jammy-cuda12.8-py3.10-gcc9-sm80
|
||||
build-environment: linux-jammy-cuda12.8-py3.10-gcc11-sm80
|
||||
docker-image: ${{ needs.build.outputs.docker-image }}
|
||||
test-matrix: ${{ needs.build.outputs.test-matrix }}
|
||||
# disable monitor in perf tests for more investigation
|
||||
|
||||
18
.github/workflows/inductor-perf-test-b200.yml
vendored
18
.github/workflows/inductor-perf-test-b200.yml
vendored
@ -80,7 +80,7 @@ jobs:
|
||||
opt_out_experiments: lf
|
||||
|
||||
build:
|
||||
name: cuda12.8-py3.10-gcc9-sm100
|
||||
name: cuda12.8-py3.10-gcc11-sm100
|
||||
uses: ./.github/workflows/_linux-build.yml
|
||||
needs: get-label-type
|
||||
with:
|
||||
@ -90,8 +90,8 @@ jobs:
|
||||
# from trunk. Also use a memory-intensive runner here because memory is
|
||||
# usually the bottleneck
|
||||
runner: linux.12xlarge.memory
|
||||
build-environment: linux-jammy-cuda12.8-py3.10-gcc9-sm100
|
||||
docker-image-name: ci-image:pytorch-linux-jammy-cuda12.8-cudnn9-py3-gcc9-inductor-benchmarks
|
||||
build-environment: linux-jammy-cuda12.8-py3.10-gcc11-sm100
|
||||
docker-image-name: ci-image:pytorch-linux-jammy-cuda12.8-cudnn9-py3-gcc11-inductor-benchmarks
|
||||
cuda-arch-list: '10.0'
|
||||
test-matrix: |
|
||||
{ include: [
|
||||
@ -104,12 +104,12 @@ jobs:
|
||||
secrets: inherit
|
||||
|
||||
test-periodically:
|
||||
name: cuda12.8-py3.10-gcc9-sm100
|
||||
name: cuda12.8-py3.10-gcc11-sm100
|
||||
uses: ./.github/workflows/_linux-test.yml
|
||||
needs: build
|
||||
if: github.event.schedule == '0 7 * * 1-6'
|
||||
with:
|
||||
build-environment: linux-jammy-cuda12.8-py3.10-gcc9-sm100
|
||||
build-environment: linux-jammy-cuda12.8-py3.10-gcc11-sm100
|
||||
dashboard-tag: training-true-inference-true-default-true-dynamic-true-cudagraphs-true-cppwrapper-true-aotinductor-true-freezing_cudagraphs-true-cudagraphs_low_precision-true
|
||||
docker-image: ${{ needs.build.outputs.docker-image }}
|
||||
test-matrix: ${{ needs.build.outputs.test-matrix }}
|
||||
@ -121,12 +121,12 @@ jobs:
|
||||
secrets: inherit
|
||||
|
||||
test-weekly:
|
||||
name: cuda12.8-py3.10-gcc9-sm100
|
||||
name: cuda12.8-py3.10-gcc11-sm100
|
||||
uses: ./.github/workflows/_linux-test.yml
|
||||
needs: build
|
||||
if: github.event.schedule == '0 7 * * 0'
|
||||
with:
|
||||
build-environment: linux-jammy-cuda12.8-py3.10-gcc9-sm100
|
||||
build-environment: linux-jammy-cuda12.8-py3.10-gcc11-sm100
|
||||
dashboard-tag: training-true-inference-true-default-true-dynamic-true-cudagraphs-true-cppwrapper-true-aotinductor-true-freezing_cudagraphs-true-maxautotune-true-freeze_autotune_cudagraphs-true-cudagraphs_low_precision-true
|
||||
docker-image: ${{ needs.build.outputs.docker-image }}
|
||||
test-matrix: ${{ needs.build.outputs.test-matrix }}
|
||||
@ -138,11 +138,11 @@ jobs:
|
||||
secrets: inherit
|
||||
|
||||
test:
|
||||
name: cuda12.8-py3.10-gcc9-sm100
|
||||
name: cuda12.8-py3.10-gcc11-sm100
|
||||
uses: ./.github/workflows/_linux-test.yml
|
||||
needs: build
|
||||
with:
|
||||
build-environment: linux-jammy-cuda12.8-py3.10-gcc9-sm100
|
||||
build-environment: linux-jammy-cuda12.8-py3.10-gcc11-sm100
|
||||
dashboard-tag: training-${{ inputs.training }}-inference-${{ inputs.inference }}-default-${{ inputs.default }}-dynamic-${{ inputs.dynamic }}-cudagraphs-${{ inputs.cudagraphs }}-cppwrapper-${{ inputs.cppwrapper }}-aotinductor-${{ inputs.aotinductor }}-maxautotune-${{ inputs.maxautotune }}-freezing_cudagraphs-${{ inputs.freezing_cudagraphs }}-cudagraphs_low_precision-${{ inputs.cudagraphs }}
|
||||
docker-image: ${{ needs.build.outputs.docker-image }}
|
||||
test-matrix: ${{ needs.build.outputs.test-matrix }}
|
||||
|
||||
@ -95,8 +95,8 @@ jobs:
|
||||
# from trunk. Also use a memory-intensive runner here because memory is
|
||||
# usually the bottleneck
|
||||
runner: linux.12xlarge.memory
|
||||
build-environment: linux-jammy-cuda12.8-py3.10-gcc9-sm90
|
||||
docker-image-name: ci-image:pytorch-linux-jammy-cuda12.8-cudnn9-py3-gcc9-inductor-benchmarks
|
||||
build-environment: linux-jammy-cuda12.8-py3.10-gcc11-sm90
|
||||
docker-image-name: ci-image:pytorch-linux-jammy-cuda12.8-cudnn9-py3-gcc11-inductor-benchmarks
|
||||
cuda-arch-list: '9.0'
|
||||
test-matrix: |
|
||||
{ include: [
|
||||
@ -132,7 +132,7 @@ jobs:
|
||||
needs: build
|
||||
if: github.event.schedule == '15 0 * * 1-6'
|
||||
with:
|
||||
build-environment: linux-jammy-cuda12.8-py3.10-gcc9-sm90
|
||||
build-environment: linux-jammy-cuda12.8-py3.10-gcc11-sm90
|
||||
dashboard-tag: training-true-inference-true-default-true-dynamic-true-cudagraphs-true-cppwrapper-true-aotinductor-true-freezing_cudagraphs-true-cudagraphs_low_precision-true
|
||||
docker-image: ${{ needs.build.outputs.docker-image }}
|
||||
test-matrix: ${{ needs.build.outputs.test-matrix }}
|
||||
@ -149,7 +149,7 @@ jobs:
|
||||
needs: build
|
||||
if: github.event.schedule == '0 7 * * 0'
|
||||
with:
|
||||
build-environment: linux-jammy-cuda12.8-py3.10-gcc9-sm90
|
||||
build-environment: linux-jammy-cuda12.8-py3.10-gcc11-sm90
|
||||
dashboard-tag: training-true-inference-true-default-true-dynamic-true-cudagraphs-true-cppwrapper-true-aotinductor-true-freezing_cudagraphs-true-maxautotune-true-freeze_autotune_cudagraphs-true-cudagraphs_low_precision-true
|
||||
docker-image: ${{ needs.build.outputs.docker-image }}
|
||||
test-matrix: ${{ needs.build.outputs.test-matrix }}
|
||||
@ -168,7 +168,7 @@ jobs:
|
||||
# needs one round of benchmark
|
||||
if: ${{ github.event_name == 'workflow_dispatch' || github.event_name == 'pull_request' }}
|
||||
with:
|
||||
build-environment: linux-jammy-cuda12.8-py3.10-gcc9-sm90
|
||||
build-environment: linux-jammy-cuda12.8-py3.10-gcc11-sm90
|
||||
dashboard-tag: training-${{ inputs.training || 'true' }}-inference-${{ inputs.inference || 'true' }}-default-${{ inputs.default || 'true' }}-dynamic-${{ inputs.dynamic || 'true' }}-cudagraphs-${{ inputs.cudagraphs || 'true' }}-cppwrapper-${{ inputs.cppwrapper || 'false' }}-aotinductor-${{ inputs.aotinductor || 'false' }}-maxautotune-${{ inputs.maxautotune || 'false' }}-freezing_cudagraphs-${{ inputs.freezing_cudagraphs || 'false' }}-cudagraphs_low_precision-${{ inputs.cudagraphs || 'false' }}
|
||||
docker-image: ${{ needs.build.outputs.docker-image }}
|
||||
test-matrix: ${{ needs.build.outputs.test-matrix }}
|
||||
|
||||
18
.github/workflows/inductor-perf-test-nightly.yml
vendored
18
.github/workflows/inductor-perf-test-nightly.yml
vendored
@ -80,15 +80,15 @@ jobs:
|
||||
opt_out_experiments: lf
|
||||
|
||||
build:
|
||||
name: cuda12.8-py3.10-gcc9-sm80
|
||||
name: cuda12.8-py3.10-gcc11-sm80
|
||||
uses: ./.github/workflows/_linux-build.yml
|
||||
needs: get-label-type
|
||||
with:
|
||||
runner_prefix: "${{ needs.get-label-type.outputs.label-type }}"
|
||||
# Every bit to make perf run faster helps
|
||||
runner: linux.12xlarge.memory
|
||||
build-environment: linux-jammy-cuda12.8-py3.10-gcc9-sm80
|
||||
docker-image-name: ci-image:pytorch-linux-jammy-cuda12.8-cudnn9-py3-gcc9-inductor-benchmarks
|
||||
build-environment: linux-jammy-cuda12.8-py3.10-gcc11-sm80
|
||||
docker-image-name: ci-image:pytorch-linux-jammy-cuda12.8-cudnn9-py3-gcc11-inductor-benchmarks
|
||||
cuda-arch-list: '8.0'
|
||||
test-matrix: |
|
||||
{ include: [
|
||||
@ -117,12 +117,12 @@ jobs:
|
||||
secrets: inherit
|
||||
|
||||
test-nightly:
|
||||
name: cuda12.8-py3.10-gcc9-sm80
|
||||
name: cuda12.8-py3.10-gcc11-sm80
|
||||
uses: ./.github/workflows/_linux-test.yml
|
||||
needs: build
|
||||
if: github.event.schedule == '0 7 * * 1-6'
|
||||
with:
|
||||
build-environment: linux-jammy-cuda12.8-py3.10-gcc9-sm80
|
||||
build-environment: linux-jammy-cuda12.8-py3.10-gcc11-sm80
|
||||
dashboard-tag: training-true-inference-true-default-true-dynamic-true-cudagraphs-true-cppwrapper-true-aotinductor-true-freezing_cudagraphs-true-cudagraphs_low_precision-true
|
||||
docker-image: ${{ needs.build.outputs.docker-image }}
|
||||
test-matrix: ${{ needs.build.outputs.test-matrix }}
|
||||
@ -133,12 +133,12 @@ jobs:
|
||||
secrets: inherit
|
||||
|
||||
test-weekly:
|
||||
name: cuda12.8-py3.10-gcc9-sm80
|
||||
name: cuda12.8-py3.10-gcc11-sm80
|
||||
uses: ./.github/workflows/_linux-test.yml
|
||||
needs: build
|
||||
if: github.event.schedule == '0 7 * * 0'
|
||||
with:
|
||||
build-environment: linux-jammy-cuda12.8-py3.10-gcc9-sm80
|
||||
build-environment: linux-jammy-cuda12.8-py3.10-gcc11-sm80
|
||||
dashboard-tag: training-true-inference-true-default-true-dynamic-true-cudagraphs-true-cppwrapper-true-aotinductor-true-freezing_cudagraphs-true-maxautotune-true-freeze_autotune_cudagraphs-true-cudagraphs_low_precision-true
|
||||
docker-image: ${{ needs.build.outputs.docker-image }}
|
||||
test-matrix: ${{ needs.build.outputs.test-matrix }}
|
||||
@ -150,12 +150,12 @@ jobs:
|
||||
secrets: inherit
|
||||
|
||||
test:
|
||||
name: cuda12.8-py3.10-gcc9-sm80
|
||||
name: cuda12.8-py3.10-gcc11-sm80
|
||||
uses: ./.github/workflows/_linux-test.yml
|
||||
needs: build
|
||||
if: github.event_name == 'workflow_dispatch'
|
||||
with:
|
||||
build-environment: linux-jammy-cuda12.8-py3.10-gcc9-sm80
|
||||
build-environment: linux-jammy-cuda12.8-py3.10-gcc11-sm80
|
||||
dashboard-tag: training-${{ inputs.training }}-inference-${{ inputs.inference }}-default-${{ inputs.default }}-dynamic-${{ inputs.dynamic }}-cudagraphs-${{ inputs.cudagraphs }}-cppwrapper-${{ inputs.cppwrapper }}-aotinductor-${{ inputs.aotinductor }}-maxautotune-${{ inputs.maxautotune }}-freezing_cudagraphs-${{ inputs.freezing_cudagraphs }}-cudagraphs_low_precision-${{ inputs.cudagraphs }}
|
||||
docker-image: ${{ needs.build.outputs.docker-image }}
|
||||
test-matrix: ${{ needs.build.outputs.test-matrix }}
|
||||
|
||||
12
.github/workflows/inductor-periodic.yml
vendored
12
.github/workflows/inductor-periodic.yml
vendored
@ -37,8 +37,8 @@ jobs:
|
||||
needs: get-default-label-prefix
|
||||
with:
|
||||
runner_prefix: "${{ needs.get-default-label-prefix.outputs.label-type }}"
|
||||
build-environment: linux-jammy-cuda12.8-py3.10-gcc9-sm86
|
||||
docker-image-name: ci-image:pytorch-linux-jammy-cuda12.8-cudnn9-py3-gcc9-inductor-benchmarks
|
||||
build-environment: linux-jammy-cuda12.8-py3.10-gcc11-sm86
|
||||
docker-image-name: ci-image:pytorch-linux-jammy-cuda12.8-cudnn9-py3-gcc11-inductor-benchmarks
|
||||
cuda-arch-list: '8.0;8.6'
|
||||
test-matrix: |
|
||||
{ include: [
|
||||
@ -76,7 +76,7 @@ jobs:
|
||||
uses: ./.github/workflows/_linux-test.yml
|
||||
needs: periodic-dynamo-benchmarks-build
|
||||
with:
|
||||
build-environment: linux-jammy-cuda12.8-py3.10-gcc9-sm86
|
||||
build-environment: linux-jammy-cuda12.8-py3.10-gcc11-sm86
|
||||
docker-image: ${{ needs.periodic-dynamo-benchmarks-build.outputs.docker-image }}
|
||||
test-matrix: ${{ needs.periodic-dynamo-benchmarks-build.outputs.test-matrix }}
|
||||
secrets: inherit
|
||||
@ -138,8 +138,8 @@ jobs:
|
||||
- get-default-label-prefix
|
||||
with:
|
||||
runner_prefix: "${{ needs.get-default-label-prefix.outputs.label-type }}"
|
||||
build-environment: linux-jammy-cuda12.8-py3.10-gcc9-sm80
|
||||
docker-image-name: ci-image:pytorch-linux-jammy-cuda12.8-cudnn9-py3-gcc9-inductor-benchmarks
|
||||
build-environment: linux-jammy-cuda12.8-py3.10-gcc11-sm80
|
||||
docker-image-name: ci-image:pytorch-linux-jammy-cuda12.8-cudnn9-py3-gcc11-inductor-benchmarks
|
||||
cuda-arch-list: '8.0'
|
||||
test-matrix: |
|
||||
{ include: [
|
||||
@ -153,7 +153,7 @@ jobs:
|
||||
uses: ./.github/workflows/_linux-test.yml
|
||||
needs: inductor-smoke-build
|
||||
with:
|
||||
build-environment: linux-jammy-cuda12.8-py3.10-gcc9-sm80
|
||||
build-environment: linux-jammy-cuda12.8-py3.10-gcc11-sm80
|
||||
docker-image: ${{ needs.inductor-smoke-build.outputs.docker-image }}
|
||||
test-matrix: ${{ needs.inductor-smoke-build.outputs.test-matrix }}
|
||||
secrets: inherit
|
||||
|
||||
6
.github/workflows/inductor-unittest.yml
vendored
6
.github/workflows/inductor-unittest.yml
vendored
@ -33,8 +33,8 @@ jobs:
|
||||
uses: ./.github/workflows/_linux-build.yml
|
||||
needs: get-label-type
|
||||
with:
|
||||
build-environment: linux-jammy-cuda12.8-py3.10-gcc9-sm86
|
||||
docker-image-name: ci-image:pytorch-linux-jammy-cuda12.8-cudnn9-py3-gcc9-inductor-benchmarks
|
||||
build-environment: linux-jammy-cuda12.8-py3.10-gcc11-sm86
|
||||
docker-image-name: ci-image:pytorch-linux-jammy-cuda12.8-cudnn9-py3-gcc11-inductor-benchmarks
|
||||
cuda-arch-list: '8.6'
|
||||
runner_prefix: "${{ needs.get-label-type.outputs.label-type }}"
|
||||
test-matrix: |
|
||||
@ -52,7 +52,7 @@ jobs:
|
||||
uses: ./.github/workflows/_linux-test.yml
|
||||
needs: inductor-build
|
||||
with:
|
||||
build-environment: linux-jammy-cuda12.8-py3.10-gcc9-sm86
|
||||
build-environment: linux-jammy-cuda12.8-py3.10-gcc11-sm86
|
||||
docker-image: ${{ needs.inductor-build.outputs.docker-image }}
|
||||
test-matrix: ${{ needs.inductor-build.outputs.test-matrix }}
|
||||
secrets: inherit
|
||||
|
||||
6
.github/workflows/inductor.yml
vendored
6
.github/workflows/inductor.yml
vendored
@ -49,8 +49,8 @@ jobs:
|
||||
uses: ./.github/workflows/_linux-build.yml
|
||||
needs: get-label-type
|
||||
with:
|
||||
build-environment: linux-jammy-cuda12.8-py3.10-gcc9-sm86
|
||||
docker-image-name: ci-image:pytorch-linux-jammy-cuda12.8-cudnn9-py3-gcc9-inductor-benchmarks
|
||||
build-environment: linux-jammy-cuda12.8-py3.10-gcc11-sm86
|
||||
docker-image-name: ci-image:pytorch-linux-jammy-cuda12.8-cudnn9-py3-gcc11-inductor-benchmarks
|
||||
cuda-arch-list: '8.6'
|
||||
runner_prefix: "${{ needs.get-label-type.outputs.label-type }}"
|
||||
test-matrix: |
|
||||
@ -69,7 +69,7 @@ jobs:
|
||||
uses: ./.github/workflows/_linux-test.yml
|
||||
needs: inductor-build
|
||||
with:
|
||||
build-environment: linux-jammy-cuda12.8-py3.10-gcc9-sm86
|
||||
build-environment: linux-jammy-cuda12.8-py3.10-gcc11-sm86
|
||||
docker-image: ${{ needs.inductor-build.outputs.docker-image }}
|
||||
test-matrix: ${{ needs.inductor-build.outputs.test-matrix }}
|
||||
secrets: inherit
|
||||
|
||||
@ -25,7 +25,7 @@ jobs:
|
||||
uses: ./.github/workflows/_linux-build.yml
|
||||
with:
|
||||
runner: linux.12xlarge.memory
|
||||
build-environment: linux-jammy-cuda12.8-py3.10-gcc9-sm80
|
||||
build-environment: linux-jammy-cuda12.8-py3.10-gcc11-sm80
|
||||
docker-image-name: ci-image:pytorch-linux-jammy-cuda12.8-cudnn9-py3-gcc11
|
||||
cuda-arch-list: '8.0 9.0'
|
||||
test-matrix: |
|
||||
@ -41,7 +41,7 @@ jobs:
|
||||
needs: opmicrobenchmark-build
|
||||
with:
|
||||
timeout-minutes: 500
|
||||
build-environment: linux-jammy-cuda12.8-py3.10-gcc9-sm80
|
||||
build-environment: linux-jammy-cuda12.8-py3.10-gcc11-sm80
|
||||
docker-image: ${{ needs.opmicrobenchmark-build.outputs.docker-image }}
|
||||
test-matrix: ${{ needs.opmicrobenchmark-build.outputs.test-matrix }}
|
||||
secrets: inherit
|
||||
@ -53,7 +53,7 @@ jobs:
|
||||
uses: ./.github/workflows/_linux-build.yml
|
||||
with:
|
||||
runner: linux.12xlarge.memory
|
||||
build-environment: linux-jammy-cuda12.8-py3.10-gcc9-sm100
|
||||
build-environment: linux-jammy-cuda12.8-py3.10-gcc11-sm100
|
||||
docker-image-name: ci-image:pytorch-linux-jammy-cuda12.8-cudnn9-py3-gcc11
|
||||
cuda-arch-list: '10.0'
|
||||
test-matrix: |
|
||||
@ -68,7 +68,7 @@ jobs:
|
||||
needs: opmicrobenchmark-build-b200
|
||||
with:
|
||||
timeout-minutes: 500
|
||||
build-environment: linux-jammy-cuda12.8-py3.10-gcc9-sm100
|
||||
build-environment: linux-jammy-cuda12.8-py3.10-gcc11-sm100
|
||||
docker-image: ${{ needs.opmicrobenchmark-build-b200.outputs.docker-image }}
|
||||
test-matrix: ${{ needs.opmicrobenchmark-build-b200.outputs.test-matrix }}
|
||||
aws-role-to-assume: arn:aws:iam::308535385114:role/gha_workflow_s3_and_ecr_read_only
|
||||
|
||||
51
.github/workflows/periodic.yml
vendored
51
.github/workflows/periodic.yml
vendored
@ -90,6 +90,7 @@ jobs:
|
||||
runner_prefix: "${{ needs.get-label-type.outputs.label-type }}"
|
||||
build-environment: linux-jammy-cuda12.8-py3.10-gcc11
|
||||
docker-image-name: ci-image:pytorch-linux-jammy-cuda12.8-cudnn9-py3-gcc11
|
||||
cuda-arch-list: 8.6
|
||||
test-matrix: |
|
||||
{ include: [
|
||||
{ config: "nogpu_AVX512", shard: 1, num_shards: 3, runner: "${{ needs.get-label-type.outputs.label-type }}linux.4xlarge" },
|
||||
@ -97,7 +98,9 @@ jobs:
|
||||
{ config: "nogpu_AVX512", shard: 3, num_shards: 3, runner: "${{ needs.get-label-type.outputs.label-type }}linux.4xlarge" },
|
||||
{ config: "nogpu_NO_AVX2", shard: 1, num_shards: 2, runner: "${{ needs.get-label-type.outputs.label-type }}linux.4xlarge" },
|
||||
{ config: "nogpu_NO_AVX2", shard: 2, num_shards: 2, runner: "${{ needs.get-label-type.outputs.label-type }}linux.4xlarge" },
|
||||
{ config: "jit_legacy", shard: 1, num_shards: 1, runner: "${{ needs.get-label-type.outputs.label-type }}linux.4xlarge.nvidia.gpu" },
|
||||
{ config: "jit_legacy", shard: 1, num_shards: 1, runner: "${{ needs.get-label-type.outputs.label-type }}linux.g5.4xlarge.nvidia.gpu" },
|
||||
{ config: "multigpu", shard: 1, num_shards: 2, runner: "${{ needs.get-label-type.outputs.label-type }}linux.g5.12xlarge.nvidia.gpu", owners: ["oncall:distributed"] },
|
||||
{ config: "multigpu", shard: 2, num_shards: 2, runner: "${{ needs.get-label-type.outputs.label-type }}linux.g5.12xlarge.nvidia.gpu", owners: ["oncall:distributed"] },
|
||||
]}
|
||||
secrets: inherit
|
||||
|
||||
@ -113,40 +116,14 @@ jobs:
|
||||
test-matrix: ${{ needs.linux-jammy-cuda12_8-py3_10-gcc11-build.outputs.test-matrix }}
|
||||
secrets: inherit
|
||||
|
||||
linux-jammy-cuda12_8-py3_10-gcc9-build:
|
||||
name: linux-jammy-cuda12.8-py3.10-gcc9
|
||||
linux-jammy-cuda12_8-py3_10-gcc11-debug-build:
|
||||
name: linux-jammy-cuda12.8-py3.10-gcc11-debug
|
||||
uses: ./.github/workflows/_linux-build.yml
|
||||
needs: get-label-type
|
||||
with:
|
||||
runner_prefix: "${{ needs.get-label-type.outputs.label-type }}"
|
||||
build-environment: linux-jammy-cuda12.8-py3.10-gcc9
|
||||
docker-image-name: ci-image:pytorch-linux-jammy-cuda12.8-cudnn9-py3-gcc9
|
||||
cuda-arch-list: 8.6
|
||||
test-matrix: |
|
||||
{ include: [
|
||||
{ config: "multigpu", shard: 1, num_shards: 2, runner: "${{ needs.get-label-type.outputs.label-type }}linux.g5.12xlarge.nvidia.gpu", owners: ["oncall:distributed"] },
|
||||
{ config: "multigpu", shard: 2, num_shards: 2, runner: "${{ needs.get-label-type.outputs.label-type }}linux.g5.12xlarge.nvidia.gpu", owners: ["oncall:distributed"] },
|
||||
]}
|
||||
secrets: inherit
|
||||
|
||||
linux-jammy-cuda12_8-py3_10-gcc9-test:
|
||||
name: linux-jammy-cuda12.8-py3.10-gcc9
|
||||
uses: ./.github/workflows/_linux-test.yml
|
||||
needs: linux-jammy-cuda12_8-py3_10-gcc9-build
|
||||
with:
|
||||
build-environment: linux-jammy-cuda12.8-py3.10-gcc9
|
||||
docker-image: ${{ needs.linux-jammy-cuda12_8-py3_10-gcc9-build.outputs.docker-image }}
|
||||
test-matrix: ${{ needs.linux-jammy-cuda12_8-py3_10-gcc9-build.outputs.test-matrix }}
|
||||
secrets: inherit
|
||||
|
||||
linux-jammy-cuda12_8-py3_10-gcc9-debug-build:
|
||||
name: linux-jammy-cuda12.8-py3.10-gcc9-debug
|
||||
uses: ./.github/workflows/_linux-build.yml
|
||||
needs: get-label-type
|
||||
with:
|
||||
runner_prefix: "${{ needs.get-label-type.outputs.label-type }}"
|
||||
build-environment: linux-jammy-cuda12.8-py3.10-gcc9-debug
|
||||
docker-image-name: ci-image:pytorch-linux-jammy-cuda12.8-cudnn9-py3-gcc9
|
||||
build-environment: linux-jammy-cuda12.8-py3.10-gcc11-debug
|
||||
docker-image-name: ci-image:pytorch-linux-jammy-cuda12.8-cudnn9-py3-gcc11
|
||||
cuda-arch-list: 8.9
|
||||
test-matrix: |
|
||||
{ include: [
|
||||
@ -160,16 +137,16 @@ jobs:
|
||||
]}
|
||||
secrets: inherit
|
||||
|
||||
linux-jammy-cuda12_8-py3_10-gcc9-debug-test:
|
||||
name: linux-jammy-cuda12.8-py3.10-gcc9-debug
|
||||
linux-jammy-cuda12_8-py3_10-gcc11-debug-test:
|
||||
name: linux-jammy-cuda12.8-py3.10-gcc11-debug
|
||||
uses: ./.github/workflows/_linux-test.yml
|
||||
needs:
|
||||
- linux-jammy-cuda12_8-py3_10-gcc9-debug-build
|
||||
- linux-jammy-cuda12_8-py3_10-gcc11-debug-build
|
||||
- target-determination
|
||||
with:
|
||||
build-environment: linux-jammy-cuda12.8-py3.10-gcc9-debug
|
||||
docker-image: ${{ needs.linux-jammy-cuda12_8-py3_10-gcc9-debug-build.outputs.docker-image }}
|
||||
test-matrix: ${{ needs.linux-jammy-cuda12_8-py3_10-gcc9-debug-build.outputs.test-matrix }}
|
||||
build-environment: linux-jammy-cuda12.8-py3.10-gcc11-debug
|
||||
docker-image: ${{ needs.linux-jammy-cuda12_8-py3_10-gcc11-debug-build.outputs.docker-image }}
|
||||
test-matrix: ${{ needs.linux-jammy-cuda12_8-py3_10-gcc11-debug-build.outputs.test-matrix }}
|
||||
secrets: inherit
|
||||
|
||||
linux-jammy-cuda13_0-py3_10-gcc11-build:
|
||||
|
||||
20
.github/workflows/pull.yml
vendored
20
.github/workflows/pull.yml
vendored
@ -318,14 +318,14 @@ jobs:
|
||||
]}
|
||||
secrets: inherit
|
||||
|
||||
linux-jammy-cuda12_8-py3_10-gcc9-inductor-build:
|
||||
name: cuda12.8-py3.10-gcc9-sm75
|
||||
linux-jammy-cuda12_8-py3_10-gcc11-inductor-build:
|
||||
name: cuda12.8-py3.10-gcc11-sm75
|
||||
uses: ./.github/workflows/_linux-build.yml
|
||||
needs: get-label-type
|
||||
with:
|
||||
runner_prefix: "${{ needs.get-label-type.outputs.label-type }}"
|
||||
build-environment: linux-jammy-cuda12.8-py3.10-gcc9-sm75
|
||||
docker-image-name: ci-image:pytorch-linux-jammy-cuda12.8-cudnn9-py3-gcc9-inductor-benchmarks
|
||||
build-environment: linux-jammy-cuda12.8-py3.10-gcc11-sm75
|
||||
docker-image-name: ci-image:pytorch-linux-jammy-cuda12.8-cudnn9-py3-gcc11-inductor-benchmarks
|
||||
cuda-arch-list: '7.5'
|
||||
test-matrix: |
|
||||
{ include: [
|
||||
@ -333,14 +333,14 @@ jobs:
|
||||
]}
|
||||
secrets: inherit
|
||||
|
||||
linux-jammy-cuda12_8-py3_10-gcc9-inductor-test:
|
||||
name: cuda12.8-py3.10-gcc9-sm75
|
||||
linux-jammy-cuda12_8-py3_10-gcc11-inductor-test:
|
||||
name: cuda12.8-py3.10-gcc11-sm75
|
||||
uses: ./.github/workflows/_linux-test.yml
|
||||
needs: linux-jammy-cuda12_8-py3_10-gcc9-inductor-build
|
||||
needs: linux-jammy-cuda12_8-py3_10-gcc11-inductor-build
|
||||
with:
|
||||
build-environment: linux-jammy-cuda12.8-py3.10-gcc9-sm75
|
||||
docker-image: ${{ needs.linux-jammy-cuda12_8-py3_10-gcc9-inductor-build.outputs.docker-image }}
|
||||
test-matrix: ${{ needs.linux-jammy-cuda12_8-py3_10-gcc9-inductor-build.outputs.test-matrix }}
|
||||
build-environment: linux-jammy-cuda12.8-py3.10-gcc11-sm75
|
||||
docker-image: ${{ needs.linux-jammy-cuda12_8-py3_10-gcc11-inductor-build.outputs.docker-image }}
|
||||
test-matrix: ${{ needs.linux-jammy-cuda12_8-py3_10-gcc11-inductor-build.outputs.test-matrix }}
|
||||
secrets: inherit
|
||||
|
||||
linux-noble-xpu-n-py3_10-build:
|
||||
|
||||
10
.github/workflows/torchbench.yml
vendored
10
.github/workflows/torchbench.yml
vendored
@ -26,14 +26,14 @@ jobs:
|
||||
curr_ref_type: ${{ github.ref_type }}
|
||||
|
||||
build:
|
||||
name: cuda12.8-py3.10-gcc9-sm80
|
||||
name: cuda12.8-py3.10-gcc11-sm80
|
||||
uses: ./.github/workflows/_linux-build.yml
|
||||
needs:
|
||||
- get-default-label-prefix
|
||||
with:
|
||||
runner_prefix: "${{ needs.get-default-label-prefix.outputs.label-type }}"
|
||||
build-environment: linux-jammy-cuda12.8-py3.10-gcc9-sm80
|
||||
docker-image-name: ci-image:pytorch-linux-jammy-cuda12.8-cudnn9-py3-gcc9-inductor-benchmarks
|
||||
build-environment: linux-jammy-cuda12.8-py3.10-gcc11-sm80
|
||||
docker-image-name: ci-image:pytorch-linux-jammy-cuda12.8-cudnn9-py3-gcc11-inductor-benchmarks
|
||||
cuda-arch-list: '8.0'
|
||||
test-matrix: |
|
||||
{ include: [
|
||||
@ -42,11 +42,11 @@ jobs:
|
||||
secrets: inherit
|
||||
|
||||
test:
|
||||
name: cuda12.8-py3.10-gcc9-sm80
|
||||
name: cuda12.8-py3.10-gcc11-sm80
|
||||
uses: ./.github/workflows/_linux-test.yml
|
||||
needs: build
|
||||
with:
|
||||
build-environment: linux-jammy-cuda12.8-py3.10-gcc9-sm80
|
||||
build-environment: linux-jammy-cuda12.8-py3.10-gcc11-sm80
|
||||
docker-image: ${{ needs.build.outputs.docker-image }}
|
||||
test-matrix: ${{ needs.build.outputs.test-matrix }}
|
||||
secrets: inherit
|
||||
|
||||
5
.github/workflows/trunk.yml
vendored
5
.github/workflows/trunk.yml
vendored
@ -231,8 +231,8 @@ jobs:
|
||||
uses: ./.github/workflows/_linux-build.yml
|
||||
needs: get-label-type
|
||||
with:
|
||||
build-environment: linux-jammy-cuda12.8-py3.12-gcc9-sm80
|
||||
docker-image-name: ci-image:pytorch-linux-jammy-cuda12.8-cudnn9-py3-gcc9-inductor-benchmarks
|
||||
build-environment: linux-jammy-cuda12.8-py3.12-gcc11-sm80
|
||||
docker-image-name: ci-image:pytorch-linux-jammy-cuda12.8-cudnn9-py3-gcc11-inductor-benchmarks
|
||||
cuda-arch-list: '8.0'
|
||||
secrets: inherit
|
||||
|
||||
@ -283,6 +283,7 @@ jobs:
|
||||
name: linux-jammy-py3-clang12-executorch
|
||||
uses: ./.github/workflows/_linux-build.yml
|
||||
needs: get-label-type
|
||||
if: false # Has been broken for a while
|
||||
with:
|
||||
runner_prefix: "${{ needs.get-label-type.outputs.label-type }}"
|
||||
build-environment: linux-jammy-py3-clang12-executorch
|
||||
|
||||
@ -10,6 +10,7 @@
|
||||
- Do NOT run pre-commit, it is not setup
|
||||
- To run lint, run 'lintrunner -a' (which will autoapply changes)
|
||||
- Do NOT attempt to install dependencies, you do not have Internet access
|
||||
- Do NOT create summary files unless explicitly asked
|
||||
- When you are ready to make a PR, do exactly these steps:
|
||||
- git stash -u
|
||||
- git reset --hard $(cat /tmp/orig_work.txt) # NB: reset to the LOCAL branch, do NOT fetch
|
||||
|
||||
@ -144,7 +144,7 @@ inline std::bitset<kVmapNumLevels> createVmapLevelsBitset(BatchDimsRef bdims) {
|
||||
}
|
||||
|
||||
inline std::ostream& operator<<(std::ostream& out, const BatchDim& bdim) {
|
||||
out << "(lvl=" << bdim.level() << ", dim=" << bdim.dim() << ")";
|
||||
out << "(lvl=" << bdim.level() << ", dim=" << bdim.dim() << ')';
|
||||
return out;
|
||||
}
|
||||
|
||||
|
||||
@ -9,7 +9,7 @@ namespace indexing {
|
||||
const EllipsisIndexType Ellipsis = EllipsisIndexType();
|
||||
|
||||
std::ostream& operator<<(std::ostream& stream, const Slice& slice) {
|
||||
stream << slice.start() << ":" << slice.stop() << ":" << slice.step();
|
||||
stream << slice.start() << ':' << slice.stop() << ':' << slice.step();
|
||||
return stream;
|
||||
}
|
||||
|
||||
@ -31,12 +31,12 @@ std::ostream& operator<<(std::ostream& stream, const TensorIndex& tensor_index)
|
||||
}
|
||||
|
||||
std::ostream& operator<<(std::ostream& stream, const std::vector<TensorIndex>& tensor_indices) {
|
||||
stream << "(";
|
||||
stream << '(';
|
||||
for (const auto i : c10::irange(tensor_indices.size())) {
|
||||
stream << tensor_indices[i];
|
||||
if (i < tensor_indices.size() - 1) stream << ", ";
|
||||
}
|
||||
stream << ")";
|
||||
stream << ')';
|
||||
return stream;
|
||||
}
|
||||
|
||||
|
||||
@ -113,7 +113,7 @@ void TensorNames::checkUnique(const char* op_name) const {
|
||||
std::ostream& operator<<(std::ostream& out, const TensorName& tensorname) {
|
||||
out << tensorname.name_ << " (index ";
|
||||
out << tensorname.origin_idx_ << " of ";
|
||||
out << tensorname.origin_ << ")";
|
||||
out << tensorname.origin_ << ')';
|
||||
return out;
|
||||
}
|
||||
|
||||
|
||||
@ -13,9 +13,9 @@ std::ostream& operator<<(std::ostream & out, const TensorGeometryArg& t) {
|
||||
if (t.pos == 0) {
|
||||
// 0 is distinguished; it usually indicates 'self' or the return
|
||||
// tensor
|
||||
out << "'" << t.name << "'";
|
||||
out << '\'' << t.name << '\'';
|
||||
} else {
|
||||
out << "argument #" << t.pos << " '" << t.name << "'";
|
||||
out << "argument #" << t.pos << " '" << t.name << '\'';
|
||||
}
|
||||
return out;
|
||||
}
|
||||
@ -154,7 +154,7 @@ void checkSameGPU(CheckedFrom c, const TensorArg& t1, const TensorArg& t2) {
|
||||
oss << "Tensor for " << t2 << " is on CPU, ";
|
||||
}
|
||||
oss << "but expected " << ((!t1->is_cpu() && !t2->is_cpu()) ? "them" : "it")
|
||||
<< " to be on GPU (while checking arguments for " << c << ")";
|
||||
<< " to be on GPU (while checking arguments for " << c << ')';
|
||||
TORCH_CHECK(false, oss.str());
|
||||
}
|
||||
TORCH_CHECK(
|
||||
@ -199,7 +199,7 @@ void checkScalarTypes(CheckedFrom c, const TensorArg& t,
|
||||
i++;
|
||||
}
|
||||
oss << "; but got " << t->toString()
|
||||
<< " instead (while checking arguments for " << c << ")";
|
||||
<< " instead (while checking arguments for " << c << ')';
|
||||
TORCH_CHECK(false, oss.str());
|
||||
}
|
||||
}
|
||||
|
||||
@ -43,8 +43,8 @@ std::string get_mkldnn_version() {
|
||||
// https://github.com/intel/ideep/issues/29
|
||||
{
|
||||
const dnnl_version_t* ver = dnnl_version();
|
||||
ss << "Intel(R) MKL-DNN v" << ver->major << "." << ver->minor << "." << ver->patch
|
||||
<< " (Git Hash " << ver->hash << ")";
|
||||
ss << "Intel(R) MKL-DNN v" << ver->major << '.' << ver->minor << '.' << ver->patch
|
||||
<< " (Git Hash " << ver->hash << ')';
|
||||
}
|
||||
#else
|
||||
ss << "MKLDNN not found";
|
||||
@ -81,7 +81,7 @@ std::string get_openmp_version() {
|
||||
break;
|
||||
}
|
||||
if (ver_str) {
|
||||
ss << " (a.k.a. OpenMP " << ver_str << ")";
|
||||
ss << " (a.k.a. OpenMP " << ver_str << ')';
|
||||
}
|
||||
}
|
||||
#else
|
||||
@ -135,38 +135,38 @@ std::string show_config() {
|
||||
|
||||
#if defined(__GNUC__)
|
||||
{
|
||||
ss << " - GCC " << __GNUC__ << "." << __GNUC_MINOR__ << "\n";
|
||||
ss << " - GCC " << __GNUC__ << '.' << __GNUC_MINOR__ << '\n';
|
||||
}
|
||||
#endif
|
||||
|
||||
#if defined(__cplusplus)
|
||||
{
|
||||
ss << " - C++ Version: " << __cplusplus << "\n";
|
||||
ss << " - C++ Version: " << __cplusplus << '\n';
|
||||
}
|
||||
#endif
|
||||
|
||||
#if defined(__clang_major__)
|
||||
{
|
||||
ss << " - clang " << __clang_major__ << "." << __clang_minor__ << "." << __clang_patchlevel__ << "\n";
|
||||
ss << " - clang " << __clang_major__ << '.' << __clang_minor__ << '.' << __clang_patchlevel__ << '\n';
|
||||
}
|
||||
#endif
|
||||
|
||||
#if defined(_MSC_VER)
|
||||
{
|
||||
ss << " - MSVC " << _MSC_FULL_VER << "\n";
|
||||
ss << " - MSVC " << _MSC_FULL_VER << '\n';
|
||||
}
|
||||
#endif
|
||||
|
||||
#if AT_MKL_ENABLED()
|
||||
ss << " - " << get_mkl_version() << "\n";
|
||||
ss << " - " << get_mkl_version() << '\n';
|
||||
#endif
|
||||
|
||||
#if AT_MKLDNN_ENABLED()
|
||||
ss << " - " << get_mkldnn_version() << "\n";
|
||||
ss << " - " << get_mkldnn_version() << '\n';
|
||||
#endif
|
||||
|
||||
#ifdef _OPENMP
|
||||
ss << " - " << get_openmp_version() << "\n";
|
||||
ss << " - " << get_openmp_version() << '\n';
|
||||
#endif
|
||||
|
||||
#if AT_BUILD_WITH_LAPACK()
|
||||
@ -183,7 +183,7 @@ std::string show_config() {
|
||||
ss << " - Cross compiling on MacOSX\n";
|
||||
#endif
|
||||
|
||||
ss << " - "<< used_cpu_capability() << "\n";
|
||||
ss << " - "<< used_cpu_capability() << '\n';
|
||||
|
||||
if (hasCUDA()) {
|
||||
ss << detail::getCUDAHooks().showConfig();
|
||||
@ -200,10 +200,10 @@ std::string show_config() {
|
||||
ss << " - Build settings: ";
|
||||
for (const auto& pair : caffe2::GetBuildOptions()) {
|
||||
if (!pair.second.empty()) {
|
||||
ss << pair.first << "=" << pair.second << ", ";
|
||||
ss << pair.first << '=' << pair.second << ", ";
|
||||
}
|
||||
}
|
||||
ss << "\n";
|
||||
ss << '\n';
|
||||
|
||||
// TODO: do HIP
|
||||
// TODO: do XLA
|
||||
|
||||
@ -209,7 +209,7 @@ struct CodeTemplate {
|
||||
// to indent correctly in the context.
|
||||
void emitIndent(std::ostream& out, size_t indent) const {
|
||||
for ([[maybe_unused]] const auto i : c10::irange(indent)) {
|
||||
out << " ";
|
||||
out << ' ';
|
||||
}
|
||||
}
|
||||
void emitStringWithIndents(
|
||||
|
||||
@ -10,7 +10,7 @@ std::ostream& operator<<(std::ostream& out, const Dimname& dimname) {
|
||||
if (dimname.type() == NameType::WILDCARD) {
|
||||
out << "None";
|
||||
} else {
|
||||
out << "'" << dimname.symbol().toUnqualString() << "'";
|
||||
out << '\'' << dimname.symbol().toUnqualString() << '\'';
|
||||
}
|
||||
return out;
|
||||
}
|
||||
|
||||
@ -5,7 +5,7 @@
|
||||
namespace at {
|
||||
|
||||
std::ostream& operator<<(std::ostream& out, const Range& range) {
|
||||
out << "Range[" << range.begin << ", " << range.end << "]";
|
||||
out << "Range[" << range.begin << ", " << range.end << ']';
|
||||
return out;
|
||||
}
|
||||
|
||||
|
||||
@ -71,7 +71,7 @@ void TensorBase::enforce_invariants() {
|
||||
|
||||
void TensorBase::print() const {
|
||||
if (defined()) {
|
||||
std::cerr << "[" << toString() << " " << sizes() << "]" << '\n';
|
||||
std::cerr << '[' << toString() << ' ' << sizes() << ']' << '\n';
|
||||
} else {
|
||||
std::cerr << "[UndefinedTensor]" << '\n';
|
||||
}
|
||||
|
||||
@ -9,8 +9,8 @@ APIVitals VitalsAPI;
|
||||
|
||||
std::ostream& operator<<(std::ostream& os, TorchVital const& tv) {
|
||||
for (const auto& m : tv.attrs) {
|
||||
os << "[TORCH_VITAL] " << tv.name << "." << m.first << "\t\t "
|
||||
<< m.second.value << "\n";
|
||||
os << "[TORCH_VITAL] " << tv.name << '.' << m.first << "\t\t "
|
||||
<< m.second.value << '\n';
|
||||
}
|
||||
return os;
|
||||
}
|
||||
|
||||
@ -100,18 +100,18 @@ inline bool operator==(const AliasInfo& lhs, const AliasInfo& rhs) {
|
||||
|
||||
// this does match the way things are represented in the schema
|
||||
inline std::ostream& operator<<(std::ostream& out, const AliasInfo& aliasInfo) {
|
||||
out << "(";
|
||||
out << '(';
|
||||
bool first = true;
|
||||
for (const auto& set : aliasInfo.beforeSets()) {
|
||||
if (first) {
|
||||
first = false;
|
||||
} else {
|
||||
out << "|";
|
||||
out << '|';
|
||||
}
|
||||
out << set.toUnqualString();
|
||||
}
|
||||
if (aliasInfo.isWrite()) {
|
||||
out << "!";
|
||||
out << '!';
|
||||
}
|
||||
if (aliasInfo.beforeSets() != aliasInfo.afterSets()) {
|
||||
out << " -> ";
|
||||
@ -120,12 +120,12 @@ inline std::ostream& operator<<(std::ostream& out, const AliasInfo& aliasInfo) {
|
||||
if (first) {
|
||||
first = false;
|
||||
} else {
|
||||
out << "|";
|
||||
out << '|';
|
||||
}
|
||||
out << set.toUnqualString();
|
||||
}
|
||||
}
|
||||
out << ")";
|
||||
out << ')';
|
||||
return out;
|
||||
}
|
||||
} // namespace c10
|
||||
|
||||
@ -198,7 +198,7 @@ inline void swap(Blob& lhs, Blob& rhs) noexcept {
|
||||
}
|
||||
|
||||
inline std::ostream& operator<<(std::ostream& out, const Blob& v) {
|
||||
return out << "Blob[" << v.TypeName() << "]";
|
||||
return out << "Blob[" << v.TypeName() << ']';
|
||||
}
|
||||
|
||||
} // namespace caffe2
|
||||
|
||||
@ -456,8 +456,8 @@ bool ClassType::isSubtypeOfExt(const Type& rhs, std::ostream* why_not) const {
|
||||
*why_not << "Method on class '" << repr_str()
|
||||
<< "' (1) is not compatible with interface '"
|
||||
<< rhs.repr_str() << "' (2)\n"
|
||||
<< " (1) " << self_method->getSchema() << "\n"
|
||||
<< " (2) " << schema << "\n";
|
||||
<< " (1) " << self_method->getSchema() << '\n'
|
||||
<< " (2) " << schema << '\n';
|
||||
}
|
||||
return false;
|
||||
}
|
||||
|
||||
@ -100,7 +100,7 @@ struct TORCH_API ClassType : public NamedType {
|
||||
std::string repr_str() const override {
|
||||
std::stringstream ss;
|
||||
ss << str()
|
||||
<< " (of Python compilation unit at: " << compilation_unit().get() << ")";
|
||||
<< " (of Python compilation unit at: " << compilation_unit().get() << ')';
|
||||
return ss.str();
|
||||
}
|
||||
|
||||
|
||||
@ -58,12 +58,12 @@ std::string DispatchKeyExtractor::dumpState() const {
|
||||
std::ostringstream oss;
|
||||
for (const auto i : c10::irange(c10::utils::bitset::NUM_BITS())) {
|
||||
if (dispatch_arg_indices_reverse_.get(i)) {
|
||||
oss << "1";
|
||||
oss << '1';
|
||||
} else {
|
||||
oss << "0";
|
||||
oss << '0';
|
||||
}
|
||||
}
|
||||
oss << " " << nonFallthroughKeys_ << "\n";
|
||||
oss << ' ' << nonFallthroughKeys_ << '\n';
|
||||
return oss.str();
|
||||
}
|
||||
|
||||
|
||||
@ -69,8 +69,8 @@ private:
|
||||
|
||||
void _print_dispatch_trace(const std::string& label, const std::string& op_name, const DispatchKeySet& dispatchKeySet) {
|
||||
auto nesting_value = dispatch_trace_nesting_value();
|
||||
for (int64_t i = 0; i < nesting_value; ++i) std::cerr << " ";
|
||||
std::cerr << label << " op=[" << op_name << "], key=[" << toString(dispatchKeySet.highestPriorityTypeId()) << "]" << std::endl;
|
||||
for (int64_t i = 0; i < nesting_value; ++i) std::cerr << ' ';
|
||||
std::cerr << label << " op=[" << op_name << "], key=[" << toString(dispatchKeySet.highestPriorityTypeId()) << ']' << std::endl;
|
||||
}
|
||||
} // namespace detail
|
||||
|
||||
|
||||
@ -570,7 +570,7 @@ void OperatorEntry::checkInvariants() const {
|
||||
|
||||
std::string OperatorEntry::listAllDispatchKeys() const {
|
||||
std::ostringstream str;
|
||||
str << "[";
|
||||
str << '[';
|
||||
|
||||
bool has_kernels = false;
|
||||
for (auto k : allDispatchKeysInFullSet()) {
|
||||
@ -584,7 +584,7 @@ std::string OperatorEntry::listAllDispatchKeys() const {
|
||||
str << k;
|
||||
has_kernels = true;
|
||||
}
|
||||
str << "]";
|
||||
str << ']';
|
||||
return str.str();
|
||||
}
|
||||
|
||||
@ -683,12 +683,12 @@ void OperatorEntry::setReportErrorCallback_(std::unique_ptr<c10::SafePyObject> c
|
||||
// This WON'T report backend fallbacks.
|
||||
std::string OperatorEntry::dumpState() const {
|
||||
std::ostringstream oss;
|
||||
oss << "name: " << name_ << "\n";
|
||||
oss << "name: " << name_ << '\n';
|
||||
if (schema_) {
|
||||
oss << "schema: " << schema_->schema << "\n";
|
||||
oss << "debug: " << schema_->debug << "\n";
|
||||
oss << "schema: " << schema_->schema << '\n';
|
||||
oss << "debug: " << schema_->debug << '\n';
|
||||
oss << "alias analysis kind: " << toString(schema_->schema.aliasAnalysis())
|
||||
<< (schema_->schema.isDefaultAliasAnalysisKind() ? " (default)" : "") << "\n";
|
||||
<< (schema_->schema.isDefaultAliasAnalysisKind() ? " (default)" : "") << '\n';
|
||||
} else {
|
||||
oss << "schema: (none)\n";
|
||||
}
|
||||
|
||||
@ -7,7 +7,7 @@
|
||||
namespace c10 {
|
||||
|
||||
void FunctionSchema::dump() const {
|
||||
std::cout << *this << "\n";
|
||||
std::cout << *this << '\n';
|
||||
}
|
||||
|
||||
const std::vector<Argument>& FunctionSchema::getCorrectList(SchemaArgType type) const {
|
||||
@ -210,9 +210,9 @@ std::ostream& operator<<(std::ostream& out, const FunctionSchema& schema) {
|
||||
|
||||
out << schema.name();
|
||||
if (!schema.overload_name().empty()) {
|
||||
out << "." << schema.overload_name();
|
||||
out << '.' << schema.overload_name();
|
||||
}
|
||||
out << "(";
|
||||
out << '(';
|
||||
|
||||
bool seen_kwarg_only = false;
|
||||
for (const auto i : c10::irange(schema.arguments().size())) {
|
||||
@ -273,7 +273,7 @@ std::ostream& operator<<(std::ostream& out, const FunctionSchema& schema) {
|
||||
}
|
||||
|
||||
if (need_paren) {
|
||||
out << "(";
|
||||
out << '(';
|
||||
}
|
||||
for (const auto i : c10::irange(returns.size())) {
|
||||
if (i > 0) {
|
||||
@ -288,7 +288,7 @@ std::ostream& operator<<(std::ostream& out, const FunctionSchema& schema) {
|
||||
out << "...";
|
||||
}
|
||||
if (need_paren) {
|
||||
out << ")";
|
||||
out << ')';
|
||||
}
|
||||
return out;
|
||||
}
|
||||
@ -471,7 +471,7 @@ bool FunctionSchema::isForwardCompatibleWith(
|
||||
if (!arguments().at(i).isForwardCompatibleWith(old.arguments().at(i))) {
|
||||
if (why_not) {
|
||||
why_not
|
||||
<< "'" << arguments().at(i).name() << "'"
|
||||
<< '\'' << arguments().at(i).name() << '\''
|
||||
<< " is not forward compatible with the older version of the schema";
|
||||
}
|
||||
return false;
|
||||
@ -511,7 +511,7 @@ bool FunctionSchema::isForwardCompatibleWith(
|
||||
.isForwardCompatibleWith(old.arguments().at(i))) {
|
||||
if (why_not) {
|
||||
why_not << "Out argument '"
|
||||
<< "'" << arguments().at(i).name()
|
||||
<< '\'' << arguments().at(i).name()
|
||||
<< " is not FC with the older version of the schema";
|
||||
}
|
||||
return false;
|
||||
|
||||
@ -571,7 +571,7 @@ inline std::ostream& operator<<(std::ostream& out, const Argument& arg) {
|
||||
if (arg.N()) {
|
||||
N = std::to_string(*arg.N());
|
||||
}
|
||||
out << "[" << N << "]";
|
||||
out << '[' << N << ']';
|
||||
} else {
|
||||
out << unopt_type->str();
|
||||
}
|
||||
@ -582,15 +582,15 @@ inline std::ostream& operator<<(std::ostream& out, const Argument& arg) {
|
||||
}
|
||||
|
||||
if (is_opt) {
|
||||
out << "?";
|
||||
out << '?';
|
||||
}
|
||||
|
||||
if (!arg.name().empty()) {
|
||||
out << " " << arg.name();
|
||||
out << ' ' << arg.name();
|
||||
}
|
||||
|
||||
if (arg.default_value()) {
|
||||
out << "=";
|
||||
out << '=';
|
||||
if ((type->kind() == c10::TypeKind::StringType ||
|
||||
unopt_type->kind() == c10::TypeKind::StringType) &&
|
||||
arg.default_value().value().isString()) {
|
||||
|
||||
@ -66,7 +66,7 @@ bool operator==(const ivalue::Tuple& lhs, const ivalue::Tuple& rhs) {
|
||||
}
|
||||
|
||||
std::ostream& operator<<(std::ostream& out, const ivalue::EnumHolder& v) {
|
||||
out << v.qualifiedClassName() << "." << v.name();
|
||||
out << v.qualifiedClassName() << '.' << v.name();
|
||||
return out;
|
||||
}
|
||||
|
||||
@ -526,7 +526,7 @@ std::ostream& printMaybeAnnotatedList(
|
||||
!elementTypeCanBeInferredFromMembers(list_elem_type)) {
|
||||
out << "annotate(" << the_list.type<c10::Type>()->annotation_str() << ", ";
|
||||
printList(out, the_list.toListRef(), "[", "]", formatter);
|
||||
out << ")";
|
||||
out << ')';
|
||||
return out;
|
||||
} else {
|
||||
return printList(out, the_list.toListRef(), "[", "]", formatter);
|
||||
@ -538,7 +538,7 @@ std::ostream& printDict(
|
||||
std::ostream& out,
|
||||
const Dict& v,
|
||||
const IValueFormatter& formatter) {
|
||||
out << "{";
|
||||
out << '{';
|
||||
|
||||
bool first = true;
|
||||
for (const auto& pair : v) {
|
||||
@ -552,7 +552,7 @@ std::ostream& printDict(
|
||||
first = false;
|
||||
}
|
||||
|
||||
out << "}";
|
||||
out << '}';
|
||||
return out;
|
||||
}
|
||||
}
|
||||
@ -565,8 +565,8 @@ static std::ostream& printMaybeAnnotatedDict(
|
||||
auto value_type = the_dict.type()->castRaw<DictType>()->getValueType();
|
||||
if (the_dict.toGenericDict().empty() ||
|
||||
!elementTypeCanBeInferredFromMembers(value_type)) {
|
||||
out << "annotate(" << the_dict.type<c10::Type>()->annotation_str() << ",";
|
||||
printDict(out, the_dict.toGenericDict(), formatter) << ")";
|
||||
out << "annotate(" << the_dict.type<c10::Type>()->annotation_str() << ',';
|
||||
printDict(out, the_dict.toGenericDict(), formatter) << ')';
|
||||
} else {
|
||||
return printDict(out, the_dict.toGenericDict(), formatter);
|
||||
}
|
||||
@ -577,7 +577,7 @@ static std::ostream& printComplex(std::ostream & out, const IValue & v) {
|
||||
c10::complex<double> d = v.toComplexDouble();
|
||||
IValue real(d.real()), imag(std::abs(d.imag()));
|
||||
auto sign = d.imag() >= 0 ? '+' : '-';
|
||||
return out << real << sign << imag << "j";
|
||||
return out << real << sign << imag << 'j';
|
||||
}
|
||||
|
||||
std::ostream& IValue::repr(
|
||||
@ -605,9 +605,9 @@ std::ostream& IValue::repr(
|
||||
if (static_cast<double>(i) == d) {
|
||||
// -0.0 (signed zero) needs to be parsed as -0.
|
||||
if (i == 0 && std::signbit(d)) {
|
||||
return out << "-" << i << ".";
|
||||
return out << '-' << i << '.';
|
||||
}
|
||||
return out << i << ".";
|
||||
return out << i << '.';
|
||||
}
|
||||
}
|
||||
auto orig_prec = out.precision();
|
||||
@ -643,20 +643,20 @@ std::ostream& IValue::repr(
|
||||
device_stream << v.toDevice();
|
||||
out << "torch.device(";
|
||||
c10::printQuotedString(out, device_stream.str());
|
||||
return out << ")";
|
||||
return out << ')';
|
||||
}
|
||||
case IValue::Tag::Generator: {
|
||||
auto generator = v.toGenerator();
|
||||
out << "torch.Generator(device=";
|
||||
c10::printQuotedString(out, generator.device().str());
|
||||
out << ", seed=" << generator.current_seed() << ")";
|
||||
out << ", seed=" << generator.current_seed() << ')';
|
||||
return out;
|
||||
}
|
||||
case IValue::Tag::GenericDict:
|
||||
return printMaybeAnnotatedDict(out, v, formatter);
|
||||
case IValue::Tag::Enum: {
|
||||
auto enum_holder = v.toEnumHolder();
|
||||
return out << enum_holder->qualifiedClassName() << "." <<
|
||||
return out << enum_holder->qualifiedClassName() << '.' <<
|
||||
enum_holder->name();
|
||||
}
|
||||
case IValue::Tag::Object: {
|
||||
@ -801,7 +801,7 @@ std::ostream& operator<<(std::ostream & out, const IValue & v) {
|
||||
if (c == FP_NORMAL || c == FP_ZERO) {
|
||||
int64_t i = static_cast<int64_t>(d);
|
||||
if (static_cast<double>(i) == d) {
|
||||
return out << i << ".";
|
||||
return out << i << '.';
|
||||
}
|
||||
}
|
||||
auto orig_prec = out.precision();
|
||||
@ -852,7 +852,7 @@ std::ostream& operator<<(std::ostream & out, const IValue & v) {
|
||||
return printDict(out, v.toGenericDict(), formatter);
|
||||
case IValue::Tag::PyObject: {
|
||||
auto py_obj = v.toPyObject();
|
||||
return out << "<PyObject at" << py_obj << ">";
|
||||
return out << "<PyObject at" << py_obj << '>';
|
||||
}
|
||||
case IValue::Tag::Generator:
|
||||
return out << "Generator";
|
||||
@ -862,22 +862,22 @@ std::ostream& operator<<(std::ostream & out, const IValue & v) {
|
||||
// TODO we should attempt to call __str__ if the object defines it.
|
||||
auto obj = v.toObject();
|
||||
// print this out the way python would do it
|
||||
return out << "<" << obj->name() << " object at " << obj.get() << ">";
|
||||
return out << '<' << obj->name() << " object at " << obj.get() << '>';
|
||||
}
|
||||
case IValue::Tag::Enum: {
|
||||
auto enum_holder = v.toEnumHolder();
|
||||
return out << "Enum<" << enum_holder->unqualifiedClassName() << "." <<
|
||||
enum_holder->name() << ">";
|
||||
return out << "Enum<" << enum_holder->unqualifiedClassName() << '.' <<
|
||||
enum_holder->name() << '>';
|
||||
}
|
||||
|
||||
}
|
||||
return out << "<Invalid IValue tag=" << std::to_string(static_cast<uint32_t>(v.tag)) << ">";
|
||||
return out << "<Invalid IValue tag=" << std::to_string(static_cast<uint32_t>(v.tag)) << '>';
|
||||
}
|
||||
|
||||
#undef TORCH_FORALL_TAGS
|
||||
|
||||
void IValue::dump() const {
|
||||
std::cout << *this << "\n";
|
||||
std::cout << *this << '\n';
|
||||
}
|
||||
|
||||
std::shared_ptr<ClassType> ivalue::Object::type() const {
|
||||
@ -1050,7 +1050,7 @@ c10::intrusive_ptr<ivalue::Object> ivalue::Object::deepcopy(
|
||||
std::stringstream err;
|
||||
err << "Cannot serialize custom bound C++ class";
|
||||
if (auto qualname = type()->name()) {
|
||||
err << " " << qualname->qualifiedName();
|
||||
err << ' ' << qualname->qualifiedName();
|
||||
}
|
||||
err << ". Please define serialization methods via def_pickle() for "
|
||||
"this class.";
|
||||
|
||||
@ -211,7 +211,7 @@ struct TORCH_API OptionalType : public UnionType {
|
||||
|
||||
std::string str() const override {
|
||||
std::stringstream ss;
|
||||
ss << getElementType()->str() << "?";
|
||||
ss << getElementType()->str() << '?';
|
||||
return ss.str();
|
||||
}
|
||||
|
||||
@ -240,7 +240,7 @@ struct TORCH_API OptionalType : public UnionType {
|
||||
|
||||
std::string annotation_str_impl(const TypePrinter& printer = nullptr) const override {
|
||||
std::stringstream ss;
|
||||
ss << "Optional[" << getElementType()->annotation_str(printer) << "]";
|
||||
ss << "Optional[" << getElementType()->annotation_str(printer) << ']';
|
||||
return ss.str();
|
||||
}
|
||||
};
|
||||
@ -906,7 +906,7 @@ struct TORCH_API ListType
|
||||
|
||||
std::string annotation_str_impl(const TypePrinter& printer = nullptr) const override {
|
||||
std::stringstream ss;
|
||||
ss << "List[" << getElementType()->annotation_str(printer) << "]";
|
||||
ss << "List[" << getElementType()->annotation_str(printer) << ']';
|
||||
return ss.str();
|
||||
}
|
||||
};
|
||||
@ -946,7 +946,7 @@ struct TORCH_API DictType : public SharedType {
|
||||
std::string str() const override {
|
||||
std::stringstream ss;
|
||||
ss << "Dict(" << getKeyType()->str() << ", " << getValueType()->str()
|
||||
<< ")";
|
||||
<< ')';
|
||||
return ss.str();
|
||||
}
|
||||
|
||||
@ -1018,7 +1018,7 @@ struct TORCH_API FutureType
|
||||
|
||||
std::string str() const override {
|
||||
std::stringstream ss;
|
||||
ss << "Future(" << getElementType()->str() << ")";
|
||||
ss << "Future(" << getElementType()->str() << ')';
|
||||
return ss.str();
|
||||
}
|
||||
TypePtr createWithContained(
|
||||
@ -1041,7 +1041,7 @@ struct TORCH_API FutureType
|
||||
|
||||
std::string annotation_str_impl(const TypePrinter& printer = nullptr) const override {
|
||||
std::stringstream ss;
|
||||
ss << "Future[" << getElementType()->annotation_str(printer) << "]";
|
||||
ss << "Future[" << getElementType()->annotation_str(printer) << ']';
|
||||
return ss.str();
|
||||
}
|
||||
};
|
||||
@ -1060,7 +1060,7 @@ struct TORCH_API AwaitType
|
||||
|
||||
std::string str() const override {
|
||||
std::stringstream ss;
|
||||
ss << "Await(" << getElementType()->str() << ")";
|
||||
ss << "Await(" << getElementType()->str() << ')';
|
||||
return ss.str();
|
||||
}
|
||||
TypePtr createWithContained(
|
||||
@ -1083,7 +1083,7 @@ struct TORCH_API AwaitType
|
||||
|
||||
std::string annotation_str_impl(const TypePrinter& printer = nullptr) const override {
|
||||
std::stringstream ss;
|
||||
ss << "Await[" << getElementType()->annotation_str(printer) << "]";
|
||||
ss << "Await[" << getElementType()->annotation_str(printer) << ']';
|
||||
return ss.str();
|
||||
}
|
||||
};
|
||||
@ -1102,7 +1102,7 @@ struct TORCH_API RRefType
|
||||
|
||||
std::string str() const override {
|
||||
std::stringstream ss;
|
||||
ss << "RRef(" << getElementType()->str() << ")";
|
||||
ss << "RRef(" << getElementType()->str() << ')';
|
||||
return ss.str();
|
||||
}
|
||||
TypePtr createWithContained(
|
||||
@ -1115,7 +1115,7 @@ struct TORCH_API RRefType
|
||||
|
||||
std::string annotation_str_impl(const TypePrinter& printer = nullptr) const override {
|
||||
std::stringstream ss;
|
||||
ss << "RRef[" << getElementType()->annotation_str(printer) << "]";
|
||||
ss << "RRef[" << getElementType()->annotation_str(printer) << ']';
|
||||
return ss.str();
|
||||
}
|
||||
};
|
||||
|
||||
@ -11,7 +11,7 @@ std::string toString(const OperatorName& opName) {
|
||||
std::ostream& operator<<(std::ostream& os, const OperatorName& opName) {
|
||||
os << opName.name;
|
||||
if (!opName.overload_name.empty()) {
|
||||
os << "." << opName.overload_name;
|
||||
os << '.' << opName.overload_name;
|
||||
}
|
||||
return os;
|
||||
}
|
||||
|
||||
@ -65,7 +65,7 @@ VaryingShape<T> VaryingShape<T>::merge(const VaryingShape<T>& other) const {
|
||||
|
||||
template <typename T>
|
||||
std::ostream& operator<<(std::ostream& out, const VaryingShape<T>& vs) {
|
||||
out << "(";
|
||||
out << '(';
|
||||
if (!vs.size()) {
|
||||
out << "*)";
|
||||
return out;
|
||||
@ -79,10 +79,10 @@ std::ostream& operator<<(std::ostream& out, const VaryingShape<T>& vs) {
|
||||
if (v.has_value()) {
|
||||
out << v.value();
|
||||
} else {
|
||||
out << "*";
|
||||
out << '*';
|
||||
}
|
||||
}
|
||||
out << ")";
|
||||
out << ')';
|
||||
return out;
|
||||
}
|
||||
|
||||
@ -105,7 +105,7 @@ std::ostream& operator<<(
|
||||
}
|
||||
auto sizes_opt = ss.sizes();
|
||||
|
||||
os << "(";
|
||||
os << '(';
|
||||
for (size_t i = 0; i < rank_opt.value(); i++) {
|
||||
if (i > 0) {
|
||||
os << ", ";
|
||||
@ -113,10 +113,10 @@ std::ostream& operator<<(
|
||||
if(sizes_opt.has_value() && sizes_opt.value()[i].is_static()) {
|
||||
os << sizes_opt.value()[i];
|
||||
} else {
|
||||
os << "*";
|
||||
os << '*';
|
||||
}
|
||||
}
|
||||
os << ")";
|
||||
os << ')';
|
||||
|
||||
return os;
|
||||
}
|
||||
@ -131,17 +131,17 @@ std::ostream& operator<<(std::ostream& os, const ShapeSymbol& s) {
|
||||
}
|
||||
|
||||
std::ostream& operator<<(std::ostream& os, const Stride& s) {
|
||||
os << "{";
|
||||
os << '{';
|
||||
if (s.stride_index_.has_value()) {
|
||||
os << *s.stride_index_;
|
||||
} else {
|
||||
os << "*";
|
||||
os << '*';
|
||||
}
|
||||
os << ":";
|
||||
os << ':';
|
||||
if (s.stride_.has_value()) {
|
||||
os << *s.stride_;
|
||||
} else {
|
||||
os << "*";
|
||||
os << '*';
|
||||
}
|
||||
os << '}';
|
||||
return os;
|
||||
|
||||
@ -67,7 +67,7 @@ std::ostream& operator<<(std::ostream & out, const Type & t) {
|
||||
bool has_valid_strides_info = ndim > 0 &&
|
||||
value->strides().isComplete() && value->strides().size() == ndim;
|
||||
|
||||
out << "(";
|
||||
out << '(';
|
||||
size_t i = 0;
|
||||
bool symbolic = type_verbosity() == TypeVerbosity::Symbolic;
|
||||
for (i = 0; i < *ndim; ++i) {
|
||||
@ -79,7 +79,7 @@ std::ostream& operator<<(std::ostream & out, const Type & t) {
|
||||
} else if (symbolic) {
|
||||
out << value->symbolic_sizes().at(i);
|
||||
} else {
|
||||
out << "*";
|
||||
out << '*';
|
||||
}
|
||||
}
|
||||
if (has_valid_strides_info &&
|
||||
@ -91,7 +91,7 @@ std::ostream& operator<<(std::ostream & out, const Type & t) {
|
||||
}
|
||||
out << value->strides()[i].value();
|
||||
}
|
||||
out << "]";
|
||||
out << ']';
|
||||
}
|
||||
if (type_verbosity() >= TypeVerbosity::Full) {
|
||||
if (value->requiresGrad()) {
|
||||
@ -107,12 +107,12 @@ std::ostream& operator<<(std::ostream & out, const Type & t) {
|
||||
out << "device=" << *value->device();
|
||||
}
|
||||
}
|
||||
out << ")";
|
||||
out << ')';
|
||||
} else {
|
||||
if (type_verbosity() >= TypeVerbosity::Full) {
|
||||
size_t i = 0;
|
||||
if (value->requiresGrad()) {
|
||||
out << "("
|
||||
out << '('
|
||||
<< "requires_grad=" << *value->requiresGrad();
|
||||
i++;
|
||||
}
|
||||
@ -120,7 +120,7 @@ std::ostream& operator<<(std::ostream & out, const Type & t) {
|
||||
out << ((i++ > 0) ? ", " : "(") << "device=" << *value->device();
|
||||
}
|
||||
if (i > 0) {
|
||||
out << ")";
|
||||
out << ')';
|
||||
}
|
||||
}
|
||||
}
|
||||
@ -133,18 +133,18 @@ std::ostream& operator<<(std::ostream & out, const Type & t) {
|
||||
out << *prim << "[]";
|
||||
} else if (t.kind() == TypeKind::OptionalType) {
|
||||
auto prim = t.castRaw<OptionalType>()->getElementType();
|
||||
out << *prim << "?";
|
||||
out << *prim << '?';
|
||||
} else if(t.kind() == TypeKind::FutureType) {
|
||||
auto elem = t.castRaw<FutureType>()->getElementType();
|
||||
out << "Future[" << *elem << "]";
|
||||
out << "Future[" << *elem << ']';
|
||||
} else if(t.kind() == TypeKind::RRefType) {
|
||||
auto elem = t.castRaw<RRefType>()->getElementType();
|
||||
out << "RRef[" << *elem << "]";
|
||||
out << "RRef[" << *elem << ']';
|
||||
} else if(auto tup = t.cast<TupleType>()) {
|
||||
if (tup->schema()) {
|
||||
out << "NamedTuple";
|
||||
}
|
||||
out << "(";
|
||||
out << '(';
|
||||
for(size_t i = 0; i < tup->elements().size(); ++i) {
|
||||
if(i > 0)
|
||||
out << ", ";
|
||||
@ -160,7 +160,7 @@ std::ostream& operator<<(std::ostream & out, const Type & t) {
|
||||
out << *(tup->elements()[i]);
|
||||
}
|
||||
}
|
||||
out << ")";
|
||||
out << ')';
|
||||
} else if (t.kind() == TypeKind::FunctionType) {
|
||||
out << "Function";
|
||||
} else {
|
||||
@ -475,7 +475,7 @@ std::optional<TypePtr> unifyTypeList(
|
||||
why_not << "Could not unify type list since element " << i << " of type "
|
||||
<< elements.at(i)->repr_str()
|
||||
<< " did not match the types before it ("
|
||||
<< ret_type->repr_str() << ")";
|
||||
<< ret_type->repr_str() << ')';
|
||||
return std::nullopt;
|
||||
}
|
||||
ret_type = *maybe_unified;
|
||||
@ -680,7 +680,7 @@ TORCH_API bool elementTypeCanBeInferredFromMembers(const TypePtr& elem_type) {
|
||||
return false;
|
||||
}
|
||||
if (elem_type->kind() == AnyType::Kind) {
|
||||
// List of Any can contains heterogenous types
|
||||
// List of Any can contains heterogeneous types
|
||||
return false;
|
||||
}
|
||||
return true;
|
||||
@ -907,13 +907,13 @@ std::string TupleType::str() const {
|
||||
// NOLINTNEXTLINE(bugprone-unchecked-optional-access)
|
||||
ss << name()->qualifiedName();
|
||||
} else {
|
||||
ss << "(";
|
||||
ss << '(';
|
||||
for(size_t i = 0; i < elements().size(); ++i) {
|
||||
if(i > 0)
|
||||
ss << ", ";
|
||||
ss << elements()[i]->str();
|
||||
}
|
||||
ss << ")";
|
||||
ss << ')';
|
||||
}
|
||||
return ss.str();
|
||||
}
|
||||
@ -1003,8 +1003,8 @@ bool InterfaceType::isSubTypeImpl(
|
||||
*why_not << "Method on interface '" << lhs.repr_str()
|
||||
<< "' (1) is not compatible with interface '"
|
||||
<< rhs.repr_str() << "' (2)\n"
|
||||
<< " (1) " << *self_schema << "\n"
|
||||
<< " (2) " << schema << "\n";
|
||||
<< " (1) " << *self_schema << '\n'
|
||||
<< " (2) " << schema << '\n';
|
||||
return false;
|
||||
}
|
||||
return false;
|
||||
@ -1078,7 +1078,7 @@ SymbolicShape SymbolicShape::merge(const SymbolicShape& other) const {
|
||||
}
|
||||
|
||||
void SymbolicShape::dump() const {
|
||||
std::cout << *this << "\n";
|
||||
std::cout << *this << '\n';
|
||||
}
|
||||
|
||||
bool EnumType::isSubtypeOfExt(const Type& rhs, std::ostream* why_not) const {
|
||||
|
||||
@ -205,9 +205,9 @@ UnionType::UnionType(std::vector<TypePtr> reference, TypeKind kind) : SharedType
|
||||
for (const auto i : c10::irange(reference.size())) {
|
||||
msg << reference[i]->repr_str();
|
||||
if (i > 0) {
|
||||
msg << ",";
|
||||
msg << ',';
|
||||
}
|
||||
msg << " ";
|
||||
msg << ' ';
|
||||
}
|
||||
msg << "} has the single type " << types_[0]->repr_str()
|
||||
<< ". Use the common supertype instead of creating a Union"
|
||||
|
||||
@ -80,7 +80,7 @@ std::ostream& operator<<(std::ostream& stream, const Vectorized<T>& vec) {
|
||||
}
|
||||
stream << buf[i];
|
||||
}
|
||||
stream << "]";
|
||||
stream << ']';
|
||||
return stream;
|
||||
}
|
||||
|
||||
|
||||
@ -55,7 +55,7 @@ std::ostream& operator<<(std::ostream& stream, const Vectorized<T>& vec) {
|
||||
}
|
||||
stream << buf[i];
|
||||
}
|
||||
stream << "]";
|
||||
stream << ']';
|
||||
return stream;
|
||||
}
|
||||
|
||||
|
||||
@ -411,16 +411,16 @@ std::string CUDAHooks::showConfig() const {
|
||||
// HIP_VERSION value format was changed after ROCm v4.2 to include the patch number
|
||||
if(v < 500) {
|
||||
// If major=xx, minor=yy then format -> xxyy
|
||||
oss << (v / 100) << "." << (v % 10);
|
||||
oss << (v / 100) << '.' << (v % 10);
|
||||
}
|
||||
else {
|
||||
// If major=xx, minor=yy & patch=zzzzz then format -> xxyyzzzzz
|
||||
oss << (v / 10000000) << "." << (v / 100000 % 100) << "." << (v % 100000);
|
||||
oss << (v / 10000000) << '.' << (v / 100000 % 100) << '.' << (v % 100000);
|
||||
}
|
||||
#else
|
||||
oss << (v / 1000) << "." << (v / 10 % 100);
|
||||
oss << (v / 1000) << '.' << (v / 10 % 100);
|
||||
if (v % 10 != 0) {
|
||||
oss << "." << (v % 10);
|
||||
oss << '.' << (v % 10);
|
||||
}
|
||||
#endif
|
||||
};
|
||||
@ -431,16 +431,16 @@ std::string CUDAHooks::showConfig() const {
|
||||
oss << " - HIP Runtime ";
|
||||
#endif
|
||||
printCudaStyleVersion(runtimeVersion);
|
||||
oss << "\n";
|
||||
oss << '\n';
|
||||
|
||||
// TODO: Make HIPIFY understand CUDART_VERSION macro
|
||||
#if !defined(USE_ROCM)
|
||||
if (runtimeVersion != CUDART_VERSION) {
|
||||
oss << " - Built with CUDA Runtime ";
|
||||
printCudaStyleVersion(CUDART_VERSION);
|
||||
oss << "\n";
|
||||
oss << '\n';
|
||||
}
|
||||
oss << " - NVCC architecture flags: " << NVCC_FLAGS_EXTRA << "\n";
|
||||
oss << " - NVCC architecture flags: " << NVCC_FLAGS_EXTRA << '\n';
|
||||
#endif
|
||||
|
||||
#if !defined(USE_ROCM)
|
||||
@ -448,9 +448,9 @@ std::string CUDAHooks::showConfig() const {
|
||||
|
||||
|
||||
auto printCudnnStyleVersion = [&](size_t v) {
|
||||
oss << (v / 1000) << "." << (v / 100 % 10);
|
||||
oss << (v / 1000) << '.' << (v / 100 % 10);
|
||||
if (v % 100 != 0) {
|
||||
oss << "." << (v % 100);
|
||||
oss << '.' << (v % 100);
|
||||
}
|
||||
};
|
||||
|
||||
@ -461,22 +461,22 @@ std::string CUDAHooks::showConfig() const {
|
||||
if (cudnnCudartVersion != CUDART_VERSION) {
|
||||
oss << " (built against CUDA ";
|
||||
printCudaStyleVersion(cudnnCudartVersion);
|
||||
oss << ")";
|
||||
oss << ')';
|
||||
}
|
||||
oss << "\n";
|
||||
oss << '\n';
|
||||
if (cudnnVersion != CUDNN_VERSION) {
|
||||
oss << " - Built with CuDNN ";
|
||||
printCudnnStyleVersion(CUDNN_VERSION);
|
||||
oss << "\n";
|
||||
oss << '\n';
|
||||
}
|
||||
#endif
|
||||
#else
|
||||
// TODO: Check if miopen has the functions above and unify
|
||||
oss << " - MIOpen " << MIOPEN_VERSION_MAJOR << "." << MIOPEN_VERSION_MINOR << "." << MIOPEN_VERSION_PATCH << "\n";
|
||||
oss << " - MIOpen " << MIOPEN_VERSION_MAJOR << '.' << MIOPEN_VERSION_MINOR << '.' << MIOPEN_VERSION_PATCH << '\n';
|
||||
#endif
|
||||
|
||||
#if AT_MAGMA_ENABLED()
|
||||
oss << " - Magma " << MAGMA_VERSION_MAJOR << "." << MAGMA_VERSION_MINOR << "." << MAGMA_VERSION_MICRO << "\n";
|
||||
oss << " - Magma " << MAGMA_VERSION_MAJOR << '.' << MAGMA_VERSION_MINOR << '.' << MAGMA_VERSION_MICRO << '\n';
|
||||
#endif
|
||||
|
||||
return oss.str();
|
||||
|
||||
@ -42,7 +42,7 @@ static inline void launch_jitted_vectorized_kernel_dynamic(
|
||||
|
||||
// The cache key includes all the parameters to generate_code + vec_size + dev_idx
|
||||
std::stringstream ss;
|
||||
ss << nInputs << "_" << nOutputs << f;
|
||||
ss << nInputs << '_' << nOutputs << f;
|
||||
ss << f_inputs_type_str << compute_type_str << result_type_str;
|
||||
ss << static_cast<int>(at::cuda::jit::BinaryFuncVariant::NoScalar);
|
||||
ss << extra_args_types;
|
||||
@ -144,7 +144,7 @@ static inline void launch_jitted_unrolled_kernel_dynamic(
|
||||
|
||||
// The cache key includes all the parameters to generate_code + dev_idx
|
||||
std::stringstream ss;
|
||||
ss << nInputs << "_" << nOutputs << f;
|
||||
ss << nInputs << '_' << nOutputs << f;
|
||||
ss << f_inputs_type_str << compute_type_str << result_type_str;
|
||||
ss << contiguous << dynamic_casting;
|
||||
ss << static_cast<int>(at::cuda::jit::BinaryFuncVariant::NoScalar);
|
||||
|
||||
@ -52,10 +52,10 @@ TuningContext* getTuningContext() {
|
||||
std::ostream& operator<<(std::ostream& stream, const ResultEntry& entry) {
|
||||
static const bool blaslog = c10::utils::get_env("PYTORCH_TUNABLEOP_BLAS_LOG") == "1";
|
||||
if (!blaslog) {
|
||||
return stream << entry.key_ << "," << entry.time_;
|
||||
return stream << entry.key_ << ',' << entry.time_;
|
||||
}
|
||||
else {
|
||||
return stream << entry.key_ << "," << entry.time_ << ",BLAS_PARAMS: " << entry.blas_sig_;
|
||||
return stream << entry.key_ << ',' << entry.time_ << ",BLAS_PARAMS: " << entry.blas_sig_;
|
||||
}
|
||||
}
|
||||
|
||||
@ -156,10 +156,10 @@ void TuningResultsManager::RecordUntuned( std::ofstream& untuned_file, const std
|
||||
if (isNew) {
|
||||
static const bool blaslog = c10::utils::get_env("PYTORCH_TUNABLEOP_BLAS_LOG") == "1";
|
||||
if (!blaslog) {
|
||||
untuned_file << op_signature << "," << params_signature << std::endl;
|
||||
untuned_file << op_signature << ',' << params_signature << std::endl;
|
||||
}
|
||||
else {
|
||||
untuned_file << op_signature << "," << params_signature << ",BLAS_PARAMS: " << blas_signature << std::endl;
|
||||
untuned_file << op_signature << ',' << params_signature << ",BLAS_PARAMS: " << blas_signature << std::endl;
|
||||
}
|
||||
TUNABLE_LOG3("Untuned,", op_signature, ",", params_signature);
|
||||
}
|
||||
@ -201,7 +201,7 @@ void TuningResultsManager::InitRealtimeAppend(const std::string& filename, const
|
||||
|
||||
if(!file_exists || file_empty) {
|
||||
for(const auto& [key, val] : validators) {
|
||||
(*realtime_out_) << "Validator," << key << "," << val << std::endl;
|
||||
(*realtime_out_) << "Validator," << key << ',' << val << std::endl;
|
||||
realtime_out_->flush();
|
||||
}
|
||||
validators_written_ = true;
|
||||
@ -219,7 +219,7 @@ void TuningResultsManager::AppendResultLine(const std::string& op_sig, const std
|
||||
return;
|
||||
}
|
||||
|
||||
(*realtime_out_) << op_sig << "," << param_sig << "," << result << std::endl;
|
||||
(*realtime_out_) << op_sig << ',' << param_sig << ',' << result << std::endl;
|
||||
realtime_out_->flush(); //ensure immediate write to disk
|
||||
|
||||
TUNABLE_LOG3("Realtime append: ", op_sig, "(", param_sig, ") -> ", result);
|
||||
|
||||
@ -93,31 +93,31 @@ std::string cudnnTypeToString(cudnnDataType_t dtype) {
|
||||
return "CUDNN_DATA_UINT8x4";
|
||||
default:
|
||||
std::ostringstream oss;
|
||||
oss << "(unknown data-type " << static_cast<int>(dtype) << ")";
|
||||
oss << "(unknown data-type " << static_cast<int>(dtype) << ')';
|
||||
return oss.str();
|
||||
}
|
||||
}
|
||||
|
||||
std::ostream& operator<<(std::ostream & out, const TensorDescriptor& d) {
|
||||
out << "TensorDescriptor " << static_cast<void*>(d.desc()) << "\n";
|
||||
out << "TensorDescriptor " << static_cast<void*>(d.desc()) << '\n';
|
||||
int nbDims = 0;
|
||||
int dimA[CUDNN_DIM_MAX];
|
||||
int strideA[CUDNN_DIM_MAX];
|
||||
cudnnDataType_t dtype{};
|
||||
cudnnGetTensorNdDescriptor(d.desc(), CUDNN_DIM_MAX, &dtype, &nbDims, dimA, strideA);
|
||||
out << " type = " << cudnnTypeToString(dtype) << "\n";
|
||||
out << " nbDims = " << nbDims << "\n";
|
||||
out << " type = " << cudnnTypeToString(dtype) << '\n';
|
||||
out << " nbDims = " << nbDims << '\n';
|
||||
// Read out only nbDims of the arrays!
|
||||
out << " dimA = ";
|
||||
for (auto i : ArrayRef<int>{dimA, static_cast<size_t>(nbDims)}) {
|
||||
out << i << ", ";
|
||||
}
|
||||
out << "\n";
|
||||
out << '\n';
|
||||
out << " strideA = ";
|
||||
for (auto i : ArrayRef<int>{strideA, static_cast<size_t>(nbDims)}) {
|
||||
out << i << ", ";
|
||||
}
|
||||
out << "\n";
|
||||
out << '\n';
|
||||
return out;
|
||||
}
|
||||
|
||||
@ -168,27 +168,27 @@ std::string cudnnMemoryFormatToString(cudnnTensorFormat_t tformat) {
|
||||
return "CUDNN_TENSOR_NHWC";
|
||||
default:
|
||||
std::ostringstream oss;
|
||||
oss << "(unknown cudnn tensor format " << static_cast<int>(tformat) << ")";
|
||||
oss << "(unknown cudnn tensor format " << static_cast<int>(tformat) << ')';
|
||||
return oss.str();
|
||||
}
|
||||
}
|
||||
|
||||
std::ostream& operator<<(std::ostream & out, const FilterDescriptor& d) {
|
||||
out << "FilterDescriptor " << static_cast<void*>(d.desc()) << "\n";
|
||||
out << "FilterDescriptor " << static_cast<void*>(d.desc()) << '\n';
|
||||
int nbDims = 0;
|
||||
int dimA[CUDNN_DIM_MAX];
|
||||
cudnnDataType_t dtype{};
|
||||
cudnnTensorFormat_t tformat{};
|
||||
cudnnGetFilterNdDescriptor(d.desc(), CUDNN_DIM_MAX, &dtype, &tformat, &nbDims, dimA);
|
||||
out << " type = " << cudnnTypeToString(dtype) << "\n";
|
||||
out << " tensor_format = " << cudnnMemoryFormatToString(tformat) << "\n";
|
||||
out << " nbDims = " << nbDims << "\n";
|
||||
out << " type = " << cudnnTypeToString(dtype) << '\n';
|
||||
out << " tensor_format = " << cudnnMemoryFormatToString(tformat) << '\n';
|
||||
out << " nbDims = " << nbDims << '\n';
|
||||
// Read out only nbDims of the arrays!
|
||||
out << " dimA = ";
|
||||
for (auto i : ArrayRef<int>{dimA, static_cast<size_t>(nbDims)}) {
|
||||
out << i << ", ";
|
||||
}
|
||||
out << "\n";
|
||||
out << '\n';
|
||||
return out;
|
||||
}
|
||||
|
||||
|
||||
@ -346,15 +346,15 @@ void foreachTensorInplaceWithFlag(std::vector<IValue>& args, int64_t begin, int6
|
||||
}
|
||||
|
||||
std::ostream& operator<< (std::ostream& os, const DynamicLayer& layer) {
|
||||
os << layer.layerId() << ":" << layer.key();
|
||||
os << layer.layerId() << ':' << layer.key();
|
||||
return os;
|
||||
}
|
||||
std::ostream& operator<< (std::ostream& os, const std::vector<DynamicLayer>& dls) {
|
||||
os << "DynamicLayerStack[ ";
|
||||
for (const auto& layer : dls) {
|
||||
os << layer << " ";
|
||||
os << layer << ' ';
|
||||
}
|
||||
os << "]";
|
||||
os << ']';
|
||||
return os;
|
||||
}
|
||||
|
||||
|
||||
@ -22,7 +22,7 @@ void dumpTensor(std::ostream& ss, const Tensor& tensor) {
|
||||
if (batched) {
|
||||
ss << "Batched[lvl=" << batched->level() << " dim=" << batched->bdim() << ", ";
|
||||
dumpTensor(ss, batched->value());
|
||||
ss << "]";
|
||||
ss << ']';
|
||||
return;
|
||||
}
|
||||
ss << "Tensor" << tensor.sizes();
|
||||
@ -36,7 +36,7 @@ void dumpTensor(std::ostream& ss, const Tensor& tensor) {
|
||||
ss << "dead, ";
|
||||
}
|
||||
dumpTensor(ss, wrapped->value());
|
||||
ss << "]";
|
||||
ss << ']';
|
||||
}
|
||||
|
||||
void TensorWrapper::refreshMetadata() {
|
||||
|
||||
@ -73,32 +73,32 @@ std::string miopenTypeToString(miopenDataType_t dtype) {
|
||||
return "miopenBFloat16";
|
||||
default:
|
||||
std::ostringstream oss;
|
||||
oss << "(unknown data-type " << static_cast<int>(dtype) << ")";
|
||||
oss << "(unknown data-type " << static_cast<int>(dtype) << ')';
|
||||
return oss.str();
|
||||
}
|
||||
}
|
||||
|
||||
std::ostream& operator<<(std::ostream & out, const TensorDescriptor& d) {
|
||||
out << "TensorDescriptor " << static_cast<void*>(d.desc()) << "\n";
|
||||
out << "TensorDescriptor " << static_cast<void*>(d.desc()) << '\n';
|
||||
int nbDims = 0;
|
||||
int dimA[MIOPEN_DIM_MAX];
|
||||
int strideA[MIOPEN_DIM_MAX];
|
||||
miopenDataType_t dtype;
|
||||
miopenGetTensorDescriptorSize(d.desc(), &nbDims);
|
||||
miopenGetTensorDescriptor(d.desc(), &dtype, dimA, strideA);
|
||||
out << " type = " << miopenTypeToString(dtype) << "\n";
|
||||
out << " nbDims = " << nbDims << "\n";
|
||||
out << " type = " << miopenTypeToString(dtype) << '\n';
|
||||
out << " nbDims = " << nbDims << '\n';
|
||||
// Read out only nbDims of the arrays!
|
||||
out << " dimA = ";
|
||||
for (auto i : ArrayRef<int>{dimA, static_cast<size_t>(nbDims)}) {
|
||||
out << i << ", ";
|
||||
}
|
||||
out << "\n";
|
||||
out << '\n';
|
||||
out << " strideA = ";
|
||||
for (auto i : ArrayRef<int>{strideA, static_cast<size_t>(nbDims)}) {
|
||||
out << i << ", ";
|
||||
}
|
||||
out << "\n";
|
||||
out << '\n';
|
||||
return out;
|
||||
}
|
||||
|
||||
|
||||
@ -91,7 +91,7 @@ struct OperationInfo : BaseInfo {
|
||||
std::stringstream kernelStr;
|
||||
kernelStr << kernelName;
|
||||
for (const Tensor& tensor : tensors) {
|
||||
kernelStr << ":" << BaseInfo::buildTensorString(tensor, includeBufferId);
|
||||
kernelStr << ':' << BaseInfo::buildTensorString(tensor, includeBufferId);
|
||||
}
|
||||
return kernelStr.str();
|
||||
}
|
||||
|
||||
@ -39,9 +39,9 @@ std::string BaseInfo::buildTensorString(const Tensor& tensor, bool includeBuffer
|
||||
// see comments for INCLUDE_BUFFER_ID
|
||||
if (includeBufferId && deviceType == at::kMPS) {
|
||||
id<MTLBuffer> buffer = __builtin_bit_cast(id<MTLBuffer>, tensor.storage().data());
|
||||
tensorStr << "(buf#" << (getIMPSAllocator()->getBufferId(buffer)) << ":" << buffer.retainCount << ")";
|
||||
tensorStr << "(buf#" << (getIMPSAllocator()->getBufferId(buffer)) << ':' << buffer.retainCount << ')';
|
||||
}
|
||||
tensorStr << ":" << tensor.scalar_type() << tensor.sizes();
|
||||
tensorStr << ':' << tensor.scalar_type() << tensor.sizes();
|
||||
return tensorStr.str();
|
||||
} else {
|
||||
return "undefined";
|
||||
|
||||
@ -167,7 +167,7 @@ static void check_args(CheckedFrom c, IntArrayRef args, size_t expected_size, co
|
||||
std::stringstream ss;
|
||||
ss << arg_name << " should be greater than zero but got (";
|
||||
std::copy(args.begin(), args.end() - 1, std::ostream_iterator<int>(ss,", "));
|
||||
ss << args.back() << ")" << " (while checking arguments for " << c << ")";
|
||||
ss << args.back() << ")" << " (while checking arguments for " << c << ')';
|
||||
TORCH_CHECK(false, ss.str());
|
||||
}
|
||||
}
|
||||
|
||||
@ -639,7 +639,7 @@ static std::ostream& operator<<(std::ostream & out, const ConvParams<T>& params)
|
||||
<< " deterministic = " << params.deterministic
|
||||
<< " cudnn_enabled = " << params.cudnn_enabled
|
||||
<< " allow_tf32 = " << params.allow_tf32
|
||||
<< "}";
|
||||
<< '}';
|
||||
return out;
|
||||
}
|
||||
|
||||
|
||||
@ -847,7 +847,7 @@ Tensor stft(const Tensor& self, const int64_t n_fft, const std::optional<int64_t
|
||||
<< ", hop_length=" << hop_length << ", win_length=" << win_length \
|
||||
<< ", window="; \
|
||||
if (window.defined()) { \
|
||||
SS << window.toString() << "{" << window.sizes() << "}"; \
|
||||
SS << window.toString() << '{' << window.sizes() << '}'; \
|
||||
} else { \
|
||||
SS << "None"; \
|
||||
} \
|
||||
@ -1046,7 +1046,7 @@ Tensor istft(const Tensor& self, const int64_t n_fft, const std::optional<int64_
|
||||
<< ", hop_length=" << hop_length << ", win_length=" << win_length \
|
||||
<< ", window="; \
|
||||
if (window.defined()) { \
|
||||
SS << window.toString() << "{" << window.sizes() << "}"; \
|
||||
SS << window.toString() << '{' << window.sizes() << '}'; \
|
||||
} else { \
|
||||
SS << "None"; \
|
||||
} \
|
||||
|
||||
@ -523,7 +523,7 @@ Tensor _functional_assert_async_msg_cpu(
|
||||
}
|
||||
|
||||
void _print(std::string_view s) {
|
||||
std::cout << s << "\n";
|
||||
std::cout << s << '\n';
|
||||
}
|
||||
|
||||
// Sorting-based algorithm for isin(); used when the number of test elements is
|
||||
|
||||
@ -813,8 +813,43 @@ void smooth_l1_kernel(TensorIteratorBase& iter, double beta) {
|
||||
}
|
||||
|
||||
void huber_kernel(TensorIterator& iter, double delta) {
|
||||
AT_DISPATCH_FLOATING_TYPES_AND2(
|
||||
kBFloat16, kHalf, iter.dtype(), "huber_cpu", [&]() {
|
||||
// Special-case kHalf: compute in float for numerical stability
|
||||
if (iter.dtype() == kHalf) {
|
||||
const float delta_val(static_cast<float>(delta));
|
||||
const Vectorized<float> delta_vec(static_cast<float>(delta));
|
||||
const Vectorized<float> point_five_vec(static_cast<float>(0.5));
|
||||
cpu_kernel_vec(
|
||||
iter,
|
||||
// scalar lambda: convert half -> float, compute in float, cast back to half
|
||||
[&delta_val] (at::Half a, at::Half b) -> at::Half {
|
||||
float af = static_cast<float>(a);
|
||||
float bf = static_cast<float>(b);
|
||||
float z = std::abs(af - bf);
|
||||
float out = z < delta_val
|
||||
? 0.5f * z * z
|
||||
: delta_val * (z - 0.5f * delta_val);
|
||||
return static_cast<at::Half>(out);
|
||||
},
|
||||
[&delta_vec, &point_five_vec] (Vectorized<Half> a, Vectorized<Half> b) {
|
||||
auto [a0, a1] = convert_half_float(a);
|
||||
auto [b0, b1] = convert_half_float(b);
|
||||
auto z = (a0 - b0).abs();
|
||||
a0 = Vectorized<float>::blendv(
|
||||
point_five_vec * z * z,
|
||||
delta_vec * (z - point_five_vec * delta_vec),
|
||||
z >= delta_vec);
|
||||
z = (a1 - b1).abs();
|
||||
a1 = Vectorized<float>::blendv(
|
||||
point_five_vec * z * z,
|
||||
delta_vec * (z - point_five_vec * delta_vec),
|
||||
z >= delta_vec);
|
||||
return convert_float_half(a0, a1);
|
||||
}
|
||||
);
|
||||
return;
|
||||
}
|
||||
else {
|
||||
AT_DISPATCH_FLOATING_TYPES_AND(kBFloat16, iter.dtype(), "huber_cpu", [&]() {
|
||||
using Vec = Vectorized<scalar_t>;
|
||||
const scalar_t delta_val(delta);
|
||||
const Vec delta_val_vec(delta_val);
|
||||
@ -835,6 +870,7 @@ void huber_kernel(TensorIterator& iter, double delta) {
|
||||
z >= delta_val_vec);
|
||||
});
|
||||
});
|
||||
}
|
||||
}
|
||||
|
||||
void sigmoid_backward_kernel(TensorIteratorBase& iter) {
|
||||
|
||||
@ -346,8 +346,9 @@ void dispatch_bf16_grouped_kernel_on_tile_size(
|
||||
bool small = (M <= 128 || N <= 128);
|
||||
cudaDeviceProp* properties = at::cuda::getCurrentDeviceProperties();
|
||||
const bool sm10x = properties != nullptr && properties->major == 10;
|
||||
const bool sm11x = properties != nullptr && properties->major == 11;
|
||||
|
||||
if (sm10x) {
|
||||
if (sm10x || sm11x) {
|
||||
if (small){
|
||||
bf16bf16_grouped_gemm_impl_sm90_sm100<
|
||||
cutlass::arch::Sm100,
|
||||
|
||||
@ -607,6 +607,8 @@ _scaled_grouped_mm_cuda_v2(
|
||||
// scale shape checks
|
||||
_check_scales_blocked(mat_a, scale_a[0], 0 /* dim */, 0 /* arg_idx */);
|
||||
_check_scales_blocked(mat_b, scale_b[0], 1 /* dim */, 1 /* arg_idx */);
|
||||
// swizze checks
|
||||
TORCH_CHECK_VALUE(swizzle_a_enum.size() == 1 && swizzle_b_enum.size() == 1, "Expected single swizzle argument");
|
||||
return _mx8_mx8_bf16_grouped_mm_fbgemm(
|
||||
mat_a,
|
||||
mat_b,
|
||||
|
||||
@ -5,11 +5,69 @@
|
||||
#include <cuda_bf16.h>
|
||||
#endif
|
||||
|
||||
// ROCm 6.3 is planned to have these functions, but until then here they are.
|
||||
#if defined(USE_ROCM)
|
||||
#include <device_functions.h>
|
||||
#include <hip/hip_fp16.h>
|
||||
#include <hip/hip_bf16.h>
|
||||
#define ATOMICADD unsafeAtomicAdd
|
||||
|
||||
__device__ inline __hip_bfloat162 preview_unsafeAtomicAdd(__hip_bfloat162* address, __hip_bfloat162 value) {
|
||||
#if (defined(__gfx942__)) && \
|
||||
__has_builtin(__builtin_amdgcn_flat_atomic_fadd_v2bf16)
|
||||
typedef unsigned short __attribute__((ext_vector_type(2))) vec_short2;
|
||||
static_assert(sizeof(vec_short2) == sizeof(__hip_bfloat162_raw));
|
||||
union {
|
||||
__hip_bfloat162_raw bf162_raw;
|
||||
vec_short2 vs2;
|
||||
} u{static_cast<__hip_bfloat162_raw>(value)};
|
||||
u.vs2 = __builtin_amdgcn_flat_atomic_fadd_v2bf16((vec_short2*)address, u.vs2);
|
||||
return static_cast<__hip_bfloat162>(u.bf162_raw);
|
||||
#else
|
||||
static_assert(sizeof(unsigned int) == sizeof(__hip_bfloat162_raw));
|
||||
union u_hold {
|
||||
__hip_bfloat162_raw h2r;
|
||||
unsigned int u32;
|
||||
};
|
||||
u_hold old_val, new_val;
|
||||
old_val.u32 = __hip_atomic_load((unsigned int*)address, __ATOMIC_RELAXED, __HIP_MEMORY_SCOPE_AGENT);
|
||||
do {
|
||||
new_val.h2r = __hadd2(old_val.h2r, value);
|
||||
} while (!__hip_atomic_compare_exchange_strong(
|
||||
(unsigned int*)address, &old_val.u32, new_val.u32,
|
||||
__ATOMIC_RELAXED, __ATOMIC_RELAXED, __HIP_MEMORY_SCOPE_AGENT));
|
||||
return old_val.h2r;
|
||||
#endif
|
||||
}
|
||||
|
||||
__device__ inline __half2 preview_unsafeAtomicAdd(__half2* address, __half2 value) {
|
||||
#if (defined(__gfx942__)) && \
|
||||
__has_builtin(__builtin_amdgcn_flat_atomic_fadd_v2f16)
|
||||
// The api expects an ext_vector_type of half
|
||||
typedef _Float16 __attribute__((ext_vector_type(2))) vec_fp162;
|
||||
static_assert(sizeof(vec_fp162) == sizeof(__half2_raw));
|
||||
union {
|
||||
__half2_raw h2r;
|
||||
vec_fp162 fp16;
|
||||
} u {static_cast<__half2_raw>(value)};
|
||||
u.fp16 = __builtin_amdgcn_flat_atomic_fadd_v2f16((vec_fp162*)address, u.fp16);
|
||||
return static_cast<__half2>(u.h2r);
|
||||
#else
|
||||
static_assert(sizeof(__half2_raw) == sizeof(unsigned int));
|
||||
union u_hold {
|
||||
__half2_raw h2r;
|
||||
unsigned int u32;
|
||||
};
|
||||
u_hold old_val, new_val;
|
||||
old_val.u32 = __hip_atomic_load((unsigned int*)address, __ATOMIC_RELAXED, __HIP_MEMORY_SCOPE_AGENT);
|
||||
do {
|
||||
new_val.h2r = __hadd2(old_val.h2r, value);
|
||||
} while (!__hip_atomic_compare_exchange_strong(
|
||||
(unsigned int*)address, &old_val.u32, new_val.u32,
|
||||
__ATOMIC_RELAXED, __ATOMIC_RELAXED, __HIP_MEMORY_SCOPE_AGENT));
|
||||
return old_val.h2r;
|
||||
#endif
|
||||
}
|
||||
#define ATOMICADD preview_unsafeAtomicAdd
|
||||
#define NATIVE_ZERO_BF16 __float2bfloat16(0.0f)
|
||||
#else
|
||||
#define ATOMICADD atomicAdd
|
||||
|
||||
@ -11,7 +11,7 @@ static inline std::ostream& operator<<(std::ostream& out, dim3 dim) {
|
||||
if (dim.y == 1 && dim.z == 1) {
|
||||
out << dim.x;
|
||||
} else {
|
||||
out << "[" << dim.x << "," << dim.y << "," << dim.z << "]";
|
||||
out << '[' << dim.x << ',' << dim.y << ',' << dim.z << ']';
|
||||
}
|
||||
return out;
|
||||
}
|
||||
@ -27,7 +27,7 @@ std::ostream& operator<<(std::ostream& out, const ReduceConfig& config) {
|
||||
out << "input_mult=[";
|
||||
for (int i = 0; i < 3; i++) {
|
||||
if (i != 0) {
|
||||
out << ",";
|
||||
out << ',';
|
||||
}
|
||||
out << config.input_mult[i];
|
||||
}
|
||||
@ -35,7 +35,7 @@ std::ostream& operator<<(std::ostream& out, const ReduceConfig& config) {
|
||||
out << "output_mult=[";
|
||||
for (int i = 0; i < 2; i++) {
|
||||
if (i != 0) {
|
||||
out << ",";
|
||||
out << ',';
|
||||
}
|
||||
out << config.output_mult[i];
|
||||
}
|
||||
@ -49,7 +49,7 @@ std::ostream& operator<<(std::ostream& out, const ReduceConfig& config) {
|
||||
out << "block=" << config.block() << ", ";
|
||||
out << "grid=" << config.grid() << ", ";
|
||||
out << "global_memory_size=" << config.global_memory_size();
|
||||
out << ")";
|
||||
out << ')';
|
||||
return out;
|
||||
}
|
||||
|
||||
|
||||
@ -958,8 +958,9 @@ void dispatch_fp8_rowwise_kernel_on_sm(
|
||||
const bool sm89 = properties != nullptr && properties->major == 8 && properties->minor == 9;
|
||||
const bool sm9x = properties != nullptr && properties->major == 9;
|
||||
const bool sm10x = properties != nullptr && properties->major == 10;
|
||||
const bool sm11x = properties != nullptr && properties->major == 11;
|
||||
const bool sm12x = properties != nullptr && properties->major == 12;
|
||||
if (!(sm89 || sm9x || sm10x || sm12x)) {
|
||||
if (!(sm89 || sm9x || sm10x || sm11x || sm12x)) {
|
||||
TORCH_CHECK(
|
||||
false, "Rowwise scaling is not currently supported on your device");
|
||||
}
|
||||
@ -968,7 +969,7 @@ void dispatch_fp8_rowwise_kernel_on_sm(
|
||||
dispatch_fp8_rowwise_kernel_on_cluster_size_and_transpose<
|
||||
/*ArchTag=*/cutlass::arch::Sm90,
|
||||
Types...>(XQ, WQ, x_scale, w_scale, bias, out);
|
||||
} else if (sm10x) {
|
||||
} else if (sm10x || sm11x) {
|
||||
dispatch_fp8_rowwise_kernel_on_cluster_size_and_transpose<
|
||||
/*ArchTag=*/cutlass::arch::Sm100,
|
||||
Types...>(XQ, WQ, x_scale, w_scale, bias, out);
|
||||
|
||||
@ -364,9 +364,9 @@ void f8f8bf16_grouped_gemm_impl_sm90(
|
||||
// reinterpret_cast<ProblemShape::UnderlyingProblemShape*>(
|
||||
// stride_output_h + group_count);
|
||||
|
||||
// std::cout << "PTRS " << mat_a.data_ptr() << " " << mat_b.data_ptr() << "
|
||||
// std::cout << "PTRS " << mat_a.data_ptr() << ' ' << mat_b.data_ptr() << "
|
||||
// "
|
||||
// << out.data_ptr() << " " << scale_a.data_ptr() << " "
|
||||
// << out.data_ptr() << ' ' << scale_a.data_ptr() << ' '
|
||||
// << scale_b.data_ptr() << "\n";
|
||||
// for (int i = 0; i < group_count; i++) {
|
||||
// std::cout << "A " << (void*)inputA_ptrs_h[i] << "\n";
|
||||
|
||||
@ -1057,14 +1057,14 @@ std::string generate_code(
|
||||
// TODO these arrays are potentially of the different types, use function
|
||||
// traits to determine the types
|
||||
declare_load_arrays << f_inputs_type << " arg" << std::to_string(i)
|
||||
<< "[" << std::to_string(thread_work_size) << "];\n";
|
||||
<< '[' << std::to_string(thread_work_size) << "];\n";
|
||||
}
|
||||
env.s("declare_load_arrays", declare_load_arrays.str());
|
||||
|
||||
std::stringstream declare_store_arrays;
|
||||
for (int i = 0; i < nOutputs; i++) {
|
||||
declare_store_arrays << result_type << " out" << std::to_string(i)
|
||||
<< "[" << std::to_string(thread_work_size) << "];\n";
|
||||
<< '[' << std::to_string(thread_work_size) << "];\n";
|
||||
}
|
||||
env.s("declare_store_arrays", declare_store_arrays.str());
|
||||
|
||||
@ -1217,7 +1217,7 @@ std::string generate_code(
|
||||
for (const auto i : c10::irange(nInputs)){
|
||||
auto i_string = std::to_string(i);
|
||||
vector_inputs << "auto * input" << i_string <<
|
||||
" = reinterpret_cast<const scalar_t*>(data[" << i_string << "+" << nOutputs << "])" <<
|
||||
" = reinterpret_cast<const scalar_t*>(data[" << i_string << '+' << nOutputs << "])" <<
|
||||
" + block_work_size * idx;\n";
|
||||
}
|
||||
env.s("vector_inputs", vector_inputs.str());
|
||||
@ -1543,17 +1543,17 @@ NvrtcFunction jit_pwise_function(
|
||||
|
||||
// Constructs file path by appending constructed cubin name to cache path
|
||||
std::stringstream ss;
|
||||
ss << *cache_dir << "/";
|
||||
ss << *cache_dir << '/';
|
||||
ss << kernel_name;
|
||||
#ifdef USE_ROCM
|
||||
ss << "_arch" << prop->gcnArchName;
|
||||
#else
|
||||
ss << "_arch" << cuda_major << "." << cuda_minor;
|
||||
ss << "_arch" << cuda_major << '.' << cuda_minor;
|
||||
#endif
|
||||
ss << "_nvrtc" << nvrtc_major << "." << nvrtc_minor;
|
||||
ss << "_nvrtc" << nvrtc_major << '.' << nvrtc_minor;
|
||||
ss << (compile_to_sass ? "_sass" : "_ptx");
|
||||
ss << "_" << code.length();
|
||||
ss << "_" << hash_code;
|
||||
ss << '_' << code.length();
|
||||
ss << '_' << hash_code;
|
||||
file_path = ss.str();
|
||||
|
||||
std::ifstream readin{file_path, std::ios::in | std::ifstream::binary};
|
||||
|
||||
@ -82,15 +82,15 @@ namespace native {
|
||||
|
||||
std::ostream& operator<<(std::ostream& out, const ConvolutionParams& params) {
|
||||
out << "ConvolutionParams \n"
|
||||
<< " memory_format = " << params.memory_format << "\n"
|
||||
<< " data_type = " << cudnnTypeToString(params.dataType) << "\n"
|
||||
<< " padding = " << ArrayRef<int>{params.padding} << "\n"
|
||||
<< " stride = " << ArrayRef<int>{params.stride} << "\n"
|
||||
<< " dilation = " << ArrayRef<int>{params.dilation} << "\n"
|
||||
<< " groups = " << params.groups << "\n"
|
||||
<< " memory_format = " << params.memory_format << '\n'
|
||||
<< " data_type = " << cudnnTypeToString(params.dataType) << '\n'
|
||||
<< " padding = " << ArrayRef<int>{params.padding} << '\n'
|
||||
<< " stride = " << ArrayRef<int>{params.stride} << '\n'
|
||||
<< " dilation = " << ArrayRef<int>{params.dilation} << '\n'
|
||||
<< " groups = " << params.groups << '\n'
|
||||
<< " deterministic = " << (params.deterministic ? "true" : "false")
|
||||
<< "\n"
|
||||
<< " allow_tf32 = " << (params.allow_tf32 ? "true" : "false") << "\n";
|
||||
<< '\n'
|
||||
<< " allow_tf32 = " << (params.allow_tf32 ? "true" : "false") << '\n';
|
||||
|
||||
return out;
|
||||
}
|
||||
@ -173,16 +173,16 @@ std::string repro_from_args(const ConvolutionParams& params) {
|
||||
at::globalContext().float32Precision(
|
||||
at::Float32Backend::CUDA, at::Float32Op::MATMUL) ==
|
||||
at::Float32Precision::TF32)
|
||||
<< "\n";
|
||||
<< '\n';
|
||||
ss << "torch.backends.cudnn.benchmark = "
|
||||
<< pybool(at::globalContext().benchmarkCuDNN()) << "\n";
|
||||
<< pybool(at::globalContext().benchmarkCuDNN()) << '\n';
|
||||
ss << "torch.backends.cudnn.deterministic = " << pybool(params.deterministic)
|
||||
<< "\n";
|
||||
<< '\n';
|
||||
ss << "torch.backends.cudnn.allow_tf32 = " << pybool(params.allow_tf32)
|
||||
<< "\n";
|
||||
<< '\n';
|
||||
ss << "data = torch.randn(" << ArrayRef<int>(params.input_size, dim)
|
||||
<< ", dtype=" << full_dtype << ", ";
|
||||
ss << "device='cuda', requires_grad=True)" << to_channels_last << "\n";
|
||||
ss << "device='cuda', requires_grad=True)" << to_channels_last << '\n';
|
||||
ss << "net = torch.nn.Conv" << dim - 2 << "d(" << in_channels << ", "
|
||||
<< out_channels << ", ";
|
||||
ss << "kernel_size=" << ArrayRef<int>(&params.weight_size[2], dim - 2)
|
||||
@ -192,7 +192,7 @@ std::string repro_from_args(const ConvolutionParams& params) {
|
||||
ss << "dilation=" << ArrayRef<int>(params.dilation, dim - 2) << ", ";
|
||||
ss << "groups=" << params.groups << ")\n";
|
||||
ss << "net = net.cuda()." << partial_dtype << "()" << to_channels_last
|
||||
<< "\n";
|
||||
<< '\n';
|
||||
ss << "out = net(data)\n";
|
||||
ss << "out.backward(torch.randn_like(out))\n";
|
||||
ss << "torch.cuda.synchronize()\n\n";
|
||||
|
||||
@ -93,11 +93,10 @@ std::ostream& operator<<(std::ostream& out, const ConvolutionArgs& args) {
|
||||
<< "input: " << args.idesc // already has a trailing newline
|
||||
<< "output: " << args.odesc // already has a trailing newline
|
||||
<< "weight: " << args.wdesc // already has a trailing newline
|
||||
<< "Pointer addresses: "
|
||||
<< "\n"
|
||||
<< " input: " << args.input.const_data_ptr() << "\n"
|
||||
<< " output: " << args.output.const_data_ptr() << "\n"
|
||||
<< " weight: " << args.weight.const_data_ptr() << "\n";
|
||||
<< "Pointer addresses: " << '\n'
|
||||
<< " input: " << args.input.const_data_ptr() << '\n'
|
||||
<< " output: " << args.output.const_data_ptr() << '\n'
|
||||
<< " weight: " << args.weight.const_data_ptr() << '\n';
|
||||
|
||||
return out;
|
||||
}
|
||||
|
||||
@ -115,7 +115,7 @@ std::ostream& operator<<(
|
||||
std::copy(
|
||||
strides.begin(), strides.end() - 1, std::ostream_iterator<int>(oss, ","));
|
||||
oss << sizes.back();
|
||||
output << oss.str() << "}";
|
||||
output << oss.str() << '}';
|
||||
return output;
|
||||
}
|
||||
|
||||
|
||||
@ -53,7 +53,7 @@ std::ostream& operator<<(std::ostream& out, const ConvParams& params) {
|
||||
<< " transposed = " << params.transposed
|
||||
<< " output_padding = " << IntArrayRef{params.output_padding}
|
||||
<< " groups = " << params.groups << " benchmark = " << params.benchmark
|
||||
<< " deterministic = " << params.deterministic << "}";
|
||||
<< " deterministic = " << params.deterministic << '}';
|
||||
return out;
|
||||
}
|
||||
|
||||
|
||||
@ -147,6 +147,19 @@ class MetalShaderLibrary {
|
||||
const std::optional<c10::Scalar> alpha = std::nullopt,
|
||||
const std::optional<c10::ScalarType> scalar_arg_type = std::nullopt);
|
||||
|
||||
template <typename T>
|
||||
void exec_unary_kernel_with_params(
|
||||
TensorIteratorBase& iter,
|
||||
const std::string& name,
|
||||
T params,
|
||||
const std::string& params_type_name);
|
||||
template <typename T>
|
||||
void exec_binary_kernel_with_params(
|
||||
TensorIteratorBase& iter,
|
||||
const std::string& name,
|
||||
T params,
|
||||
const std::string& params_type_name);
|
||||
|
||||
protected:
|
||||
virtual MTLLibrary_t getLibrary();
|
||||
virtual MTLLibrary_t getLibrary(
|
||||
|
||||
@ -7,10 +7,12 @@
|
||||
#include <ATen/Tensor.h>
|
||||
#include <ATen/TensorIterator.h>
|
||||
#include <ATen/Utils.h>
|
||||
#include <ATen/mps/MPSProfiler.h>
|
||||
#include <ATen/mps/MPSStream.h>
|
||||
#include <ATen/native/mps/MetalShaderLibrary.h>
|
||||
#include <ATen/native/mps/TensorFactory.h>
|
||||
#include <c10/core/ScalarType.h>
|
||||
#include <fmt/format.h>
|
||||
#include <torch/library.h>
|
||||
#include <unordered_map>
|
||||
|
||||
@ -630,4 +632,147 @@ inline bool needsGather(const TensorBase& t) {
|
||||
return !is_macOS_15_0_or_newer && (!t.is_contiguous() || t.storage_offset());
|
||||
}
|
||||
|
||||
template <typename T>
|
||||
void MetalShaderLibrary::exec_unary_kernel_with_params(TensorIteratorBase& iter,
|
||||
const std::string& name,
|
||||
T params,
|
||||
const std::string& params_type_name) {
|
||||
using namespace at::mps;
|
||||
// Decompose 64-bit tensor into 32-bit ones
|
||||
if (!iter.can_use_32bit_indexing()) {
|
||||
for (auto&& sub_iter : iter.with_32bit_indexing()) {
|
||||
exec_unary_kernel_with_params(sub_iter, name, params, params_type_name);
|
||||
}
|
||||
return;
|
||||
}
|
||||
|
||||
auto inputTensor = iter.input(0);
|
||||
auto outputTensor = iter.output(0);
|
||||
uint32_t length = iter.numel();
|
||||
if (length == 0) {
|
||||
return;
|
||||
}
|
||||
auto kernel_name = fmt::format("{}_{}_{}_{}{}",
|
||||
name,
|
||||
iter.is_contiguous() ? "dense" : "strided",
|
||||
scalarToMetalTypeString(outputTensor),
|
||||
scalarToMetalTypeString(inputTensor),
|
||||
fmt::format("_{}", params_type_name));
|
||||
@autoreleasepool {
|
||||
auto cplState = getPipelineStateForFunc(kernel_name);
|
||||
|
||||
MPSStream* mpsStream = getCurrentMPSStream();
|
||||
dispatch_sync(mpsStream->queue(), ^() {
|
||||
auto computeEncoder = mpsStream->commandEncoder();
|
||||
|
||||
getMPSProfiler().beginProfileKernel(cplState, name, {inputTensor});
|
||||
|
||||
[computeEncoder setComputePipelineState:cplState];
|
||||
bind_iter_tensors(computeEncoder, iter);
|
||||
if (!iter.is_contiguous()) {
|
||||
mtl_setArgs<2>(computeEncoder,
|
||||
outputTensor.sizes(),
|
||||
inputTensor.strides(),
|
||||
outputTensor.strides(),
|
||||
inputTensor.ndimension());
|
||||
}
|
||||
detail::mtl_setArg(computeEncoder, params, iter.is_contiguous() ? 2 : 6);
|
||||
mtl_dispatch1DJob(computeEncoder, cplState, length);
|
||||
|
||||
getMPSProfiler().endProfileKernel(cplState);
|
||||
});
|
||||
}
|
||||
}
|
||||
|
||||
template <typename T>
|
||||
void MetalShaderLibrary::exec_binary_kernel_with_params(TensorIteratorBase& iter,
|
||||
const std::string& name,
|
||||
T params,
|
||||
const std::string& params_type_name) {
|
||||
using namespace mps;
|
||||
// TODO: Figure a better place to downcast double scalars (probably in tensor iterator itself?)
|
||||
// Right now running something like 1.0-torch.rand(5, device='mps') will create iterator with
|
||||
// double as common dtype (because Python floating point are always 64-bit values)
|
||||
TORCH_CHECK(iter.output().scalar_type() != at::kDouble, "float64 is not supported on MPS");
|
||||
|
||||
// Skip for empty iterators
|
||||
if (iter.numel() == 0) {
|
||||
return;
|
||||
}
|
||||
|
||||
// Decompose 64-bit tensor into 32-bit ones
|
||||
if (!iter.can_use_32bit_indexing()) {
|
||||
for (auto&& sub_iter : iter.with_32bit_indexing()) {
|
||||
exec_binary_kernel_with_params(sub_iter, name, params, params_type_name);
|
||||
}
|
||||
return;
|
||||
}
|
||||
|
||||
auto convert_double_scalar = [](Tensor& t) {
|
||||
if (t.dim() != 0) {
|
||||
return;
|
||||
}
|
||||
if (t.scalar_type() == kDouble) {
|
||||
t = t.to(kFloat);
|
||||
} else if (t.scalar_type() == kComplexDouble) {
|
||||
t = t.to(kComplexFloat);
|
||||
}
|
||||
};
|
||||
|
||||
Tensor input = iter.input(0);
|
||||
Tensor other = iter.input(1);
|
||||
Tensor out = iter.output();
|
||||
|
||||
convert_double_scalar(input);
|
||||
convert_double_scalar(other);
|
||||
|
||||
MPSStream* mpsStream = getCurrentMPSStream();
|
||||
const auto cast_needed = input.scalar_type() != other.scalar_type();
|
||||
const auto suffix = iter.is_contiguous() ? "dense" : "strided";
|
||||
// TODO: Implicitly pass both input and output types to non-cast kernels
|
||||
const auto kernel_name = cast_needed
|
||||
? fmt::format("{}_{}_cast_{}_{}", name, suffix, scalarToMetalTypeString(out), params_type_name)
|
||||
: fmt::format("{}_{}_{}_{}_{}",
|
||||
name,
|
||||
suffix,
|
||||
scalarToMetalTypeString(out),
|
||||
scalarToMetalTypeString(input),
|
||||
params_type_name);
|
||||
dispatch_sync_with_rethrow(mpsStream->queue(), ^() {
|
||||
@autoreleasepool {
|
||||
auto computeEncoder = mpsStream->commandEncoder();
|
||||
auto binaryPSO = getPipelineStateForFunc(kernel_name);
|
||||
// this function call is a no-op if MPS Profiler is not enabled
|
||||
getMPSProfiler().beginProfileKernel(binaryPSO, kernel_name, {input, other});
|
||||
[computeEncoder setComputePipelineState:binaryPSO];
|
||||
// Set input and output tensors
|
||||
bind_iter_tensors(computeEncoder, iter);
|
||||
// Iterator is contiguous if all of its elements are dense in storage,
|
||||
// i.e. it's true for both row-first and column-first tensors
|
||||
if (iter.is_contiguous()) {
|
||||
detail::mtl_setArg(computeEncoder, params, 3);
|
||||
if (cast_needed) {
|
||||
std::array<int, 4> size_and_types = {static_cast<int>(c10::elementSize(input.scalar_type())),
|
||||
static_cast<int>(c10::elementSize(other.scalar_type())),
|
||||
static_cast<int>(input.scalar_type()),
|
||||
static_cast<int>(other.scalar_type())};
|
||||
mtl_setBytes(computeEncoder, size_and_types, 4);
|
||||
}
|
||||
} else {
|
||||
// Please note that shapes and strides of the iterator might be
|
||||
// different than that of its operands, for example binary op
|
||||
// between 4x4 tensor and scalar will result in 1D 16 element iterator
|
||||
std::array<int, 4> ndim_and_types = {iter.ndim(),
|
||||
static_cast<int>(input.scalar_type()),
|
||||
static_cast<int>(other.scalar_type()),
|
||||
static_cast<int>(out.scalar_type())};
|
||||
mtl_setArgs<3>(
|
||||
computeEncoder, params, iter.shape(), iter.strides(0), iter.strides(1), iter.strides(2), ndim_and_types);
|
||||
}
|
||||
mtl_dispatch1DJob(computeEncoder, binaryPSO, iter.numel());
|
||||
getMPSProfiler().endProfileKernel(binaryPSO);
|
||||
}
|
||||
});
|
||||
}
|
||||
|
||||
} // namespace at::native::mps
|
||||
|
||||
16
aten/src/ATen/native/mps/kernels/Activation.h
Normal file
16
aten/src/ATen/native/mps/kernels/Activation.h
Normal file
@ -0,0 +1,16 @@
|
||||
#pragma once
|
||||
|
||||
template <typename T>
|
||||
struct ELUParams {
|
||||
T alpha;
|
||||
T scale;
|
||||
T input_scale;
|
||||
};
|
||||
|
||||
template <typename T>
|
||||
struct ELUBackwardParams {
|
||||
T alpha;
|
||||
T scale;
|
||||
T input_scale;
|
||||
bool is_result;
|
||||
};
|
||||
@ -1,3 +1,4 @@
|
||||
#include <ATen/native/mps/kernels/Activation.h>
|
||||
#include <c10/metal/indexing.h>
|
||||
#include <c10/metal/special_math.h>
|
||||
#include <metal_stdlib>
|
||||
@ -99,6 +100,59 @@ REGISTER_BINARY_OP(hardswish_backward, float, float);
|
||||
REGISTER_BINARY_OP(hardswish_backward, half, half);
|
||||
REGISTER_BINARY_OP(hardswish_backward, bfloat, bfloat);
|
||||
|
||||
struct elu_functor {
|
||||
template <typename T>
|
||||
inline T operator()(const T self_, const ELUParams<T> params) {
|
||||
using op_T = opmath_t<T>;
|
||||
auto alpha = static_cast<op_T>(params.alpha);
|
||||
auto scale = static_cast<op_T>(params.scale);
|
||||
auto input_scale = static_cast<op_T>(params.input_scale);
|
||||
auto self = static_cast<op_T>(self_);
|
||||
auto neg_res = alpha * (::metal::precise::exp(self * input_scale) - 1);
|
||||
return static_cast<T>(scale * (self < 0 ? neg_res : self));
|
||||
}
|
||||
};
|
||||
|
||||
struct elu_backward_functor {
|
||||
template <typename T>
|
||||
inline T operator()(
|
||||
const T grad_output_,
|
||||
const T self_,
|
||||
ELUBackwardParams<T> params) {
|
||||
using op_T = opmath_t<T>;
|
||||
auto alpha = static_cast<op_T>(params.alpha);
|
||||
auto scale = static_cast<op_T>(params.scale);
|
||||
auto input_scale = static_cast<op_T>(params.input_scale);
|
||||
auto grad_output = static_cast<op_T>(grad_output_);
|
||||
auto self = static_cast<op_T>(self_);
|
||||
|
||||
if (params.is_result) {
|
||||
auto neg_coef = input_scale * (self + alpha * scale);
|
||||
return static_cast<T>(grad_output * (self <= 0 ? neg_coef : scale));
|
||||
} else {
|
||||
auto neg_coef = input_scale * alpha * scale *
|
||||
::metal::precise::exp(self * input_scale);
|
||||
return static_cast<T>(grad_output * (self <= 0 ? neg_coef : scale));
|
||||
}
|
||||
}
|
||||
};
|
||||
|
||||
#define REGISTER_ELU_OP(T) \
|
||||
typedef ELUParams<T> ELUParams_##T; \
|
||||
REGISTER_UNARY_ALPHA_OP(elu, T, ELUParams_##T, T);
|
||||
|
||||
REGISTER_ELU_OP(float);
|
||||
REGISTER_ELU_OP(half);
|
||||
REGISTER_ELU_OP(bfloat);
|
||||
|
||||
#define REGISTER_ELU_BACKWARD_OP(T) \
|
||||
typedef ELUBackwardParams<T> ELUBackwardParams_##T; \
|
||||
REGISTER_BINARY_ALPHA_OP(elu_backward, T, ELUBackwardParams_##T, T);
|
||||
|
||||
REGISTER_ELU_BACKWARD_OP(float);
|
||||
REGISTER_ELU_BACKWARD_OP(half);
|
||||
REGISTER_ELU_BACKWARD_OP(bfloat);
|
||||
|
||||
struct leaky_relu_functor {
|
||||
template <typename T>
|
||||
inline T operator()(const T x, const T negative_slope) {
|
||||
|
||||
@ -11,8 +11,6 @@
|
||||
#include <ATen/ops/_log_softmax_native.h>
|
||||
#include <ATen/ops/_prelu_kernel_backward_native.h>
|
||||
#include <ATen/ops/_prelu_kernel_native.h>
|
||||
#include <ATen/ops/elu_backward_native.h>
|
||||
#include <ATen/ops/elu_native.h>
|
||||
#include <ATen/ops/gelu_backward_native.h>
|
||||
#include <ATen/ops/gelu_native.h>
|
||||
#include <ATen/ops/glu_backward_native.h>
|
||||
@ -698,194 +696,6 @@ TORCH_IMPL_FUNC(gelu_backward_out_mps)
|
||||
}
|
||||
}
|
||||
|
||||
static void elu_variants_out_mps(const Tensor& self,
|
||||
const Scalar& alpha,
|
||||
const Scalar& scale,
|
||||
const Scalar& input_scale,
|
||||
const Tensor& result,
|
||||
std::string func_name) {
|
||||
using namespace mps;
|
||||
using CachedGraph = MPSUnaryCachedGraph;
|
||||
|
||||
auto resultMemFormat = result.suggest_memory_format();
|
||||
bool executeGatherOp = !(self.is_contiguous(resultMemFormat) && result.is_contiguous(resultMemFormat));
|
||||
Tensor out;
|
||||
if (executeGatherOp) {
|
||||
out = at::empty_like(result, MemoryFormat::Contiguous);
|
||||
}
|
||||
|
||||
// Empty output
|
||||
if (result.numel() == 0) {
|
||||
return;
|
||||
}
|
||||
|
||||
MPSStream* stream = getCurrentMPSStream();
|
||||
|
||||
@autoreleasepool {
|
||||
std::string key = func_name + ":" + getTensorsStringKey({self}) + ":" + std::to_string(alpha.to<double>()) + ":" +
|
||||
std::to_string(scale.to<double>()) + ":" + std::to_string(input_scale.to<double>());
|
||||
|
||||
auto cachedGraph = LookUpOrCreateCachedGraph<CachedGraph>(key, [&](auto mpsGraph, auto newCachedGraph) {
|
||||
MPSGraphTensor* inputTensor = mpsGraphRankedPlaceHolder(mpsGraph, self);
|
||||
|
||||
// scale * (max(0, x) + min(0, alpha * (exp(input_scale * x) - 1) ))
|
||||
|
||||
MPSGraphTensor* alphaTensor = [mpsGraph constantWithScalar:alpha.to<double>()
|
||||
shape:@[ @1 ]
|
||||
dataType:getMPSDataType(self)];
|
||||
|
||||
MPSGraphTensor* inputScaleTensor = [mpsGraph constantWithScalar:input_scale.to<double>()
|
||||
shape:@[ @1 ]
|
||||
dataType:getMPSDataType(self)];
|
||||
|
||||
MPSGraphTensor* scaleTensor = [mpsGraph constantWithScalar:scale.to<double>()
|
||||
shape:@[ @1 ]
|
||||
dataType:getMPSDataType(self)];
|
||||
MPSGraphTensor* unitTensor = [mpsGraph constantWithScalar:1.0f shape:@[ @1 ] dataType:getMPSDataType(self)];
|
||||
MPSGraphTensor* zeroTensor = [mpsGraph constantWithScalar:0.0f shape:@[ @1 ] dataType:getMPSDataType(self)];
|
||||
|
||||
MPSGraphTensor* scaledInputTensor = [mpsGraph multiplicationWithPrimaryTensor:inputTensor
|
||||
secondaryTensor:inputScaleTensor
|
||||
name:nil];
|
||||
MPSGraphTensor* exponentTensor = [mpsGraph exponentWithTensor:scaledInputTensor name:nil];
|
||||
MPSGraphTensor* exponentMinusOneTensor = [mpsGraph subtractionWithPrimaryTensor:exponentTensor
|
||||
secondaryTensor:unitTensor
|
||||
name:nil];
|
||||
MPSGraphTensor* alphaTimesTensor = [mpsGraph multiplicationWithPrimaryTensor:exponentMinusOneTensor
|
||||
secondaryTensor:alphaTensor
|
||||
name:nil];
|
||||
MPSGraphTensor* predicateTensor = [mpsGraph greaterThanWithPrimaryTensor:inputTensor
|
||||
secondaryTensor:zeroTensor
|
||||
name:nil];
|
||||
MPSGraphTensor* fusedOutput = [mpsGraph selectWithPredicateTensor:predicateTensor
|
||||
truePredicateTensor:inputTensor
|
||||
falsePredicateTensor:alphaTimesTensor
|
||||
name:nil];
|
||||
MPSGraphTensor* outputTensor = [mpsGraph multiplicationWithPrimaryTensor:fusedOutput
|
||||
secondaryTensor:scaleTensor
|
||||
name:nil];
|
||||
|
||||
newCachedGraph->inputTensor_ = inputTensor;
|
||||
newCachedGraph->outputTensor_ = outputTensor;
|
||||
});
|
||||
|
||||
auto selfPlaceholder = Placeholder(cachedGraph->inputTensor_, self, nil, executeGatherOp);
|
||||
auto outputPlaceholder = Placeholder(cachedGraph->outputTensor_, out.has_storage() ? out : result, nil, false);
|
||||
auto feeds = dictionaryFromPlaceholders(selfPlaceholder);
|
||||
runMPSGraph(stream, cachedGraph->graph(), feeds, outputPlaceholder);
|
||||
if (out.has_storage()) {
|
||||
result.copy_(out);
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
// scale * (max(0, x) + min(0, alpha * (exp(input_scale * x) - 1) ))
|
||||
TORCH_IMPL_FUNC(elu_out_mps)
|
||||
(const Tensor& self, const Scalar& alpha, const Scalar& scale, const Scalar& input_scale, const Tensor& result) {
|
||||
elu_variants_out_mps(self, alpha, scale, input_scale, result, "elu_out_mps");
|
||||
}
|
||||
|
||||
TORCH_IMPL_FUNC(elu_backward_out_mps)
|
||||
(const Tensor& grad_output,
|
||||
const Scalar& alpha,
|
||||
const Scalar& scale,
|
||||
const Scalar& input_scale,
|
||||
bool is_result,
|
||||
const Tensor& self_or_result,
|
||||
const Tensor& grad_input) {
|
||||
using namespace mps;
|
||||
using CachedGraph = MPSUnaryGradCachedGraph;
|
||||
auto gradMemFormat = grad_input.suggest_memory_format();
|
||||
bool executeGatherOp = !(grad_output.is_contiguous(gradMemFormat) && self_or_result.is_contiguous(gradMemFormat) &&
|
||||
grad_input.is_contiguous(gradMemFormat));
|
||||
Tensor out;
|
||||
if (executeGatherOp && gradMemFormat == MemoryFormat::ChannelsLast) {
|
||||
out = at::empty_like(grad_input, MemoryFormat::Contiguous);
|
||||
}
|
||||
|
||||
// Empty output
|
||||
if (grad_input.numel() == 0) {
|
||||
return;
|
||||
}
|
||||
|
||||
MPSStream* stream = getCurrentMPSStream();
|
||||
|
||||
@autoreleasepool {
|
||||
std::string key = "elu_backward_out_mps:" + getTensorsStringKey({grad_output, self_or_result}) + ":" +
|
||||
std::to_string(alpha.to<double>()) + ":" + std::to_string(scale.to<double>()) + ":" +
|
||||
std::to_string(input_scale.to<double>()) + ":" + std::to_string(is_result);
|
||||
|
||||
auto cachedGraph = LookUpOrCreateCachedGraph<CachedGraph>(key, [&](auto mpsGraph, auto newCachedGraph) {
|
||||
MPSGraphTensor* gradOutputTensor = mpsGraphRankedPlaceHolder(mpsGraph, grad_output);
|
||||
MPSGraphTensor* selfOrResultTensor = mpsGraphRankedPlaceHolder(mpsGraph, self_or_result);
|
||||
MPSGraphTensor* lessThanZeroGradTensor = nil;
|
||||
|
||||
if (is_result) {
|
||||
MPSGraphTensor* alphaTensor = [mpsGraph constantWithScalar:alpha.to<double>()
|
||||
shape:@[ @1 ]
|
||||
dataType:getMPSDataType(grad_output)];
|
||||
MPSGraphTensor* resultPlusAlphaTensor = [mpsGraph additionWithPrimaryTensor:selfOrResultTensor
|
||||
secondaryTensor:alphaTensor
|
||||
name:nil];
|
||||
auto constMul = scale.to<double>() * input_scale.to<double>();
|
||||
MPSGraphTensor* constMulTensor = [mpsGraph constantWithScalar:constMul
|
||||
shape:@[ @1 ]
|
||||
dataType:getMPSDataType(grad_output)];
|
||||
lessThanZeroGradTensor = [mpsGraph multiplicationWithPrimaryTensor:resultPlusAlphaTensor
|
||||
secondaryTensor:constMulTensor
|
||||
name:nil];
|
||||
} else {
|
||||
MPSGraphTensor* inputScaleTensor = [mpsGraph constantWithScalar:input_scale.to<double>()
|
||||
shape:@[ @1 ]
|
||||
dataType:getMPSDataType(grad_output)];
|
||||
MPSGraphTensor* scaledInputTensor = [mpsGraph multiplicationWithPrimaryTensor:selfOrResultTensor
|
||||
secondaryTensor:inputScaleTensor
|
||||
name:nil];
|
||||
MPSGraphTensor* expTensor = [mpsGraph exponentWithTensor:scaledInputTensor name:nil];
|
||||
auto constMul = scale.to<double>() * input_scale.to<double>() * alpha.to<double>();
|
||||
MPSGraphTensor* constMulTensor = [mpsGraph constantWithScalar:constMul
|
||||
shape:@[ @1 ]
|
||||
dataType:getMPSDataType(grad_output)];
|
||||
lessThanZeroGradTensor = [mpsGraph multiplicationWithPrimaryTensor:expTensor
|
||||
secondaryTensor:constMulTensor
|
||||
name:nil];
|
||||
}
|
||||
|
||||
MPSGraphTensor* scaleTensor = [mpsGraph constantWithScalar:scale.to<double>()
|
||||
shape:@[ @1 ]
|
||||
dataType:getMPSDataType(grad_output)];
|
||||
MPSGraphTensor* zeroTensor = [mpsGraph constantWithScalar:0.0f
|
||||
shape:@[ @1 ]
|
||||
dataType:getMPSDataType(grad_output)];
|
||||
MPSGraphTensor* predicateTensor = [mpsGraph greaterThanWithPrimaryTensor:selfOrResultTensor
|
||||
secondaryTensor:zeroTensor
|
||||
name:nil];
|
||||
MPSGraphTensor* gradTensor = [mpsGraph selectWithPredicateTensor:predicateTensor
|
||||
truePredicateTensor:scaleTensor
|
||||
falsePredicateTensor:lessThanZeroGradTensor
|
||||
name:nil];
|
||||
MPSGraphTensor* gradInputTensor = [mpsGraph multiplicationWithPrimaryTensor:gradTensor
|
||||
secondaryTensor:gradOutputTensor
|
||||
name:nil];
|
||||
|
||||
newCachedGraph->gradOutputTensor_ = gradOutputTensor;
|
||||
newCachedGraph->inputTensor_ = selfOrResultTensor;
|
||||
newCachedGraph->gradInputTensor_ = gradInputTensor;
|
||||
});
|
||||
|
||||
Placeholder gradOutputPlaceholder = Placeholder(cachedGraph->gradOutputTensor_, grad_output, nil, executeGatherOp);
|
||||
Placeholder selfOrResultPlaceholder = Placeholder(cachedGraph->inputTensor_, self_or_result, nil, executeGatherOp);
|
||||
Placeholder gradInputPlaceholder =
|
||||
Placeholder(cachedGraph->gradInputTensor_, out.has_storage() ? out : grad_input, nil, false);
|
||||
|
||||
auto feeds = dictionaryFromPlaceholders(gradOutputPlaceholder, selfOrResultPlaceholder);
|
||||
runMPSGraph(stream, cachedGraph->graph(), feeds, gradInputPlaceholder);
|
||||
if (out.has_storage()) {
|
||||
grad_input.copy_(out);
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
TORCH_IMPL_FUNC(glu_out_mps)(const Tensor& self, const int64_t dim, const Tensor& output) {
|
||||
using namespace mps;
|
||||
using CachedGraph = MPSUnaryCachedGraph;
|
||||
|
||||
@ -1,8 +1,10 @@
|
||||
#define TORCH_ASSERT_ONLY_METHOD_OPERATORS
|
||||
#include <ATen/Dispatch.h>
|
||||
#include <ATen/TensorIterator.h>
|
||||
#include <ATen/mps/MPSProfiler.h>
|
||||
#include <ATen/native/Activation.h>
|
||||
#include <ATen/native/mps/OperationUtils.h>
|
||||
#include <ATen/native/mps/kernels/Activation.h>
|
||||
#include <fmt/format.h>
|
||||
|
||||
namespace at::native {
|
||||
@ -41,6 +43,30 @@ static void hardswish_backward_kernel(at::TensorIterator& iter) {
|
||||
lib.exec_binary_kernel(iter, "hardswish_backward");
|
||||
}
|
||||
|
||||
static void elu_kernel(TensorIteratorBase& iter, const Scalar& alpha, const Scalar& scale, const Scalar& input_scale) {
|
||||
AT_DISPATCH_FLOATING_TYPES_AND2(c10::kHalf, c10::kBFloat16, iter.common_dtype(), "elu_mps", [&]() {
|
||||
ELUParams<scalar_t> params{alpha.to<scalar_t>(), scale.to<scalar_t>(), input_scale.to<scalar_t>()};
|
||||
lib.exec_unary_kernel_with_params(
|
||||
iter, "elu", params, fmt::format("ELUParams_{}", mps::scalarToMetalTypeString(iter.common_dtype())));
|
||||
});
|
||||
}
|
||||
|
||||
static void elu_backward_kernel(TensorIteratorBase& iter,
|
||||
const Scalar& alpha,
|
||||
const Scalar& scale,
|
||||
const Scalar& input_scale,
|
||||
bool is_result) {
|
||||
AT_DISPATCH_FLOATING_TYPES_AND2(c10::kHalf, c10::kBFloat16, iter.common_dtype(), "elu_backward_mps", [&]() {
|
||||
ELUBackwardParams<scalar_t> params{
|
||||
alpha.to<scalar_t>(), scale.to<scalar_t>(), input_scale.to<scalar_t>(), is_result};
|
||||
lib.exec_binary_kernel_with_params(
|
||||
iter,
|
||||
"elu_backward",
|
||||
params,
|
||||
fmt::format("ELUBackwardParams_{}", mps::scalarToMetalTypeString(iter.common_dtype())));
|
||||
});
|
||||
}
|
||||
|
||||
static void leaky_relu_kernel(TensorIteratorBase& iter, const Scalar& negative_slope) {
|
||||
lib.exec_unary_kernel(iter, "leaky_relu", negative_slope);
|
||||
}
|
||||
@ -56,6 +82,8 @@ REGISTER_DISPATCH(hardsigmoid_stub, hardsigmoid_kernel);
|
||||
REGISTER_DISPATCH(hardsigmoid_backward_stub, hardsigmoid_backward_kernel);
|
||||
REGISTER_DISPATCH(hardswish_stub, hardswish_kernel);
|
||||
REGISTER_DISPATCH(hardswish_backward_stub, hardswish_backward_kernel);
|
||||
REGISTER_DISPATCH(elu_stub, elu_kernel);
|
||||
REGISTER_DISPATCH(elu_backward_stub, elu_backward_kernel);
|
||||
REGISTER_DISPATCH(leaky_relu_stub, leaky_relu_kernel);
|
||||
REGISTER_DISPATCH(leaky_relu_backward_stub, leaky_relu_backward_kernel);
|
||||
|
||||
|
||||
@ -12064,8 +12064,7 @@
|
||||
device_check: NoCheck # TensorIterator
|
||||
python_module: nn
|
||||
dispatch:
|
||||
CPU, CUDA: elu_out
|
||||
MPS: elu_out_mps
|
||||
CPU, CUDA, MPS: elu_out
|
||||
|
||||
- func: elu(Tensor self, Scalar alpha=1, Scalar scale=1, Scalar input_scale=1) -> Tensor
|
||||
structured_delegate: elu.out
|
||||
@ -12078,8 +12077,7 @@
|
||||
structured_inherits: TensorIteratorBase
|
||||
python_module: nn
|
||||
dispatch:
|
||||
CPU, CUDA: elu_backward_out
|
||||
MPS: elu_backward_out_mps
|
||||
CPU, CUDA, MPS: elu_backward_out
|
||||
|
||||
- func: elu_backward(Tensor grad_output, Scalar alpha, Scalar scale, Scalar input_scale, bool is_result, Tensor self_or_result) -> Tensor
|
||||
structured_delegate: elu_backward.grad_input
|
||||
|
||||
@ -301,12 +301,12 @@ class AvgPoolMicrokernelTester {
|
||||
ASSERT_NEAR(
|
||||
float(int32_t(y[i * yStride() + k])), yFP[i * kc() + k], 0.5001f)
|
||||
<< "at pixel " << i << ", channel " << k << ", n = " << n()
|
||||
<< ", ks = " << kh() << "x" << kw() << " (" << ks()
|
||||
<< ", ks = " << kh() << 'x' << kw() << " (" << ks()
|
||||
<< "), kc = " << kc() << ", acc = " << yAcc[i * kc() + k];
|
||||
ASSERT_EQ(
|
||||
uint32_t(yRef[i * kc() + k]), uint32_t(y[i * yStride() + k]))
|
||||
<< "at pixel " << i << ", channel " << k << ", n = " << n()
|
||||
<< ", ks = " << kh() << "x" << kw() << " (" << ks()
|
||||
<< ", ks = " << kh() << 'x' << kw() << " (" << ks()
|
||||
<< "), kc = " << kc() << ", acc = " << yAcc[i * kc() + k];
|
||||
}
|
||||
}
|
||||
@ -396,12 +396,12 @@ class AvgPoolMicrokernelTester {
|
||||
ASSERT_NEAR(
|
||||
float(int32_t(y[i * yStride() + k])), yFP[i * kc() + k], 0.5001f)
|
||||
<< "at pixel " << i << ", channel " << k << ", n = " << n()
|
||||
<< ", ks = " << kh() << "x" << kw() << " (" << ks()
|
||||
<< ", ks = " << kh() << 'x' << kw() << " (" << ks()
|
||||
<< "), kc = " << kc() << ", acc = " << yAcc[i * kc() + k];
|
||||
ASSERT_EQ(
|
||||
uint32_t(yRef[i * kc() + k]), uint32_t(y[i * yStride() + k]))
|
||||
<< "at pixel " << i << ", channel " << k << ", n = " << n()
|
||||
<< ", ks = " << kh() << "x" << kw() << " (" << ks()
|
||||
<< ", ks = " << kh() << 'x' << kw() << " (" << ks()
|
||||
<< "), kc = " << kc() << ", acc = " << yAcc[i * kc() + k];
|
||||
}
|
||||
}
|
||||
|
||||
@ -232,7 +232,7 @@ class MaxPoolMicrokernelTester {
|
||||
ASSERT_EQ(
|
||||
uint32_t(yRef[i * kc() + k]), uint32_t(y[i * yStride() + k]))
|
||||
<< "at pixel " << i << ", channel " << k << ", n = " << n()
|
||||
<< ", ks = " << kh() << "x" << kw() << " (" << ks()
|
||||
<< ", ks = " << kh() << 'x' << kw() << " (" << ks()
|
||||
<< "), kc = " << kc();
|
||||
}
|
||||
}
|
||||
|
||||
@ -17,7 +17,7 @@ inline std::vector<T> _expand_param_if_needed(
|
||||
std::ostringstream ss;
|
||||
ss << "expected " << param_name << " to be a single integer value or a "
|
||||
<< "list of " << expected_dim << " values to match the convolution "
|
||||
<< "dimensions, but got " << param_name << "=" << list_param;
|
||||
<< "dimensions, but got " << param_name << '=' << list_param;
|
||||
TORCH_CHECK(false, ss.str());
|
||||
} else {
|
||||
return list_param.vec();
|
||||
|
||||
@ -358,9 +358,9 @@ std::string Adapter::stringize() const {
|
||||
std::string device_type = get_device_type_str(properties.deviceType);
|
||||
VkPhysicalDeviceLimits limits = properties.limits;
|
||||
|
||||
ss << "{" << std::endl;
|
||||
ss << '{' << std::endl;
|
||||
ss << " Physical Device Info {" << std::endl;
|
||||
ss << " apiVersion: " << v_major << "." << v_minor << std::endl;
|
||||
ss << " apiVersion: " << v_major << '.' << v_minor << std::endl;
|
||||
ss << " driverversion: " << properties.driverVersion << std::endl;
|
||||
ss << " deviceType: " << device_type << std::endl;
|
||||
ss << " deviceName: " << properties.deviceName << std::endl;
|
||||
@ -371,7 +371,7 @@ std::string Adapter::stringize() const {
|
||||
|
||||
#define PRINT_LIMIT_PROP_VEC3(name) \
|
||||
ss << " " << std::left << std::setw(36) << #name << limits.name[0] \
|
||||
<< "," << limits.name[1] << "," << limits.name[2] << std::endl;
|
||||
<< ',' << limits.name[1] << ',' << limits.name[2] << std::endl;
|
||||
|
||||
ss << " Physical Device Limits {" << std::endl;
|
||||
PRINT_LIMIT_PROP(maxImageDimension1D);
|
||||
@ -425,7 +425,7 @@ std::string Adapter::stringize() const {
|
||||
;
|
||||
}
|
||||
ss << " ]" << std::endl;
|
||||
ss << "}";
|
||||
ss << '}';
|
||||
|
||||
return ss.str();
|
||||
}
|
||||
|
||||
@ -33,7 +33,7 @@ std::ostream& operator<<(std::ostream& out, const VkResult result) {
|
||||
VK_RESULT_CASE(VK_ERROR_FORMAT_NOT_SUPPORTED)
|
||||
VK_RESULT_CASE(VK_ERROR_FRAGMENTED_POOL)
|
||||
default:
|
||||
out << "VK_ERROR_UNKNOWN (VkResult " << result << ")";
|
||||
out << "VK_ERROR_UNKNOWN (VkResult " << result << ')';
|
||||
break;
|
||||
}
|
||||
return out;
|
||||
@ -46,7 +46,7 @@ std::ostream& operator<<(std::ostream& out, const VkResult result) {
|
||||
//
|
||||
|
||||
std::ostream& operator<<(std::ostream& out, const SourceLocation& loc) {
|
||||
out << loc.function << " at " << loc.file << ":" << loc.line;
|
||||
out << loc.function << " at " << loc.file << ':' << loc.line;
|
||||
return out;
|
||||
}
|
||||
|
||||
@ -66,7 +66,7 @@ Error::Error(SourceLocation source_location, const char* cond, std::string msg)
|
||||
: msg_(std::move(msg)), source_location_{source_location} {
|
||||
std::ostringstream oss;
|
||||
oss << "Exception raised from " << source_location_ << ": ";
|
||||
oss << "(" << cond << ") is false! ";
|
||||
oss << '(' << cond << ") is false! ";
|
||||
oss << msg_;
|
||||
what_ = oss.str();
|
||||
}
|
||||
|
||||
@ -173,8 +173,8 @@ void QueryPool::extract_results() {
|
||||
|
||||
static std::string stringize(const VkExtent3D& extents) {
|
||||
std::stringstream ss;
|
||||
ss << "{" << extents.width << ", " << extents.height << ", " << extents.depth
|
||||
<< "}";
|
||||
ss << '{' << extents.width << ", " << extents.height << ", " << extents.depth
|
||||
<< '}';
|
||||
return ss.str();
|
||||
}
|
||||
|
||||
|
||||
@ -149,7 +149,7 @@ VKAPI_ATTR VkBool32 VKAPI_CALL debug_report_callback_fn(
|
||||
(void)flags;
|
||||
|
||||
std::stringstream stream;
|
||||
stream << layer_prefix << " " << message_code << " " << message << std::endl;
|
||||
stream << layer_prefix << ' ' << message_code << ' ' << message << std::endl;
|
||||
const std::string log = stream.str();
|
||||
|
||||
std::cout << log;
|
||||
|
||||
@ -253,7 +253,7 @@ using vec4 = vec<4u>;
|
||||
|
||||
// uvec3 is the type representing tensor extents. Useful for debugging.
|
||||
inline std::ostream& operator<<(std::ostream& os, const uvec3& v) {
|
||||
os << "(" << v.data[0u] << ", " << v.data[1u] << ", " << v.data[2u] << ")";
|
||||
os << '(' << v.data[0u] << ", " << v.data[1u] << ", " << v.data[2u] << ')';
|
||||
return os;
|
||||
}
|
||||
|
||||
|
||||
@ -246,7 +246,7 @@ void TestToCFloat() {
|
||||
void TestToString() {
|
||||
Tensor b = ones({3, 7}) * .0000001f;
|
||||
std::stringstream s;
|
||||
s << b << "\n";
|
||||
s << b << '\n';
|
||||
std::string expect = "1e-07 *";
|
||||
ASSERT_EQ_RESOLVED(s.str().substr(0, expect.size()), expect);
|
||||
}
|
||||
|
||||
@ -33,7 +33,7 @@ struct Foo {
|
||||
static void apply(Tensor a, Tensor b) {
|
||||
scalar_type s = 1;
|
||||
std::stringstream ss;
|
||||
ss << "hello, dispatch: " << a.toString() << s << "\n";
|
||||
ss << "hello, dispatch: " << a.toString() << s << '\n';
|
||||
auto data = (scalar_type*)a.data_ptr();
|
||||
(void)data;
|
||||
}
|
||||
@ -73,8 +73,8 @@ TEST(TestScalar, TestScalar) {
|
||||
Scalar bar = 3.0;
|
||||
Half h = bar.toHalf();
|
||||
Scalar h2 = h;
|
||||
cout << "H2: " << h2.toDouble() << " " << what.toFloat() << " "
|
||||
<< bar.toDouble() << " " << what.isIntegral(false) << "\n";
|
||||
cout << "H2: " << h2.toDouble() << ' ' << what.toFloat() << ' '
|
||||
<< bar.toDouble() << ' ' << what.isIntegral(false) << '\n';
|
||||
auto gen = at::detail::getDefaultCPUGenerator();
|
||||
{
|
||||
// See Note [Acquire lock when using random generators]
|
||||
@ -84,7 +84,7 @@ TEST(TestScalar, TestScalar) {
|
||||
}
|
||||
if (at::hasCUDA()) {
|
||||
auto t2 = zeros({4, 4}, at::kCUDA);
|
||||
cout << &t2 << "\n";
|
||||
cout << &t2 << '\n';
|
||||
}
|
||||
auto t = ones({4, 4});
|
||||
|
||||
@ -129,7 +129,7 @@ TEST(TestScalar, TestScalar) {
|
||||
std::stringstream ss;
|
||||
// NOLINTNEXTLINE(cppcoreguidelines-avoid-goto,hicpp-avoid-goto)
|
||||
ASSERT_NO_THROW(
|
||||
ss << "hello, dispatch" << x.toString() << s << "\n");
|
||||
ss << "hello, dispatch" << x.toString() << s << '\n');
|
||||
auto data = (scalar_t*)x.data_ptr();
|
||||
(void)data;
|
||||
});
|
||||
|
||||
@ -1,5 +1,5 @@
|
||||
#include <ATen/ATen.h>
|
||||
|
||||
int main() {
|
||||
std::cout << at::ones({3,4}, at::CPU(at::kFloat)) << "\n";
|
||||
std::cout << at::ones({3,4}, at::CPU(at::kFloat)) << '\n';
|
||||
}
|
||||
|
||||
@ -1828,9 +1828,9 @@ namespace {
|
||||
#endif
|
||||
|
||||
EXPECT_EQ(u16, c10::detail::fp16_ieee_from_fp32_value(f32s[i]))
|
||||
<< "Test failed for float to uint16 " << f32s[i] << "\n";
|
||||
<< "Test failed for float to uint16 " << f32s[i] << '\n';
|
||||
EXPECT_EQ(x, c10::detail::fp16_ieee_to_fp32_value(u16))
|
||||
<< "Test failed for uint16 to float " << u16 << "\n";
|
||||
<< "Test failed for uint16 to float " << u16 << '\n';
|
||||
}
|
||||
}
|
||||
TEST(FP8E4M3Test, FP8E4M3ConversionFloat) {
|
||||
@ -1848,10 +1848,10 @@ namespace {
|
||||
EXPECT_TRUE(std::isnan(f32));
|
||||
} else {
|
||||
EXPECT_EQ(f32, c10::detail::fp8e4m3fn_to_fp32_value(input))
|
||||
<< "Test failed for u8 to float " << input << "\n";
|
||||
<< "Test failed for u8 to float " << input << '\n';
|
||||
}
|
||||
EXPECT_EQ(u8, c10::detail::fp8e4m3fn_from_fp32_value(f32))
|
||||
<< "Test failed for float to u8 " << f32 << "\n";
|
||||
<< "Test failed for float to u8 " << f32 << '\n';
|
||||
}
|
||||
}
|
||||
TEST(FP8E4M3Test, FP8E4M3BinaryAdd) {
|
||||
@ -2015,10 +2015,10 @@ namespace {
|
||||
EXPECT_TRUE(std::isnan(f32));
|
||||
} else {
|
||||
EXPECT_EQ(f32, c10::detail::fp8e5m2_to_fp32_value(input))
|
||||
<< "Test failed for u8 to float " << input << "\n";
|
||||
<< "Test failed for u8 to float " << input << '\n';
|
||||
}
|
||||
EXPECT_EQ(u8, c10::detail::fp8e5m2_from_fp32_value(f32))
|
||||
<< "Test failed for float to u8 " << f32 << "\n";
|
||||
<< "Test failed for float to u8 " << f32 << '\n';
|
||||
}
|
||||
}
|
||||
TEST(FP8E5M2Test, FP8E5M2BinaryAdd) {
|
||||
|
||||
@ -19,7 +19,7 @@ TEST(Vitals, Basic) {
|
||||
c10::utils::set_env("TORCH_VITAL", "1");
|
||||
TORCH_VITAL_DEFINE(Testing);
|
||||
TORCH_VITAL(Testing, Attribute0) << 1;
|
||||
TORCH_VITAL(Testing, Attribute1) << "1";
|
||||
TORCH_VITAL(Testing, Attribute1) << '1';
|
||||
TORCH_VITAL(Testing, Attribute2) << 1.0f;
|
||||
TORCH_VITAL(Testing, Attribute3) << 1.0;
|
||||
auto t = at::ones({1, 1});
|
||||
|
||||
Some files were not shown because too many files have changed in this diff Show More
Reference in New Issue
Block a user