Compare commits

..

1 Commits

Author SHA1 Message Date
cf1a2abf35 force kwarg=copy on .to in proxy_call in export 2025-07-24 16:48:22 -07:00
1188 changed files with 22445 additions and 47928 deletions

View File

@ -12,7 +12,7 @@ fi
SCRIPTPATH="$( cd "$(dirname "$0")" ; pwd -P )"
source $SCRIPTPATH/../manywheel/set_desired_python.sh
pip install -q numpy==${NUMPY_VERSION} pyyaml==6.0.2 scons==4.7.0 ninja==1.11.1 patchelf==0.17.2
pip install -q numpy==${NUMPY_VERSION} pyyaml==6.0.2 scons==4.7.0 ninja==1.11.1.4 patchelf==0.17.2
for tool in python python3 pip pip3 ninja scons patchelf; do
ln -sf ${DESIRED_PYTHON_BIN_DIR}/${tool} /usr/local/bin;

View File

@ -36,104 +36,3 @@ See `build.sh` for valid build environments (it's the giant switch).
# Set flags (see build.sh) and build image
sudo bash -c 'TRITON=1 ./build.sh pytorch-linux-bionic-py3.8-gcc9 -t myimage:latest
```
## [Guidance] Adding a New Base Docker Image
### Background
The base Docker images in directory `.ci/docker/` are built by the `docker-builds.yml` workflow. Those images are used throughout the PyTorch CI/CD pipeline. You should only create or modify a base Docker image if you need specific environment changes or dependencies before building PyTorch on CI.
1. **Automatic Rebuilding**:
- The Docker image building process is triggered automatically when changes are made to files in the `.ci/docker/*` directory
- This ensures all images stay up-to-date with the latest dependencies and configurations
2. **Image Reuse in PyTorch Build Workflows** (example: linux-build):
- The images generated by `docker-builds.yml` are reused in `_linux-build.yml` through the `calculate-docker-image` step
- The `_linux-build.yml` workflow:
- Pulls the Docker image determined by the `calculate-docker-image` step
- Runs a Docker container with that image
- Executes `.ci/pytorch/build.sh` inside the container to build PyTorch
3. **Usage in Test Workflows** (example: linux-test):
- The same Docker images are also used in `_linux-test.yml` for running tests
- The `_linux-test.yml` workflow follows a similar pattern:
- It uses the `calculate-docker-image` step to determine which Docker image to use
- It pulls the Docker image and runs a container with that image
- It installs the wheels from the artifacts generated by PyTorch build jobs
- It executes test scripts (like `.ci/pytorch/test.sh` or `.ci/pytorch/multigpu-test.sh`) inside the container
### Understanding File Purposes
#### `.ci/docker/build.sh` vs `.ci/pytorch/build.sh`
- **`.ci/docker/build.sh`**:
- Used for building base Docker images
- Executed by the `docker-builds.yml` workflow to pre-build Docker images for CI
- Contains configurations for different Docker build environments
- **`.ci/pytorch/build.sh`**:
- Used for building PyTorch inside a Docker container
- Called by workflows like `_linux-build.yml` after the Docker container is started
- Builds PyTorch wheels and other artifacts
#### `.ci/docker/ci_commit_pins/` vs `.github/ci_commit_pins`
- **`.ci/docker/ci_commit_pins/`**:
- Used for pinning dependency versions during base Docker image building
- Ensures consistent environments for building PyTorch
- Changes here trigger base Docker image rebuilds
- **`.github/ci_commit_pins`**:
- Used for pinning dependency versions during PyTorch building and tests
- Ensures consistent dependencies for PyTorch across different builds
- Used by build scripts running inside Docker containers
### Step-by-Step Guide for Adding a New Base Docker Image
#### 1. Add Pinned Commits (If Applicable)
We use pinned commits for build stability. The `nightly.yml` workflow checks and updates pinned commits for certain repository dependencies daily.
If your new Docker image needs a library installed from a specific pinned commit or built from source:
1. Add the repository you want to track in `nightly.yml` and `merge-rules.yml`
2. Add the initial pinned commit in `.ci/docker/ci_commit_pins/`. The text filename should match the one defined in step 1
#### 2. Configure the Base Docker Image
1. **Add new Base Docker image configuration** (if applicable):
Add the configuration in `.ci/docker/build.sh`. For example:
```bash
pytorch-linux-jammy-cuda12.8-cudnn9-py3.12-gcc11-new1)
CUDA_VERSION=12.8.1
ANACONDA_PYTHON_VERSION=3.12
GCC_VERSION=11
VISION=yes
KATEX=yes
UCX_COMMIT=${_UCX_COMMIT}
UCC_COMMIT=${_UCC_COMMIT}
TRITON=yes
NEW_ARG_1=yes
;;
```
2. **Add build arguments to Docker build command**:
If you're introducing a new argument to the Docker build, make sure to add it in the Docker build step in `.ci/docker/build.sh`:
```bash
docker build \
....
--build-arg "NEW_ARG_1=${NEW_ARG_1}"
```
3. **Update Dockerfile logic**:
Update the Dockerfile to use the new argument. For example, in `ubuntu/Dockerfile`:
```dockerfile
ARG NEW_ARG_1
# Set up environment for NEW_ARG_1
RUN if [ -n "${NEW_ARG_1}" ]; then bash ./do_something.sh; fi
```
4. **Add the Docker configuration** in `.github/workflows/docker-builds.yml`:
The `docker-builds.yml` workflow pre-builds the Docker images whenever changes occur in the `.ci/docker/` directory. This includes the
pinned commit updates.

View File

@ -93,6 +93,7 @@ tag=$(echo $image | awk -F':' '{print $2}')
case "$tag" in
pytorch-linux-jammy-cuda12.4-cudnn9-py3-gcc11)
CUDA_VERSION=12.4
CUDNN_VERSION=9
ANACONDA_PYTHON_VERSION=3.10
GCC_VERSION=11
VISION=yes
@ -103,6 +104,7 @@ case "$tag" in
;;
pytorch-linux-jammy-cuda12.8-cudnn9-py3-gcc11)
CUDA_VERSION=12.8.1
CUDNN_VERSION=9
ANACONDA_PYTHON_VERSION=3.10
GCC_VERSION=11
VISION=yes
@ -113,6 +115,7 @@ case "$tag" in
;;
pytorch-linux-jammy-cuda12.8-cudnn9-py3-gcc9-inductor-benchmarks)
CUDA_VERSION=12.8.1
CUDNN_VERSION=9
ANACONDA_PYTHON_VERSION=3.10
GCC_VERSION=9
VISION=yes
@ -124,6 +127,7 @@ case "$tag" in
;;
pytorch-linux-jammy-cuda12.8-cudnn9-py3.12-gcc9-inductor-benchmarks)
CUDA_VERSION=12.8.1
CUDNN_VERSION=9
ANACONDA_PYTHON_VERSION=3.12
GCC_VERSION=9
VISION=yes
@ -135,6 +139,7 @@ case "$tag" in
;;
pytorch-linux-jammy-cuda12.8-cudnn9-py3.13-gcc9-inductor-benchmarks)
CUDA_VERSION=12.8.1
CUDNN_VERSION=9
ANACONDA_PYTHON_VERSION=3.13
GCC_VERSION=9
VISION=yes
@ -144,18 +149,56 @@ case "$tag" in
TRITON=yes
INDUCTOR_BENCHMARKS=yes
;;
pytorch-linux-jammy-cuda12.8-cudnn9-py3.12-gcc11-vllm)
CUDA_VERSION=12.8.1
ANACONDA_PYTHON_VERSION=3.12
GCC_VERSION=11
pytorch-linux-jammy-cuda12.6-cudnn9-py3-gcc9)
CUDA_VERSION=12.6.3
CUDNN_VERSION=9
ANACONDA_PYTHON_VERSION=3.10
GCC_VERSION=9
VISION=yes
KATEX=yes
UCX_COMMIT=${_UCX_COMMIT}
UCC_COMMIT=${_UCC_COMMIT}
TRITON=yes
;;
pytorch-linux-jammy-cuda12.6-cudnn9-py3-gcc9-inductor-benchmarks)
CUDA_VERSION=12.6
CUDNN_VERSION=9
ANACONDA_PYTHON_VERSION=3.10
GCC_VERSION=9
VISION=yes
KATEX=yes
UCX_COMMIT=${_UCX_COMMIT}
UCC_COMMIT=${_UCC_COMMIT}
TRITON=yes
INDUCTOR_BENCHMARKS=yes
;;
pytorch-linux-jammy-cuda12.6-cudnn9-py3.12-gcc9-inductor-benchmarks)
CUDA_VERSION=12.6
CUDNN_VERSION=9
ANACONDA_PYTHON_VERSION=3.12
GCC_VERSION=9
VISION=yes
KATEX=yes
UCX_COMMIT=${_UCX_COMMIT}
UCC_COMMIT=${_UCC_COMMIT}
TRITON=yes
INDUCTOR_BENCHMARKS=yes
;;
pytorch-linux-jammy-cuda12.6-cudnn9-py3.13-gcc9-inductor-benchmarks)
CUDA_VERSION=12.6
CUDNN_VERSION=9
ANACONDA_PYTHON_VERSION=3.13
GCC_VERSION=9
VISION=yes
KATEX=yes
UCX_COMMIT=${_UCX_COMMIT}
UCC_COMMIT=${_UCC_COMMIT}
TRITON=yes
INDUCTOR_BENCHMARKS=yes
;;
pytorch-linux-jammy-cuda12.8-cudnn9-py3-gcc9)
CUDA_VERSION=12.8.1
CUDNN_VERSION=9
ANACONDA_PYTHON_VERSION=3.10
GCC_VERSION=9
VISION=yes
@ -176,6 +219,18 @@ case "$tag" in
VISION=yes
TRITON=yes
;;
pytorch-linux-jammy-py3.11-clang12)
ANACONDA_PYTHON_VERSION=3.11
CLANG_VERSION=12
VISION=yes
TRITON=yes
;;
pytorch-linux-jammy-py3.9-gcc9)
ANACONDA_PYTHON_VERSION=3.9
GCC_VERSION=9
VISION=yes
TRITON=yes
;;
pytorch-linux-jammy-rocm-n-py3 | pytorch-linux-noble-rocm-n-py3)
if [[ $tag =~ "jammy" ]]; then
ANACONDA_PYTHON_VERSION=3.10
@ -221,7 +276,7 @@ case "$tag" in
NINJA_VERSION=1.9.0
TRITON=yes
;;
pytorch-linux-jammy-py3.9-gcc11-inductor-benchmarks)
pytorch-linux-jammy-py3.9-gcc11-inductor-benchmarks)
ANACONDA_PYTHON_VERSION=3.9
GCC_VERSION=11
VISION=yes
@ -233,6 +288,7 @@ case "$tag" in
pytorch-linux-jammy-cuda12.8-cudnn9-py3.9-clang12)
ANACONDA_PYTHON_VERSION=3.9
CUDA_VERSION=12.8.1
CUDNN_VERSION=9
CLANG_VERSION=12
VISION=yes
TRITON=yes
@ -311,6 +367,7 @@ case "$tag" in
fi
if [[ "$image" == *cuda* ]]; then
extract_version_from_image_name cuda CUDA_VERSION
extract_version_from_image_name cudnn CUDNN_VERSION
fi
if [[ "$image" == *rocm* ]]; then
extract_version_from_image_name rocm ROCM_VERSION
@ -362,6 +419,9 @@ docker build \
--build-arg "PYTHON_VERSION=${PYTHON_VERSION}" \
--build-arg "GCC_VERSION=${GCC_VERSION}" \
--build-arg "CUDA_VERSION=${CUDA_VERSION}" \
--build-arg "CUDNN_VERSION=${CUDNN_VERSION}" \
--build-arg "TENSORRT_VERSION=${TENSORRT_VERSION}" \
--build-arg "GRADLE_VERSION=${GRADLE_VERSION}" \
--build-arg "NINJA_VERSION=${NINJA_VERSION:-}" \
--build-arg "KATEX=${KATEX:-}" \
--build-arg "ROCM_VERSION=${ROCM_VERSION:-}" \

View File

@ -1 +1 @@
f7888497a1eb9e98d4c07537f0d0bcfe180d1363
11ec6354315768a85da41032535e3b7b99c5f706

View File

@ -68,8 +68,8 @@ function install_nvshmem {
# download, unpack, install
wget -q "${url}"
tar xf "${filename}.tar.gz"
cp -a "libnvshmem/include/"* /usr/local/cuda/include/
cp -a "libnvshmem/lib/"* /usr/local/cuda/lib64/
cp -a "libnvshmem/include/"* /usr/local/include/
cp -a "libnvshmem/lib/"* /usr/local/lib/
# cleanup
cd ..

View File

@ -0,0 +1,26 @@
#!/bin/bash
if [[ -n "${CUDNN_VERSION}" ]]; then
# cuDNN license: https://developer.nvidia.com/cudnn/license_agreement
mkdir tmp_cudnn
pushd tmp_cudnn
if [[ ${CUDA_VERSION:0:4} == "12.9" || ${CUDA_VERSION:0:4} == "12.8" ]]; then
CUDNN_NAME="cudnn-linux-x86_64-9.10.2.21_cuda12-archive"
elif [[ ${CUDA_VERSION:0:4} == "12.6" ]]; then
CUDNN_NAME="cudnn-linux-x86_64-9.10.2.21_cuda12-archive"
elif [[ ${CUDA_VERSION:0:4} == "12.4" ]]; then
CUDNN_NAME="cudnn-linux-x86_64-9.10.2.21_cuda12-archive"
elif [[ ${CUDA_VERSION:0:2} == "11" ]]; then
CUDNN_NAME="cudnn-linux-x86_64-9.1.0.70_cuda11-archive"
else
print "Unsupported CUDA version ${CUDA_VERSION}"
exit 1
fi
curl --retry 3 -OLs https://developer.download.nvidia.com/compute/cudnn/redist/cudnn/linux-x86_64/${CUDNN_NAME}.tar.xz
tar xf ${CUDNN_NAME}.tar.xz
cp -a ${CUDNN_NAME}/include/* /usr/local/cuda/include/
cp -a ${CUDNN_NAME}/lib/* /usr/local/cuda/lib64/
popd
rm -rf tmp_cudnn
ldconfig
fi

View File

@ -15,37 +15,11 @@ function install_timm() {
commit=$(get_pinned_commit timm)
pip_install "git+https://github.com/huggingface/pytorch-image-models@${commit}"
}
function install_torchbench() {
local commit
commit=$(get_pinned_commit torchbench)
git clone https://github.com/pytorch/benchmark torchbench
pushd torchbench
git checkout "$commit"
python install.py --continue_on_fail
# TODO (huydhn): transformers-4.44.2 added by https://github.com/pytorch/benchmark/pull/2488
# is regressing speedup metric. This needs to be investigated further
pip install transformers==4.38.1
echo "Print all dependencies after TorchBench is installed"
python -mpip freeze
popd
chown -R jenkins torchbench
# Clean up
conda_run pip uninstall -y torch torchvision triton
}
# Pango is needed for weasyprint which is needed for doctr
conda_install pango
# Stable packages are ok here, just to satisfy TorchBench check
pip_install torch torchvision torchaudio --index-url https://download.pytorch.org/whl/cu128
install_torchbench
install_huggingface
install_timm
# Clean up
conda_run pip uninstall -y torch torchvision torchaudio triton

View File

@ -30,7 +30,7 @@ EOF
# we want the patch version of 6.4 instead
if [[ $(ver $ROCM_VERSION) -eq $(ver 6.4) ]]; then
ROCM_VERSION="${ROCM_VERSION}.2"
ROCM_VERSION="${ROCM_VERSION}.1"
fi
# Default url values
@ -85,19 +85,16 @@ EOF
# CI no longer builds for ROCm 6.3, but
# ROCm 6.4 did not yet fix the regression, also HIP branch names are different
if [[ $(ver $ROCM_VERSION) -ge $(ver 6.4) ]] && [[ $(ver $ROCM_VERSION) -lt $(ver 7.0) ]]; then
if [[ $(ver $ROCM_VERSION) -eq $(ver 6.4.2) ]]; then
HIP_TAG=rocm-6.4.2
CLR_HASH=74d78ba3ac4bac235d02bcb48511c30b5cfdd457 # branch release/rocm-rel-6.4.2-statco-hotfix
elif [[ $(ver $ROCM_VERSION) -eq $(ver 6.4.1) ]]; then
HIP_TAG=rocm-6.4.1
CLR_HASH=efe6c35790b9206923bfeed1209902feff37f386 # branch release/rocm-rel-6.4.1-statco-hotfix
if [[ $(ver $ROCM_VERSION) -eq $(ver 6.4.1) ]]; then
HIP_BRANCH=release/rocm-rel-6.4
CLR_HASH=ca18eb3f77fa09292fcda62bc60c3e565d752ada # branch release/rocm-rel-6.4.1-statco-hotfix
elif [[ $(ver $ROCM_VERSION) -eq $(ver 6.4) ]]; then
HIP_TAG=rocm-6.4.0
HIP_BRANCH=release/rocm-rel-6.4
CLR_HASH=600f5b0d2baed94d5121e2174a9de0851b040b0c # branch release/rocm-rel-6.4-statco-hotfix
fi
# clr build needs CppHeaderParser but can only find it using conda's python
python -m pip install CppHeaderParser
git clone https://github.com/ROCm/HIP -b $HIP_TAG
git clone https://github.com/ROCm/HIP -b $HIP_BRANCH
HIP_COMMON_DIR=$(readlink -f HIP)
git clone https://github.com/jeffdaily/clr
pushd clr

View File

@ -41,7 +41,7 @@ case ${DOCKER_TAG_PREFIX} in
rocm*)
# we want the patch version of 6.4 instead
if [[ $(ver $GPU_ARCH_VERSION) -eq $(ver 6.4) ]]; then
GPU_ARCH_VERSION="${GPU_ARCH_VERSION}.2"
GPU_ARCH_VERSION="${GPU_ARCH_VERSION}.1"
fi
BASE_TARGET=rocm
GPU_IMAGE=rocm/dev-ubuntu-22.04:${GPU_ARCH_VERSION}-complete

View File

@ -128,7 +128,7 @@ ENV PATH=/opt/rh/gcc-toolset-${DEVTOOLSET_VERSION}/root/usr/bin:$PATH
ENV LD_LIBRARY_PATH=/opt/rh/gcc-toolset-${DEVTOOLSET_VERSION}/root/usr/lib64:/opt/rh/gcc-toolset-${DEVTOOLSET_VERSION}/root/usr/lib:$LD_LIBRARY_PATH
# Install setuptools and wheel for python 3.12/3.13
RUN for cpython_version in "cp312-cp312" "cp313-cp313" "cp313-cp313t"; do \
/opt/python/${cpython_version}/bin/python -m pip install setuptools wheel; \
/opt/python/${cpython_version}/bin/python -m pip install "setuptools>=77.0.0" "packaging>=24.2" wheel; \
done;

View File

@ -124,10 +124,9 @@ RUN python3 -mpip install cmake==3.28.0
# install newest flatbuffers version first:
# for some reason old version is getting pulled in otherwise.
# packaging package is required for onnxruntime wheel build.
RUN pip3 install flatbuffers && \
pip3 install cython 'pkgconfig>=1.5.5' 'setuptools>=77' 'numpy<2.3.0' && \
RUN pip3 install 'setuptools>=77.0' 'packaging>=24.2' && \
pip3 install flatbuffers cython 'pkgconfig>=1.5.5' 'numpy<2.3.0' && \
pip3 install --no-build-isolation h5py==3.11.0 && \
pip3 install packaging && \
git clone https://github.com/microsoft/onnxruntime && \
cd onnxruntime && git checkout v1.21.0 && \
git submodule update --init --recursive && \

View File

@ -77,7 +77,7 @@ case ${image} in
manylinux2_28-builder:rocm*)
# we want the patch version of 6.4 instead
if [[ $(ver $GPU_ARCH_VERSION) -eq $(ver 6.4) ]]; then
GPU_ARCH_VERSION="${GPU_ARCH_VERSION}.2"
GPU_ARCH_VERSION="${GPU_ARCH_VERSION}.1"
fi
TARGET=rocm_final
MANY_LINUX_VERSION="2_28"

View File

@ -104,10 +104,10 @@ networkx==2.8.8
#Pinned versions: 2.8.8
#test that import: functorch
ninja==1.11.1.3
ninja==1.11.1.4
#Description: build system. Used in some tests. Used in build to generate build
#time tracing information
#Pinned versions: 1.11.1.3
#Pinned versions: 1.11.1.4
#test that import: run_test.py, test_cpp_extensions_aot.py,test_determination.py
numba==0.49.0 ; python_version < "3.9"
@ -221,9 +221,9 @@ pygments==2.15.0
#Pinned versions: 2.12.0
#test that import: the doctests
#pyyaml
#PyYAML
#Description: data serialization format
#Pinned versions: 6.0.2
#Pinned versions:
#test that import:
#requests
@ -233,7 +233,7 @@ pygments==2.15.0
#rich
#Description: rich text and beautiful formatting in the terminal
#Pinned versions: 14.1.0
#Pinned versions: 10.9.0
#test that import:
scikit-image==0.19.3 ; python_version < "3.10"
@ -361,10 +361,12 @@ pwlf==2.2.1
#Pinned versions: 2.2.1
#test that import: test_sac_estimator.py
# To build PyTorch itself
packaging>=24.2
pyyaml
pyzstd
setuptools>=70.1.0
setuptools>=77.0.0
six
scons==4.5.2 ; platform_machine == "aarch64"

View File

@ -1,11 +1,11 @@
sphinx==5.3.0
#Description: This is used to generate PyTorch docs
#Pinned versions: 5.3.0
-e git+https://github.com/pytorch/pytorch_sphinx_theme.git@722b7e6f9ca512fcc526ad07d62b3d28c50bb6cd#egg=pytorch_sphinx_theme2
-e git+https://github.com/pytorch/pytorch_sphinx_theme.git@pytorch_sphinx_theme2#egg=pytorch_sphinx_theme2
# TODO: sphinxcontrib.katex 0.9.0 adds a local KaTeX server to speed up pre-rendering
# but it doesn't seem to work and hangs around idly. The initial thought that it is probably
# something related to Docker setup. We can investigate this later.
# but it doesn't seem to work and hangs around idly. The initial thought it is probably
# something related to Docker setup. We can investigate this later
sphinxcontrib.katex==0.8.6
#Description: This is used to generate PyTorch docs
@ -50,8 +50,8 @@ IPython==8.12.0
#Pinned versions: 8.12.0
myst-nb==0.17.2
#Description: This is used to generate PyTorch functorch and torch.compile docs.
#Pinned versions: 0.17.2
#Description: This is used to generate PyTorch functorch docs
#Pinned versions: 0.13.2
# The following are required to build torch.distributed.elastic.rendezvous.etcd* docs
python-etcd==0.4.5
@ -59,3 +59,4 @@ sphinx-copybutton==0.5.0
sphinx-design==0.4.0
sphinxcontrib-mermaid==1.0.0
myst-parser==0.18.1
myst-nb

View File

@ -98,9 +98,8 @@ COPY ./common/install_inductor_benchmark_deps.sh install_inductor_benchmark_deps
COPY ./common/common_utils.sh common_utils.sh
COPY ci_commit_pins/huggingface.txt huggingface.txt
COPY ci_commit_pins/timm.txt timm.txt
COPY ci_commit_pins/torchbench.txt torchbench.txt
RUN if [ -n "${INDUCTOR_BENCHMARKS}" ]; then bash ./install_inductor_benchmark_deps.sh; fi
RUN rm install_inductor_benchmark_deps.sh common_utils.sh timm.txt huggingface.txt torchbench.txt
RUN rm install_inductor_benchmark_deps.sh common_utils.sh timm.txt huggingface.txt
# (optional) Install non-default Ninja version
ARG NINJA_VERSION

View File

@ -98,9 +98,8 @@ COPY ./common/install_inductor_benchmark_deps.sh install_inductor_benchmark_deps
COPY ./common/common_utils.sh common_utils.sh
COPY ci_commit_pins/huggingface.txt huggingface.txt
COPY ci_commit_pins/timm.txt timm.txt
COPY ci_commit_pins/torchbench.txt torchbench.txt
RUN if [ -n "${INDUCTOR_BENCHMARKS}" ]; then bash ./install_inductor_benchmark_deps.sh; fi
RUN rm install_inductor_benchmark_deps.sh common_utils.sh timm.txt huggingface.txt torchbench.txt
RUN rm install_inductor_benchmark_deps.sh common_utils.sh timm.txt huggingface.txt
ARG TRITON
ARG TRITON_CPU

View File

@ -194,7 +194,7 @@ ROCBLAS_LIB_SRC=$ROCM_HOME/lib/rocblas/library
ROCBLAS_LIB_DST=lib/rocblas/library
ROCBLAS_ARCH_SPECIFIC_FILES=$(ls $ROCBLAS_LIB_SRC | grep -E $ARCH)
ROCBLAS_OTHER_FILES=$(ls $ROCBLAS_LIB_SRC | grep -v gfx)
ROCBLAS_LIB_FILES=($ROCBLAS_ARCH_SPECIFIC_FILES $ROCBLAS_OTHER_FILES)
ROCBLAS_LIB_FILES=($ROCBLAS_ARCH_SPECIFIC_FILES $OTHER_FILES)
# hipblaslt library files
HIPBLASLT_LIB_SRC=$ROCM_HOME/lib/hipblaslt/library

34
.ci/pytorch/build-mobile.sh Executable file
View File

@ -0,0 +1,34 @@
#!/usr/bin/env bash
# DO NOT ADD 'set -x' not to reveal CircleCI secret context environment variables
set -eu -o pipefail
# This script uses linux host toolchain + mobile build options in order to
# build & test mobile libtorch without having to setup Android/iOS
# toolchain/simulator.
# shellcheck source=./common.sh
source "$(dirname "${BASH_SOURCE[0]}")/common.sh"
# shellcheck source=./common-build.sh
source "$(dirname "${BASH_SOURCE[0]}")/common-build.sh"
# Install torch & torchvision - used to download & trace test model.
# Ideally we should use the libtorch built on the PR so that backward
# incompatible changes won't break this script - but it will significantly slow
# down mobile CI jobs.
# Here we install nightly instead of stable so that we have an option to
# temporarily skip mobile CI jobs on BC-breaking PRs until they are in nightly.
retry pip install --pre torch torchvision \
-f https://download.pytorch.org/whl/nightly/cpu/torch_nightly.html \
--progress-bar off
# Run end-to-end process of building mobile library, linking into the predictor
# binary, and running forward pass with a real model.
if [[ "$BUILD_ENVIRONMENT" == *-mobile-custom-build-static* ]]; then
TEST_CUSTOM_BUILD_STATIC=1 test/mobile/custom_build/build.sh
elif [[ "$BUILD_ENVIRONMENT" == *-mobile-lightweight-dispatch* ]]; then
test/mobile/lightweight_dispatch/build.sh
else
TEST_DEFAULT_BUILD=1 test/mobile/custom_build/build.sh
fi
print_sccache_stats

View File

@ -11,6 +11,10 @@ source "$(dirname "${BASH_SOURCE[0]}")/common.sh"
# shellcheck source=./common-build.sh
source "$(dirname "${BASH_SOURCE[0]}")/common-build.sh"
if [[ "$BUILD_ENVIRONMENT" == *-mobile-*build* ]]; then
exec "$(dirname "${BASH_SOURCE[0]}")/build-mobile.sh" "$@"
fi
echo "Python version:"
python --version
@ -120,8 +124,26 @@ if [[ "$BUILD_ENVIRONMENT" == *libtorch* ]]; then
fi
# Use special scripts for Android builds
if [[ "${BUILD_ENVIRONMENT}" == *-android* ]]; then
export ANDROID_NDK=/opt/ndk
build_args=()
if [[ "${BUILD_ENVIRONMENT}" == *-arm-v7a* ]]; then
build_args+=("-DANDROID_ABI=armeabi-v7a")
elif [[ "${BUILD_ENVIRONMENT}" == *-arm-v8a* ]]; then
build_args+=("-DANDROID_ABI=arm64-v8a")
elif [[ "${BUILD_ENVIRONMENT}" == *-x86_32* ]]; then
build_args+=("-DANDROID_ABI=x86")
elif [[ "${BUILD_ENVIRONMENT}" == *-x86_64* ]]; then
build_args+=("-DANDROID_ABI=x86_64")
fi
if [[ "${BUILD_ENVIRONMENT}" == *vulkan* ]]; then
build_args+=("-DUSE_VULKAN=ON")
fi
build_args+=("-DUSE_LITE_INTERPRETER_PROFILER=OFF")
exec ./scripts/build_android.sh "${build_args[@]}" "$@"
fi
if [[ "$BUILD_ENVIRONMENT" == *vulkan* ]]; then
if [[ "$BUILD_ENVIRONMENT" != *android* && "$BUILD_ENVIRONMENT" == *vulkan* ]]; then
export USE_VULKAN=1
# shellcheck disable=SC1091
source /var/lib/jenkins/vulkansdk/setup-env.sh
@ -203,7 +225,7 @@ if [[ "${BUILD_ENVIRONMENT}" == *-pch* ]]; then
export USE_PRECOMPILED_HEADERS=1
fi
if [[ "${BUILD_ENVIRONMENT}" != *cuda* ]]; then
if [[ "${BUILD_ENVIRONMENT}" != *android* && "${BUILD_ENVIRONMENT}" != *cuda* ]]; then
export BUILD_STATIC_RUNTIME_BENCHMARK=ON
fi
@ -247,6 +269,9 @@ if [[ "$BUILD_ENVIRONMENT" == *-bazel-* ]]; then
tools/bazel build --config=no-tty "${BAZEL_MEM_LIMIT}" "${BAZEL_CPU_LIMIT}" //...
fi
else
# install build-system requirements before running setup.py commands
python -m pip install -r requirements-build.txt
# check that setup.py would fail with bad arguments
echo "The next three invocations are expected to fail with invalid command error messages."
( ! get_exit_code python setup.py bad_argument )

View File

@ -204,29 +204,6 @@ function install_torchrec_and_fbgemm() {
pip_build_and_install "git+https://github.com/pytorch/torchrec.git@${torchrec_commit}" dist/torchrec
pip_uninstall fbgemm-gpu-nightly
# Set ROCM_HOME isn't available, use ROCM_PATH if set or /opt/rocm
ROCM_HOME="${ROCM_HOME:-${ROCM_PATH:-/opt/rocm}}"
# Find rocm_version.h header file for ROCm version extract
rocm_version_h="${ROCM_HOME}/include/rocm-core/rocm_version.h"
if [ ! -f "$rocm_version_h" ]; then
rocm_version_h="${ROCM_HOME}/include/rocm_version.h"
fi
# Error out if rocm_version.h not found
if [ ! -f "$rocm_version_h" ]; then
echo "Error: rocm_version.h not found in expected locations." >&2
exit 1
fi
# Extract major, minor and patch ROCm version numbers
MAJOR_VERSION=$(grep 'ROCM_VERSION_MAJOR' "$rocm_version_h" | awk '{print $3}')
MINOR_VERSION=$(grep 'ROCM_VERSION_MINOR' "$rocm_version_h" | awk '{print $3}')
PATCH_VERSION=$(grep 'ROCM_VERSION_PATCH' "$rocm_version_h" | awk '{print $3}')
ROCM_INT=$((MAJOR_VERSION * 10000 + MINOR_VERSION * 100 + PATCH_VERSION))
echo "ROCm version: $ROCM_INT"
export BUILD_ROCM_VERSION="$MAJOR_VERSION.$MINOR_VERSION"
pip_install tabulate # needed for newer fbgemm
pip_install patchelf # needed for rocm fbgemm
@ -244,9 +221,9 @@ function install_torchrec_and_fbgemm() {
if [ "${found_whl}" == "0" ]; then
git clone --recursive https://github.com/pytorch/fbgemm
pushd fbgemm/fbgemm_gpu
git checkout "${fbgemm_commit}" --recurse-submodules
git checkout "${fbgemm_commit}"
python setup.py bdist_wheel \
--build-variant=rocm \
--package_variant=rocm \
-DHIP_ROOT_DIR="${ROCM_PATH}" \
-DCMAKE_C_FLAGS="-DTORCH_USE_HIP_DSA" \
-DCMAKE_CXX_FLAGS="-DTORCH_USE_HIP_DSA"
@ -281,6 +258,30 @@ function clone_pytorch_xla() {
fi
}
function checkout_install_torchbench() {
local commit
commit=$(get_pinned_commit torchbench)
git clone https://github.com/pytorch/benchmark torchbench
pushd torchbench
git checkout "$commit"
if [ "$1" ]; then
python install.py --continue_on_fail models "$@"
else
# Occasionally the installation may fail on one model but it is ok to continue
# to install and test other models
python install.py --continue_on_fail
fi
# TODO (huydhn): transformers-4.44.2 added by https://github.com/pytorch/benchmark/pull/2488
# is regressing speedup metric. This needs to be investigated further
pip install transformers==4.38.1
echo "Print all dependencies after TorchBench is installed"
python -mpip freeze
popd
}
function install_torchao() {
local commit
commit=$(get_pinned_commit torchao)

View File

@ -0,0 +1,123 @@
from datetime import datetime, timedelta, timezone
from tempfile import mkdtemp
from cryptography import x509
from cryptography.hazmat.primitives import hashes, serialization
from cryptography.hazmat.primitives.asymmetric import rsa
from cryptography.x509.oid import NameOID
temp_dir = mkdtemp()
print(temp_dir)
def genrsa(path):
key = rsa.generate_private_key(
public_exponent=65537,
key_size=2048,
)
with open(path, "wb") as f:
f.write(
key.private_bytes(
encoding=serialization.Encoding.PEM,
format=serialization.PrivateFormat.TraditionalOpenSSL,
encryption_algorithm=serialization.NoEncryption(),
)
)
return key
def create_cert(path, C, ST, L, O, key):
subject = issuer = x509.Name(
[
x509.NameAttribute(NameOID.COUNTRY_NAME, C),
x509.NameAttribute(NameOID.STATE_OR_PROVINCE_NAME, ST),
x509.NameAttribute(NameOID.LOCALITY_NAME, L),
x509.NameAttribute(NameOID.ORGANIZATION_NAME, O),
]
)
cert = (
x509.CertificateBuilder()
.subject_name(subject)
.issuer_name(issuer)
.public_key(key.public_key())
.serial_number(x509.random_serial_number())
.not_valid_before(datetime.now(timezone.utc))
.not_valid_after(
# Our certificate will be valid for 10 days
datetime.now(timezone.utc) + timedelta(days=10)
)
.add_extension(
x509.BasicConstraints(ca=True, path_length=None),
critical=True,
)
.sign(key, hashes.SHA256())
)
# Write our certificate out to disk.
with open(path, "wb") as f:
f.write(cert.public_bytes(serialization.Encoding.PEM))
return cert
def create_req(path, C, ST, L, O, key):
csr = (
x509.CertificateSigningRequestBuilder()
.subject_name(
x509.Name(
[
# Provide various details about who we are.
x509.NameAttribute(NameOID.COUNTRY_NAME, C),
x509.NameAttribute(NameOID.STATE_OR_PROVINCE_NAME, ST),
x509.NameAttribute(NameOID.LOCALITY_NAME, L),
x509.NameAttribute(NameOID.ORGANIZATION_NAME, O),
]
)
)
.sign(key, hashes.SHA256())
)
with open(path, "wb") as f:
f.write(csr.public_bytes(serialization.Encoding.PEM))
return csr
def sign_certificate_request(path, csr_cert, ca_cert, private_ca_key):
cert = (
x509.CertificateBuilder()
.subject_name(csr_cert.subject)
.issuer_name(ca_cert.subject)
.public_key(csr_cert.public_key())
.serial_number(x509.random_serial_number())
.not_valid_before(datetime.now(timezone.utc))
.not_valid_after(
# Our certificate will be valid for 10 days
datetime.now(timezone.utc) + timedelta(days=10)
# Sign our certificate with our private key
)
.sign(private_ca_key, hashes.SHA256())
)
with open(path, "wb") as f:
f.write(cert.public_bytes(serialization.Encoding.PEM))
return cert
ca_key = genrsa(temp_dir + "/ca.key")
ca_cert = create_cert(
temp_dir + "/ca.pem",
"US",
"New York",
"New York",
"Gloo Certificate Authority",
ca_key,
)
pkey = genrsa(temp_dir + "/pkey.key")
csr = create_req(
temp_dir + "/csr.csr",
"US",
"California",
"San Francisco",
"Gloo Testing Company",
pkey,
)
cert = sign_certificate_request(temp_dir + "/cert.pem", csr, ca_cert, ca_key)

View File

@ -157,29 +157,6 @@ test_jit_hooks() {
assert_git_not_dirty
}
# Shellcheck doesn't like it when you pass no arguments to a function
# that can take args. See https://www.shellcheck.net/wiki/SC2120
# shellcheck disable=SC2120
checkout_install_torchbench() {
local commit
commit=$(cat .ci/docker/ci_commit_pins/torchbench.txt)
git clone https://github.com/pytorch/benchmark torchbench
pushd torchbench
git checkout "$commit"
if [ "$1" ]; then
python install.py --continue_on_fail models "$@"
else
# Occasionally the installation may fail on one model but it is ok to continue
# to install and test other models
python install.py --continue_on_fail
fi
echo "Print all dependencies after TorchBench is installed"
python -mpip freeze
popd
}
torchbench_setup_macos() {
git clone --recursive https://github.com/pytorch/vision torchvision
git clone --recursive https://github.com/pytorch/audio torchaudio
@ -202,6 +179,8 @@ torchbench_setup_macos() {
USE_OPENMP=0 python setup.py develop
popd
# Shellcheck doesn't like it when you pass no arguments to a function that can take args. See https://www.shellcheck.net/wiki/SC2120
# shellcheck disable=SC2119,SC2120
checkout_install_torchbench
}

18
.ci/pytorch/run_glootls_test.sh Executable file
View File

@ -0,0 +1,18 @@
#!/bin/bash
CREATE_TEST_CERT="$(dirname "${BASH_SOURCE[0]}")/create_test_cert.py"
TMP_CERT_DIR=$(python "$CREATE_TEST_CERT")
openssl verify -CAfile "${TMP_CERT_DIR}/ca.pem" "${TMP_CERT_DIR}/cert.pem"
export GLOO_DEVICE_TRANSPORT=TCP_TLS
export GLOO_DEVICE_TRANSPORT_TCP_TLS_PKEY=${TMP_CERT_DIR}/pkey.key
export GLOO_DEVICE_TRANSPORT_TCP_TLS_CERT=${TMP_CERT_DIR}/cert.pem
export GLOO_DEVICE_TRANSPORT_TCP_TLS_CA_FILE=${TMP_CERT_DIR}/ca.pem
time python test/run_test.py --include distributed/test_c10d_gloo --verbose -- ProcessGroupGlooTest
unset GLOO_DEVICE_TRANSPORT
unset GLOO_DEVICE_TRANSPORT_TCP_TLS_PKEY
unset GLOO_DEVICE_TRANSPORT_TCP_TLS_CERT
unset GLOO_DEVICE_TRANSPORT_TCP_TLS_CA_FILE

View File

@ -385,29 +385,6 @@ def smoke_test_compile(device: str = "cpu") -> None:
x_pt2 = torch.compile(model, mode="max-autotune")(x)
def smoke_test_nvshmem() -> None:
if not torch.cuda.is_available():
print("CUDA is not available, skipping NVSHMEM test")
return
# Check if NVSHMEM is compiled in current build
try:
from torch._C._distributed_c10d import _is_nvshmem_available
except ImportError:
# Not built with NVSHMEM support.
# torch is not compiled with NVSHMEM prior to 2.9
if torch.__version__ < "2.9":
return
else:
# After 2.9: NVSHMEM is expected to be compiled in current build
raise RuntimeError("torch not compiled with NVSHMEM") from None
print("torch compiled with NVSHMEM")
# Check if NVSHMEM is available on current system.
print(f"NVSHMEM available at run time: {_is_nvshmem_available()}")
def smoke_test_modules():
cwd = os.getcwd()
for module in MODULES:
@ -502,8 +479,6 @@ def main() -> None:
options.pypi_pkg_check,
)
smoke_test_nvshmem()
if __name__ == "__main__":
main()

View File

@ -201,7 +201,7 @@ fi
if [[ "$BUILD_ENVIRONMENT" != *-bazel-* ]] ; then
# JIT C++ extensions require ninja.
pip_install "ninja==1.10.2"
pip_install "ninja==1.11.1.4"
# ninja is installed in $HOME/.local/bin, e.g., /var/lib/jenkins/.local/bin for CI user jenkins
# but this script should be runnable by any user, including root
export PATH="$HOME/.local/bin:$PATH"
@ -345,12 +345,6 @@ test_h100_symm_mem() {
assert_git_not_dirty
}
test_h100_cutlass_backend() {
# cutlass backend tests for H100
TORCHINDUCTOR_CUTLASS_DIR=$(realpath "./third_party/cutlass") python test/run_test.py --include inductor/test_cutlass_backend -k "not addmm" $PYTHON_TEST_EXTRA_OPTION --upload-artifacts-while-running
TORCHINDUCTOR_CUTLASS_DIR=$(realpath "./third_party/cutlass") python test/run_test.py --include inductor/test_cutlass_evt $PYTHON_TEST_EXTRA_OPTION --upload-artifacts-while-running
}
test_lazy_tensor_meta_reference_disabled() {
export TORCH_DISABLE_FUNCTIONALIZATION_META_REFERENCE=1
echo "Testing lazy tensor operations without meta reference"
@ -365,6 +359,7 @@ test_dynamo_wrapped_shard() {
exit 1
fi
python tools/dynamo/verify_dynamo.py
python tools/dynamo/gb_id_mapping.py verify
# PLEASE DO NOT ADD ADDITIONAL EXCLUDES HERE.
# Instead, use @skipIfTorchDynamo on your tests.
time python test/run_test.py --dynamo \
@ -462,7 +457,7 @@ test_inductor_aoti() {
# rebuild with the build cache with `BUILD_AOT_INDUCTOR_TEST` enabled
/usr/bin/env CMAKE_FRESH=1 BUILD_AOT_INDUCTOR_TEST=1 "${BUILD_COMMAND[@]}"
/usr/bin/env "${TEST_ENVS[@]}" python test/run_test.py --cpp --verbose -i cpp/test_aoti_abi_check cpp/test_aoti_inference cpp/test_vec_half_AVX2 -dist=loadfile
/usr/bin/env "${TEST_ENVS[@]}" python test/run_test.py --cpp --verbose -i cpp/test_aoti_abi_check cpp/test_aoti_inference -dist=loadfile
}
test_inductor_cpp_wrapper_shard() {
@ -627,8 +622,6 @@ test_perf_for_dashboard() {
device=cuda_a10g
elif [[ "${TEST_CONFIG}" == *h100* ]]; then
device=cuda_h100
elif [[ "${TEST_CONFIG}" == *b200* ]]; then
device=cuda_b200
elif [[ "${TEST_CONFIG}" == *rocm* ]]; then
device=rocm
fi
@ -803,16 +796,6 @@ test_dynamo_benchmark() {
if [[ "${TEST_CONFIG}" == *perf_compare* ]]; then
test_single_dynamo_benchmark "training" "$suite" "$shard_id" --training --amp "$@"
elif [[ "${TEST_CONFIG}" == *perf* ]]; then
# TODO (huydhn): Just smoke test some sample models
if [[ "${TEST_CONFIG}" == *b200* ]]; then
if [[ "${suite}" == "huggingface" ]]; then
export TORCHBENCH_ONLY_MODELS="DistillGPT2"
elif [[ "${suite}" == "timm_models" ]]; then
export TORCHBENCH_ONLY_MODELS="inception_v3"
elif [[ "${suite}" == "torchbench" ]]; then
export TORCHBENCH_ONLY_MODELS="hf_Bert"
fi
fi
test_single_dynamo_benchmark "dashboard" "$suite" "$shard_id" "$@"
else
if [[ "${TEST_CONFIG}" == *cpu* ]]; then
@ -940,6 +923,12 @@ test_torchbench_gcp_smoketest(){
popd
}
test_python_gloo_with_tls() {
source "$(dirname "${BASH_SOURCE[0]}")/run_glootls_test.sh"
assert_git_not_dirty
}
test_aten() {
# Test ATen
# The following test(s) of ATen have already been skipped by caffe2 in rocm environment:
@ -986,8 +975,6 @@ test_without_numpy() {
if [[ "${TEST_CONFIG}" == *dynamo_wrapped* ]]; then
python -c "import sys;sys.path.insert(0, 'fake_numpy');import torch;torch.compile(lambda x:print(x))('Hello World')"
fi
# Regression test for https://github.com/pytorch/pytorch/pull/157734 (torch.onnx should be importable without numpy)
python -c "import sys;sys.path.insert(0, 'fake_numpy');import torch; import torch.onnx"
popd
}
@ -1332,13 +1319,10 @@ EOF
# Step 2. Make sure that the public API test "test_correct_module_names" fails when an existing
# file is modified to introduce an invalid public API function.
# The filepath here must not have __all__ defined in it, otherwise the test will pass.
# If your PR introduces __all__ to torch/cuda/streams.py please point this to another file
# that does not have __all__ defined.
EXISTING_FILEPATH="${TORCH_INSTALL_DIR}/cuda/streams.py"
EXISTING_FILEPATH="${TORCH_INSTALL_DIR}/nn/parameter.py"
cp -v "${EXISTING_FILEPATH}" "${EXISTING_FILEPATH}.orig"
echo "${BAD_PUBLIC_FUNC}" >> "${EXISTING_FILEPATH}"
invalid_api="torch.cuda.streams.new_public_func"
invalid_api="torch.nn.parameter.new_public_func"
echo "Appended an invalid public API function to existing file ${EXISTING_FILEPATH}..."
check_public_api_test_fails \
@ -1572,7 +1556,7 @@ test_executorch() {
test_linux_aarch64() {
python test/run_test.py --include test_modules test_mkldnn test_mkldnn_fusion test_openmp test_torch test_dynamic_shapes \
test_transformers test_multiprocessing test_numpy_interop test_autograd test_binary_ufuncs test_complex test_spectral_ops \
test_foreach test_reductions test_unary_ufuncs test_tensor_creation_ops test_ops \
test_foreach test_reductions test_unary_ufuncs test_tensor_creation_ops test_ops test_cpp_extensions_open_device_registration \
--shard "$SHARD_NUMBER" "$NUM_TEST_SHARDS" --verbose
# Dynamo tests
@ -1684,11 +1668,13 @@ elif [[ "${TEST_CONFIG}" == *timm* ]]; then
elif [[ "${TEST_CONFIG}" == cachebench ]]; then
install_torchaudio
install_torchvision
PYTHONPATH=/torchbench test_cachebench
checkout_install_torchbench nanogpt BERT_pytorch resnet50 hf_T5 llama moco
PYTHONPATH=$(pwd)/torchbench test_cachebench
elif [[ "${TEST_CONFIG}" == verify_cachebench ]]; then
install_torchaudio
install_torchvision
PYTHONPATH=/torchbench test_verify_cachebench
checkout_install_torchbench nanogpt
PYTHONPATH=$(pwd)/torchbench test_verify_cachebench
elif [[ "${TEST_CONFIG}" == *torchbench* ]]; then
install_torchaudio
install_torchvision
@ -1697,22 +1683,28 @@ elif [[ "${TEST_CONFIG}" == *torchbench* ]]; then
# https://github.com/opencv/opencv-python/issues/885
pip_install opencv-python==4.8.0.74
if [[ "${TEST_CONFIG}" == *inductor_torchbench_smoketest_perf* ]]; then
PYTHONPATH=/torchbench test_inductor_torchbench_smoketest_perf
checkout_install_torchbench hf_Bert hf_Albert timm_vision_transformer
PYTHONPATH=$(pwd)/torchbench test_inductor_torchbench_smoketest_perf
elif [[ "${TEST_CONFIG}" == *inductor_torchbench_cpu_smoketest_perf* ]]; then
PYTHONPATH=/torchbench test_inductor_torchbench_cpu_smoketest_perf
checkout_install_torchbench timm_vision_transformer phlippe_densenet basic_gnn_edgecnn \
llama_v2_7b_16h resnet50 timm_efficientnet mobilenet_v3_large timm_resnest \
functorch_maml_omniglot yolov3 mobilenet_v2 resnext50_32x4d densenet121 mnasnet1_0
PYTHONPATH=$(pwd)/torchbench test_inductor_torchbench_cpu_smoketest_perf
elif [[ "${TEST_CONFIG}" == *torchbench_gcp_smoketest* ]]; then
TORCHBENCHPATH=/torchbench test_torchbench_gcp_smoketest
checkout_install_torchbench
TORCHBENCHPATH=$(pwd)/torchbench test_torchbench_gcp_smoketest
else
checkout_install_torchbench
# Do this after checkout_install_torchbench to ensure we clobber any
# nightlies that torchbench may pull in
if [[ "${TEST_CONFIG}" != *cpu* ]]; then
install_torchrec_and_fbgemm
fi
PYTHONPATH=/torchbench test_dynamo_benchmark torchbench "$id"
PYTHONPATH=$(pwd)/torchbench test_dynamo_benchmark torchbench "$id"
fi
elif [[ "${TEST_CONFIG}" == *inductor_cpp_wrapper* ]]; then
install_torchvision
PYTHONPATH=/torchbench test_inductor_cpp_wrapper_shard "$SHARD_NUMBER"
PYTHONPATH=$(pwd)/torchbench test_inductor_cpp_wrapper_shard "$SHARD_NUMBER"
if [[ "$SHARD_NUMBER" -eq "1" ]]; then
test_inductor_aoti
fi
@ -1777,8 +1769,6 @@ elif [[ "${TEST_CONFIG}" == h100_distributed ]]; then
test_h100_distributed
elif [[ "${TEST_CONFIG}" == "h100-symm-mem" ]]; then
test_h100_symm_mem
elif [[ "${TEST_CONFIG}" == h100_cutlass_backend ]]; then
test_h100_cutlass_backend
else
install_torchvision
install_monkeytype

View File

@ -1,34 +0,0 @@
# If you want to rebuild, run this with $env:REBUILD=1
# If you want to build with CUDA, run this with $env:USE_CUDA=1
# If you want to build without CUDA, run this with $env:USE_CUDA=0
# Check for setup.py in the current directory
if (-not (Test-Path "setup.py")) {
Write-Host "ERROR: Please run this build script from PyTorch root directory."
exit 1
}
# Get the script's parent directory
$ScriptParentDir = Split-Path -Parent $MyInvocation.MyCommand.Definition
# Set TMP_DIR and convert to Windows path
$env:TMP_DIR = Join-Path (Get-Location) "build\win_tmp"
$env:TMP_DIR_WIN = $env:TMP_DIR # Already in Windows format, no cygpath needed
# Set final package directory with default fallback
if (-not $env:PYTORCH_FINAL_PACKAGE_DIR) {
$env:PYTORCH_FINAL_PACKAGE_DIR = "C:\w\build-results"
}
# Create the final package directory if it doesn't exist
if (-not (Test-Path $env:PYTORCH_FINAL_PACKAGE_DIR)) {
New-Item -Path $env:PYTORCH_FINAL_PACKAGE_DIR -ItemType Directory -Force | Out-Null
}
# Set script helpers directory
$env:SCRIPT_HELPERS_DIR = Join-Path $ScriptParentDir "win-test-helpers\arm64"
# Run the main build script
& "$env:SCRIPT_HELPERS_DIR\build_pytorch.ps1"
Write-Host "BUILD PASSED"

View File

@ -1,24 +0,0 @@
#!/bin/bash
set -ex -o pipefail
SCRIPT_PARENT_DIR=$( cd "$( dirname "${BASH_SOURCE[0]}" )" && pwd )
# shellcheck source=./common.sh
source "$SCRIPT_PARENT_DIR/common.sh"
run_tests() {
echo Running smoke_test.py...
python ./.ci/pytorch/smoke_test/smoke_test.py --package torchonly
echo Running test_autograd.oy, test_nn.py, test_torch.py...
cd test
CORE_TEST_LIST=("test_autograd.py" "test_nn.py" "test_modules.py")
for t in "${CORE_TEST_LIST[@]}"; do
echo "Running test: $t"
python "$t" --verbose --save-xml --use-pytest -vvvv -rfEsxXP -p no:xdist
done
}
run_tests
echo "TEST PASSED"

View File

@ -1,98 +0,0 @@
# TODO: we may can use existing build_pytorch.bat for arm64
if ($env:DEBUG -eq "1") {
$env:BUILD_TYPE = "debug"
} else {
$env:BUILD_TYPE = "release"
}
# This inflates our log size slightly, but it is REALLY useful to be
# able to see what our cl.exe commands are. (since you can actually
# just copy-paste them into a local Windows setup to just rebuild a
# single file.)
# log sizes are too long, but leaving this here in case someone wants to use it locally
# $env:CMAKE_VERBOSE_MAKEFILE = "1"
$env:INSTALLER_DIR = Join-Path $env:SCRIPT_HELPERS_DIR "installation-helpers"
cd ..
# Environment variables
$env:SCCACHE_IDLE_TIMEOUT = "0"
$env:SCCACHE_IGNORE_SERVER_IO_ERROR = "1"
$env:CMAKE_BUILD_TYPE = $env:BUILD_TYPE
$env:CMAKE_C_COMPILER_LAUNCHER = "sccache"
$env:CMAKE_CXX_COMPILER_LAUNCHER = "sccache"
$env:libuv_ROOT = Join-Path $env:DEPENDENCIES_DIR "libuv\install"
$env:MSSdk = "1"
if ($env:PYTORCH_BUILD_VERSION) {
$env:PYTORCH_BUILD_VERSION = $env:PYTORCH_BUILD_VERSION
$env:PYTORCH_BUILD_NUMBER = "1"
}
$env:CMAKE_POLICY_VERSION_MINIMUM = "3.5"
# Set BLAS type
if ($env:ENABLE_APL -eq "1") {
$env:BLAS = "APL"
$env:USE_LAPACK = "1"
} elseif ($env:ENABLE_OPENBLAS -eq "1") {
$env:BLAS = "OpenBLAS"
$env:OpenBLAS_HOME = Join-Path $env:DEPENDENCIES_DIR "OpenBLAS\install"
}
# Change to source directory
Set-Location $env:PYTORCH_ROOT
# Copy libuv.dll
Copy-Item -Path (Join-Path $env:libuv_ROOT "lib\Release\uv.dll") -Destination "torch\lib\uv.dll" -Force
# Create virtual environment
python -m venv .venv
.\.venv\Scripts\Activate.ps1
where.exe python
# Python install dependencies
python -m pip install --upgrade pip
pip install setuptools pyyaml
pip install -r requirements.txt
# Set after installing psutil
$env:DISTUTILS_USE_SDK = "1"
# Print all environment variables
Get-ChildItem Env:
# Start and inspect sccache
sccache --start-server
sccache --zero-stats
sccache --show-stats
# Build the wheel
python setup.py bdist_wheel
if ($LASTEXITCODE -ne 0) { exit 1 }
# Install the wheel locally
$whl = Get-ChildItem -Path "dist\*.whl" | Select-Object -First 1
if ($whl) {
python -mpip install --no-index --no-deps $whl.FullName
}
# Copy final wheel
robocopy "dist" "$env:PYTORCH_FINAL_PACKAGE_DIR" *.whl
# Export test times
python tools/stats/export_test_times.py
# Copy additional CI files
robocopy ".additional_ci_files" "$env:PYTORCH_FINAL_PACKAGE_DIR\.additional_ci_files" /E
# Save ninja log
Copy-Item -Path "build\.ninja_log" -Destination $env:PYTORCH_FINAL_PACKAGE_DIR -Force
# Final sccache stats and stop
sccache --show-stats
sccache --stop-server
exit 0

View File

@ -126,6 +126,11 @@ if "%USE_CUDA%"=="1" (
set CMAKE_CUDA_COMPILER_LAUNCHER=%TMP_DIR%/bin/randomtemp.exe;%TMP_DIR%\bin\sccache.exe
)
:: Install build-system requirements before running setup.py commands
python -m pip install -r requirements-build.txt
if errorlevel 1 goto fail
if not errorlevel 0 goto fail
:: Print all existing environment variable for debugging
set

View File

@ -18,5 +18,5 @@ start /wait "" python-amd64.exe /quiet InstallAllUsers=1 PrependPath=0 Include_t
if errorlevel 1 exit /b 1
set "PATH=%CD%\Python\Scripts;%CD%\Python;%PATH%"
%PYTHON_EXEC% -m pip install --upgrade pip setuptools packaging wheel
%PYTHON_EXEC% -m pip install --upgrade pip "setuptools>=77.0.0" "packaging>=24.2" wheel
if errorlevel 1 exit /b 1

View File

@ -7,6 +7,9 @@ call "internal\install_python.bat"
%PYTHON_EXEC% --version
set "PATH=%CD%\Python\Lib\site-packages\cmake\data\bin;%CD%\Python\Scripts;%CD%\Python;%PATH%"
%PYTHON_EXEC% -m pip install "setuptools>=77.0.0" "packaging>=24.2"
if "%DESIRED_PYTHON%" == "3.13t" %PYTHON_EXEC% -m pip install numpy==2.2.1 cmake
if "%DESIRED_PYTHON%" == "3.13" %PYTHON_EXEC% -m pip install numpy==2.1.2 cmake
if "%DESIRED_PYTHON%" == "3.12" %PYTHON_EXEC% -m pip install numpy==2.0.2 cmake
@ -16,7 +19,7 @@ if "%DESIRED_PYTHON%" == "3.9" %PYTHON_EXEC% -m pip install numpy==2.0.2 cmake
%PYTHON_EXEC% -m pip install pyyaml
%PYTHON_EXEC% -m pip install mkl-include mkl-static
%PYTHON_EXEC% -m pip install boto3 ninja typing_extensions setuptools==72.1.0
%PYTHON_EXEC% -m pip install boto3 ninja typing-extensions
where cmake.exe

View File

@ -127,7 +127,7 @@ export INSTALL_TEST=0 # dont install test binaries into site-packages
export MACOSX_DEPLOYMENT_TARGET=10.15
export CMAKE_PREFIX_PATH=${CONDA_PREFIX:-"$(dirname $(which conda))/../"}
SETUPTOOLS_PINNED_VERSION="==70.1.0"
SETUPTOOLS_PINNED_VERSION="==77.0.0"
PYYAML_PINNED_VERSION="=5.3"
EXTRA_CONDA_INSTALL_FLAGS=""
CONDA_ENV_CREATE_FLAGS=""
@ -135,7 +135,7 @@ RENAME_WHEEL=true
case $desired_python in
3.13t)
echo "Using 3.13 deps"
SETUPTOOLS_PINNED_VERSION=">=70.1.0"
SETUPTOOLS_PINNED_VERSION=">=77.0.0"
PYYAML_PINNED_VERSION=">=6.0.1"
NUMPY_PINNED_VERSION="=2.1.0"
CONDA_ENV_CREATE_FLAGS="python-freethreading"
@ -145,31 +145,31 @@ case $desired_python in
;;
3.13)
echo "Using 3.13 deps"
SETUPTOOLS_PINNED_VERSION=">=70.1.0"
SETUPTOOLS_PINNED_VERSION=">=77.0.0"
PYYAML_PINNED_VERSION=">=6.0.1"
NUMPY_PINNED_VERSION="=2.1.0"
;;
3.12)
echo "Using 3.12 deps"
SETUPTOOLS_PINNED_VERSION=">=70.1.0"
SETUPTOOLS_PINNED_VERSION=">=77.0.0"
PYYAML_PINNED_VERSION=">=6.0.1"
NUMPY_PINNED_VERSION="=2.0.2"
;;
3.11)
echo "Using 3.11 deps"
SETUPTOOLS_PINNED_VERSION=">=70.1.0"
SETUPTOOLS_PINNED_VERSION=">=77.0.0"
PYYAML_PINNED_VERSION=">=5.3"
NUMPY_PINNED_VERSION="=2.0.2"
;;
3.10)
echo "Using 3.10 deps"
SETUPTOOLS_PINNED_VERSION=">=70.1.0"
SETUPTOOLS_PINNED_VERSION=">=77.0.0"
PYYAML_PINNED_VERSION=">=5.3"
NUMPY_PINNED_VERSION="=2.0.2"
;;
3.9)
echo "Using 3.9 deps"
SETUPTOOLS_PINNED_VERSION=">=70.1.0"
SETUPTOOLS_PINNED_VERSION=">=77.0.0"
PYYAML_PINNED_VERSION=">=5.3"
NUMPY_PINNED_VERSION="=2.0.2"
;;

View File

@ -7,12 +7,12 @@ max-line-length = 120
# C408 ignored because we like the dict keyword argument syntax
# E501 is not flexible enough, we're using B950 instead
ignore =
E203,E305,E402,E501,E704,E721,E741,F405,F841,F999,W503,W504,C408,E302,W291,E303,F824,
E203,E305,E402,E501,E704,E721,E741,F405,F841,F999,W503,W504,C408,E302,W291,E303,
# shebang has extra meaning in fbcode lints, so I think it's not worth trying
# to line this up with executable bit
EXE001,
# these ignores are from flake8-bugbear; please fix!
B007,B008,B017,B019,B023,B028,B903,B904,B905,B906,B907,B908,B910
B007,B008,B017,B019,B023,B028,B903,B904,B905,B906,B907
# these ignores are from flake8-comprehensions; please fix!
C407,
# these ignores are from flake8-logging-format; please fix!

View File

@ -53,12 +53,16 @@ self-hosted-runner:
- linux.rocm.gpu.mi250
- linux.rocm.gpu.2
- linux.rocm.gpu.4
# gfx942 runners
- linux.rocm.gpu.gfx942.2
- linux.rocm.gpu.gfx942.4
# MI300 runners
- linux.rocm.gpu.mi300.2
- linux.rocm.gpu.mi300.4
- rocm-docker
# Repo-specific Apple hosted runners
- macos-m1-ultra
- macos-m2-14
# Org wise AWS `mac2.metal` runners (2020 Mac mini hardware powered by Apple silicon M1 processors)
- macos-m1-stable
- macos-m1-13
- macos-m1-14
# GitHub-hosted MacOS runners
- macos-latest-xlarge

View File

@ -0,0 +1,78 @@
name: build android
description: build android for a specific arch
inputs:
arch:
description: arch to build
required: true
arch-for-build-env:
description: |
arch to pass to build environment.
This is currently different than the arch name we use elsewhere, which
should be fixed.
required: true
github-secret:
description: github token
required: true
build-environment:
required: true
description: Top-level label for what's being built/tested.
docker-image:
required: true
description: Name of the base docker image to build with.
branch:
required: true
description: What branch we are building on.
outputs:
container_id:
description: Docker container identifier used to build the artifacts
value: ${{ steps.build.outputs.container_id }}
runs:
using: composite
steps:
- name: Build-${{ inputs.arch }}
id: build
shell: bash
env:
BRANCH: ${{ inputs.branch }}
BUILD_ENVIRONMENT: pytorch-linux-xenial-py3-clang5-android-ndk-r19c-${{ inputs.arch-for-build-env }}-build"
AWS_DEFAULT_REGION: us-east-1
PR_NUMBER: ${{ github.event.pull_request.number }}
SHA1: ${{ github.event.pull_request.head.sha || github.sha }}
SCCACHE_BUCKET: ossci-compiler-cache-circleci-v2
SCCACHE_REGION: us-east-1
DOCKER_IMAGE: ${{ inputs.docker-image }}
MATRIX_ARCH: ${{ inputs.arch }}
run: |
# detached container should get cleaned up by teardown_ec2_linux
set -exo pipefail
export container_name
container_name=$(docker run \
-e BUILD_ENVIRONMENT \
-e MAX_JOBS="$(nproc --ignore=2)" \
-e AWS_DEFAULT_REGION \
-e PR_NUMBER \
-e SHA1 \
-e BRANCH \
-e SCCACHE_BUCKET \
-e SCCACHE_REGION \
-e SKIP_SCCACHE_INITIALIZATION=1 \
--env-file="/tmp/github_env_${GITHUB_RUN_ID}" \
--security-opt seccomp=unconfined \
--cap-add=SYS_PTRACE \
--tty \
--detach \
--user jenkins \
-w /var/lib/jenkins/workspace \
"${DOCKER_IMAGE}"
)
git submodule sync && git submodule update -q --init --recursive --depth 1
docker cp "${GITHUB_WORKSPACE}/." "${container_name}:/var/lib/jenkins/workspace"
(echo "sudo chown -R jenkins . && .ci/pytorch/build.sh && find ${BUILD_ROOT} -type f -name "*.a" -or -name "*.o" -delete" | docker exec -u jenkins -i "${container_name}" bash) 2>&1
# Copy install binaries back
mkdir -p "${GITHUB_WORKSPACE}/build_android_install_${MATRIX_ARCH}"
docker cp "${container_name}:/var/lib/jenkins/workspace/build_android/install" "${GITHUB_WORKSPACE}/build_android_install_${MATRIX_ARCH}"
echo "container_id=${container_name}" >> "${GITHUB_OUTPUT}"

View File

@ -70,7 +70,7 @@ runs:
set -eux
# PyYAML 6.0 doesn't work with MacOS x86 anymore
# This must run on Python-3.7 (AmazonLinux2) so can't use request=3.32.2
python3 -m pip install requests==2.27.1 pyyaml==6.0.2
python3 -m pip install requests==2.27.1 pyyaml==6.0.1
- name: Parse ref
id: parse-ref

View File

@ -1 +1 @@
6fbc710b617f79b992ef2ebc7f95e818aa390293
00b0c91db92c51a11356249262577b9fa26c18c5

View File

@ -1 +1 @@
7f1de94a4c2d14f59ad4ca84538c36084ea6b2c8
5fb5024118e9bb9decf96c2b0b1a8f0010bf56be

View File

@ -1 +0,0 @@
6a39ba85fe0f2fff9494b5eccea717c93510c230

View File

@ -1 +1 @@
b6a5b82b9948b610fa4c304d0d869c82b8f17db1
1c00dea2c9adb2137903c86b4191e8c247f8fda9

View File

@ -76,8 +76,8 @@
- .github/ci_commit_pins/audio.txt
- .github/ci_commit_pins/vision.txt
- .github/ci_commit_pins/torchdynamo.txt
- .github/ci_commit_pins/vllm.txt
- .ci/docker/ci_commit_pins/triton.txt
- .ci/docker/ci_commit_pins/vllm.txt
approved_by:
- pytorchbot
mandatory_checks_name:
@ -131,6 +131,21 @@
- Lint
- pull
- name: Mobile
patterns:
- ios/**
- android/**
- test/mobile/**
approved_by:
- linbinyu
- IvanKobzarev
- dreiss
- raziel
mandatory_checks_name:
- EasyCLA
- Lint
- pull
- name: PrimTorch
patterns:
- torch/_meta_registrations.py
@ -477,23 +492,6 @@
- srossross
- chillee
- zou3519
- guilhermeleobas
mandatory_checks_name:
- EasyCLA
- Lint
- pull
- name: Dynamo
patterns:
- torch/_dynamo/**
- torch/csrc/dynamo/**
- test/dynamo/**
- test/dynamo_expected_failures/**
- test/dynamo_skips/**
- test/inductor_expected_failures/**
- test/inductor_skips/**
approved_by:
- guilhermeleobas
mandatory_checks_name:
- EasyCLA
- Lint

View File

@ -31,9 +31,7 @@ ciflow_push_tags:
- ciflow/pull
- ciflow/h100
- ciflow/h100-distributed
- ciflow/win-arm64
- ciflow/h100-symm-mem
- ciflow/h100-cutlass-backend
retryable_workflows:
- pull
- trunk

View File

@ -7,9 +7,9 @@
# .ci/docker/requirements-ci.txt
boto3==1.35.42
jinja2==3.1.6
lintrunner==0.12.7
ninja==1.10.0.post1
lintrunner==0.10.7
ninja==1.11.1.4
nvidia-ml-py==11.525.84
pyyaml==6.0.2
pyyaml==6.0
requests==2.32.4
rich==14.1.0
rich==10.9.0

View File

@ -2,17 +2,17 @@ boto3==1.35.42
cmake==3.27.*
expecttest==0.3.0
fbscribelogger==0.1.7
filelock==3.18.0
filelock==3.6.0
hypothesis==6.56.4
librosa>=0.6.2
mpmath==1.3.0
networkx==2.8.7
ninja==1.10.2.4
ninja==1.11.1.4
numba==0.59.0
numpy==1.26.4
opt-einsum>=3.3
optree==0.13.0
packaging==23.1
packaging==25.0
parameterized==0.8.1
pillow==10.3.0
protobuf==5.29.4
@ -26,7 +26,7 @@ pytest-xdist==3.3.1
pytest==7.3.2
pyyaml==6.0.2
scipy==1.12.0
setuptools==72.1.0
setuptools==80.9.0
sympy==1.13.3
tlparse==0.3.30
tensorboard==2.13.0

View File

@ -193,7 +193,7 @@ LIBTORCH_CONTAINER_IMAGES: dict[str, str] = {
"cpu": "libtorch-cxx11-builder:cpu",
}
FULL_PYTHON_VERSIONS = ["3.9", "3.10", "3.11", "3.12", "3.13", "3.13t", "3.14", "3.14t"]
FULL_PYTHON_VERSIONS = ["3.9", "3.10", "3.11", "3.12", "3.13", "3.13t"]
def translate_desired_cuda(gpu_arch_type: str, gpu_arch_version: str) -> str:
@ -315,11 +315,6 @@ def generate_wheels_matrix(
# TODO: Enable python 3.13t on cpu-s390x
if gpu_arch_type == "cpu-s390x" and python_version == "3.13t":
continue
# TODO: Enable python 3.14 on non linux OSes
if os != "linux" and (
python_version == "3.14" or python_version == "3.14t"
):
continue
if use_split_build and (
arch_version not in ["12.6", "12.8", "12.9", "cpu"] or os != "linux"

View File

@ -2,7 +2,7 @@
set -ex
# Use uv to speed up lintrunner init
python3 -m pip install -U uv==0.8.* setuptools
python3 -m pip install -U uv setuptools
CACHE_DIRECTORY="/tmp/.lintbin"
# Try to recover the cached binaries

View File

@ -1891,9 +1891,7 @@ def validate_revert(
else pr.get_comment_by_id(comment_id)
)
if comment.editor_login is not None:
raise PostCommentError(
"Halting the revert as the revert comment has been edited."
)
raise PostCommentError("Don't want to revert based on edited command")
author_association = comment.author_association
author_login = comment.author_login
allowed_reverters = ["COLLABORATOR", "MEMBER", "OWNER"]

View File

@ -10,7 +10,7 @@ if "%PY_VERS%" == "3.13t" (
call conda create -n %PYTHON_PREFIX% -y -c=conda-forge python=%PY_VERS%
)
:: Fix cmake version for issue https://github.com/pytorch/pytorch/issues/150480
call conda run -n %PYTHON_PREFIX% pip install wheel pybind11 certifi cython cmake==3.31.6 setuptools==72.1.0 ninja
call conda run -n %PYTHON_PREFIX% pip install wheel pybind11 certifi cython cmake==3.31.6 setuptools==78.1.1 ninja
dir "%VC_INSTALL_PATH%"

View File

@ -27,7 +27,7 @@ jobs:
PR_NUMBER="${{ github.event.number }}"
# Use gh CLI to get changed files in the PR with explicit repo
CHANGED_FILES=$(gh api repos/${{ github.repository }}/pulls/$PR_NUMBER/files --paginate --jq '.[] | select(.status != "removed") | .filename' | tr '\n' ' ' | sed 's/ $//')
CHANGED_FILES=$(gh pr view "$PR_NUMBER" --repo "${{ github.repository }}" --json files --jq '.files[].path' | tr '\n' ' ' | sed 's/ $//')
if [ -z "$CHANGED_FILES" ]; then
echo "No changed files found, setting to '*'"
@ -40,4 +40,4 @@ jobs:
else
echo "Not in PR context, setting changed files to '*'"
echo "changed-files=*" >> "$GITHUB_OUTPUT"
fi
fi

View File

@ -96,7 +96,7 @@ jobs:
steps:
- name: Setup SSH (Click me for login details)
uses: pytorch/test-infra/.github/actions/setup-ssh@main
if: ${{ !contains(matrix.runner, 'b200') && inputs.build-environment != 'linux-s390x-binary-manywheel' }}
if: ${{ matrix.runner != 'B200' && inputs.build-environment != 'linux-s390x-binary-manywheel' }}
with:
github-secret: ${{ secrets.GITHUB_TOKEN }}
instructions: |
@ -109,7 +109,7 @@ jobs:
no-sudo: true
- name: Setup Python
if: contains(matrix.runner, 'b200')
if: matrix.runner == 'B200'
uses: actions/setup-python@a26af69be951a213d495a4c3e4e4022e16d87065 # v5.6.0
with:
python-version: '3.12'
@ -117,7 +117,7 @@ jobs:
- name: Setup Linux
uses: ./.github/actions/setup-linux
if: inputs.build-environment != 'linux-s390x-binary-manywheel' && !contains(matrix.runner, 'b200')
if: inputs.build-environment != 'linux-s390x-binary-manywheel' && matrix.runner != 'B200'
- name: configure aws credentials
if: ${{ inputs.aws-role-to-assume != '' && inputs.build-environment != 'linux-s390x-binary-manywheel' }}
@ -128,7 +128,7 @@ jobs:
aws-region: us-east-1
- name: Login to Amazon ECR
if: ${{ inputs.aws-role-to-assume != '' && contains(matrix.runner, 'b200') }}
if: ${{ inputs.aws-role-to-assume != '' && matrix.runner == 'B200' }}
id: login-ecr
continue-on-error: true
uses: aws-actions/amazon-ecr-login@062b18b96a7aff071d4dc91bc00c4c1a7945b076 # v2.0.1
@ -166,17 +166,17 @@ jobs:
uses: pytorch/test-infra/.github/actions/setup-nvidia@main
with:
driver-version: ${{ matrix.config == 'legacy_nvidia_driver' && '525.105.17' || '570.133.07' }}
if: ${{ contains(inputs.build-environment, 'cuda') && !contains(matrix.config, 'nogpu') && steps.check_container_runner.outputs.IN_CONTAINER_RUNNER == 'false' && !contains(matrix.runner, 'b200') }}
if: ${{ contains(inputs.build-environment, 'cuda') && !contains(matrix.config, 'nogpu') && steps.check_container_runner.outputs.IN_CONTAINER_RUNNER == 'false' && matrix.runner != 'B200' }}
- name: Setup GPU_FLAG for docker run
id: setup-gpu-flag
run: echo "GPU_FLAG=--gpus all -e NVIDIA_DRIVER_CAPABILITIES=all" >> "${GITHUB_ENV}"
if: ${{ contains(inputs.build-environment, 'cuda') && !contains(matrix.config, 'nogpu') && (steps.check_container_runner.outputs.IN_CONTAINER_RUNNER == 'true' || contains(matrix.runner, 'b200')) }}
if: ${{ contains(inputs.build-environment, 'cuda') && !contains(matrix.config, 'nogpu') && (steps.check_container_runner.outputs.IN_CONTAINER_RUNNER == 'true' || matrix.runner == 'B200') }}
- name: Setup SCCACHE_SERVER_PORT environment for docker run when on container
id: setup-sscache-port-flag
run: echo "SCCACHE_SERVER_PORT_DOCKER_FLAG=-e SCCACHE_SERVER_PORT=$((RUNNER_UID + 4226))" >> "${GITHUB_ENV}"
if: ${{ steps.check_container_runner.outputs.IN_CONTAINER_RUNNER == 'true' && !contains(matrix.runner, 'b200') }}
if: ${{ steps.check_container_runner.outputs.IN_CONTAINER_RUNNER == 'true' && matrix.runner != 'B200' }}
- name: Lock NVIDIA A100 40GB Frequency
run: |
@ -277,8 +277,8 @@ jobs:
NO_TD: ${{ steps.keep-going.outputs.ci-no-td }}
TD_DISTRIBUTED: ${{ steps.keep-going.outputs.ci-td-distributed }}
# Do not set SCCACHE_S3_KEY_PREFIX to share the cache between all build jobs
SCCACHE_BUCKET: ${{ !contains(matrix.runner, 'b200') && 'ossci-compiler-cache-circleci-v2' || '' }}
SCCACHE_REGION: ${{ !contains(matrix.runner, 'b200') && 'us-east-1' || '' }}
SCCACHE_BUCKET: ${{ matrix.runner != 'B200' && 'ossci-compiler-cache-circleci-v2' || '' }}
SCCACHE_REGION: ${{ matrix.runner != 'B200' && 'us-east-1' || '' }}
SHM_SIZE: ${{ contains(inputs.build-environment, 'cuda') && '2g' || '1g' }}
DOCKER_IMAGE: ${{ inputs.docker-image }}
XLA_CUDA: ${{ contains(inputs.build-environment, 'xla') && '0' || '' }}
@ -403,7 +403,7 @@ jobs:
job_identifier: ${{ github.workflow }}_${{ inputs.build-environment }}
- name: Authenticate with AWS
if: ${{ contains(matrix.runner, 'b200') }}
if: ${{ matrix.runner == 'B200' }}
uses: aws-actions/configure-aws-credentials@ececac1a45f3b08a01d2dd070d28d111c5fe6722 # v4.1.0
with:
role-to-assume: arn:aws:iam::308535385114:role/gha_workflow_upload-benchmark-results

View File

@ -80,6 +80,11 @@ jobs:
run: |
sysctl machdep.cpu.brand_string kern.osproductversion
- name: Install build toolchain
run: |
brew update --quiet
brew install --formula cmake ninja
- name: Clean up leftover processes on MacOS pet runner
continue-on-error: true
run: |

View File

@ -269,8 +269,8 @@ jobs:
# copy test results back to the mounted workspace, needed sudo, resulting permissions were correct
docker exec -t "${{ env.CONTAINER_NAME }}" sh -c "cd ../pytorch && sudo cp -R test/test-reports ../workspace/test"
- name: Change permissions (only needed for kubernetes runners for now)
if: ${{ always() && steps.test.conclusion && (contains(matrix.runner, 'gfx942') || contains(matrix.runner, 'mi355')) }}
- name: Change permissions (only needed for MI300 runners for now)
if: ${{ always() && steps.test.conclusion && contains(matrix.runner, 'mi300') }}
run: |
docker exec -t "${{ env.CONTAINER_NAME }}" sh -c "sudo chown -R 1001:1001 test"

View File

@ -50,7 +50,7 @@ jobs:
strategy:
fail-fast: false
matrix:
py_vers: [ "3.9", "3.10", "3.11", "3.12", "3.13", "3.13t", "3.14", "3.14t" ]
py_vers: [ "3.9", "3.10", "3.11", "3.12", "3.13", "3.13t" ]
device: ["cuda", "rocm", "xpu", "aarch64"]
docker-image: ["pytorch/manylinux2_28-builder:cpu"]
include:
@ -126,12 +126,6 @@ jobs:
3.13t)
PYTHON_EXECUTABLE=/opt/python/cp313-cp313t/bin/python
;;
3.14)
PYTHON_EXECUTABLE=/opt/python/cp314-cp314/bin/python
;;
3.14t)
PYTHON_EXECUTABLE=/opt/python/cp314-cp314t/bin/python
;;
*)
echo "Unsupported python version ${PY_VERS}"
exit 1

View File

@ -34,8 +34,7 @@ jobs:
contents: read
pull-requests: write
name: Check labels
# Disabling the job until https://github.com/pytorch/pytorch/issues/159825 is resolved
if: github.repository_owner == 'pytorch' && false
if: github.repository_owner == 'pytorch'
runs-on: linux.24_04.4x
steps:
- name: Checkout PyTorch

View File

@ -7,8 +7,7 @@ on:
jobs:
ghstack-mergeability-check:
# Disabling the job until https://github.com/pytorch/pytorch/issues/159825 is resolved
if: github.repository_owner == 'pytorch' && false
if: github.repository_owner == 'pytorch'
runs-on: ubuntu-latest
steps:
- uses: actions/checkout@11bd71901bbe5b1630ceea73d27597364c9af683 # v4.2.2
@ -57,7 +56,7 @@ jobs:
cache: pip
architecture: x64
- run: pip install pyyaml==6.0.2
- run: pip install pyyaml==6.0
shell: bash
- name: Verify mergeability

View File

@ -26,7 +26,7 @@ jobs:
cache: pip
# Not the direct dependencies but the script uses trymerge
- run: pip install pyyaml==6.0.2
- run: pip install pyyaml==6.0
- name: Setup committer id
run: |

View File

@ -50,13 +50,17 @@ jobs:
runner: [linux.12xlarge]
docker-image-name: [
pytorch-linux-jammy-cuda12.8-cudnn9-py3-gcc11,
pytorch-linux-jammy-cuda12.8-cudnn9-py3.12-gcc11-vllm,
pytorch-linux-jammy-cuda12.6-cudnn9-py3-gcc9-inductor-benchmarks,
pytorch-linux-jammy-cuda12.6-cudnn9-py3.12-gcc9-inductor-benchmarks,
pytorch-linux-jammy-cuda12.6-cudnn9-py3.13-gcc9-inductor-benchmarks,
pytorch-linux-jammy-cuda12.8-cudnn9-py3-gcc9-inductor-benchmarks,
pytorch-linux-jammy-cuda12.8-cudnn9-py3.12-gcc9-inductor-benchmarks,
pytorch-linux-jammy-cuda12.8-cudnn9-py3.13-gcc9-inductor-benchmarks,
pytorch-linux-jammy-cuda12.8-cudnn9-py3-gcc9,
pytorch-linux-jammy-cuda12.4-cudnn9-py3-gcc11,
pytorch-linux-jammy-py3.9-clang12,
pytorch-linux-jammy-py3.11-clang12,
pytorch-linux-jammy-py3.12-clang12,
pytorch-linux-jammy-py3.13-clang12,
pytorch-linux-jammy-rocm-n-py3,
pytorch-linux-noble-rocm-n-py3,
@ -71,8 +75,7 @@ jobs:
pytorch-linux-jammy-py3-clang12-onnx,
pytorch-linux-jammy-linter,
pytorch-linux-jammy-cuda12.8-cudnn9-py3.9-linter,
# Executorch pin needs update
# pytorch-linux-jammy-py3-clang12-executorch,
pytorch-linux-jammy-py3-clang12-executorch,
pytorch-linux-jammy-py3.12-triton-cpu
]
include:

View File

@ -144,7 +144,7 @@ jobs:
run: |
make -f docker.Makefile "${BUILD_IMAGE_TYPE}-image"
- name: Push nightly tags
if: ${{ github.event.ref == 'refs/heads/nightly' && matrix.image_type == 'runtime' && matrix.platform == 'linux/amd4' }}
if: ${{ github.event.ref == 'refs/heads/nightly' && matrix.image_type == 'runtime' && matrix.build_platforms == 'linux/amd4' }}
run: |
PYTORCH_DOCKER_TAG="${PYTORCH_VERSION}-cuda${CUDA_VERSION_SHORT}-cudnn${CUDNN_VERSION}-runtime"
CUDA_SUFFIX="-cu${CUDA_VERSION}"

File diff suppressed because it is too large Load Diff

View File

@ -1,58 +0,0 @@
name: Limited CI for CUTLASS backend on H100
on:
pull_request:
paths:
- .github/workflows/h100-cutlass-backend.yml
workflow_dispatch:
schedule:
- cron: 22 9 * * * # every 24 hours about 2:22am PDT
push:
tags:
- ciflow/h100-cutlass-backend/*
concurrency:
group: ${{ github.workflow }}-${{ github.event.pull_request.number || github.sha }}-${{ github.event_name == 'workflow_dispatch' }}-${{ github.event_name == 'schedule' }}
cancel-in-progress: true
permissions:
id-token: write
contents: read
jobs:
get-label-type:
if: github.repository_owner == 'pytorch'
name: get-label-type
uses: pytorch/pytorch/.github/workflows/_runner-determinator.yml@main
with:
triggering_actor: ${{ github.triggering_actor }}
issue_owner: ${{ github.event.pull_request.user.login || github.event.issue.user.login }}
curr_branch: ${{ github.head_ref || github.ref_name }}
curr_ref_type: ${{ github.ref_type }}
linux-jammy-cuda12_8-py3_10-gcc11-sm90-build-cutlass-backend:
name: linux-jammy-cuda12.8-py3.10-gcc11-sm90-cutlass-backend
uses: ./.github/workflows/_linux-build.yml
needs: get-label-type
with:
runner_prefix: "${{ needs.get-label-type.outputs.label-type }}"
build-environment: linux-jammy-cuda12.8-py3.10-gcc11-sm90-cutlass-backend
docker-image-name: ci-image:pytorch-linux-jammy-cuda12.8-cudnn9-py3-gcc11
cuda-arch-list: '9.0'
test-matrix: |
{ include: [
{ config: "h100_cutlass_backend", shard: 1, num_shards: 1, runner: "linux.aws.h100", owners: ["oncall:pt2"] },
]}
secrets: inherit
linux-jammy-cuda12_8-py3_10-gcc11-sm90-test:
name: linux-jammy-cuda12.8-py3.10-gcc11-sm90-cutlass-backend
uses: ./.github/workflows/_linux-test.yml
needs:
- linux-jammy-cuda12_8-py3_10-gcc11-sm90-build-cutlass-backend
with:
build-environment: linux-jammy-cuda12.8-py3.10-gcc11-sm90-cutlass-backend
docker-image: ${{ needs.linux-jammy-cuda12_8-py3_10-gcc11-sm90-build-cutlass-backend.outputs.docker-image }}
test-matrix: ${{ needs.linux-jammy-cuda12_8-py3_10-gcc11-sm90-build-cutlass-backend.outputs.test-matrix }}
secrets: inherit

View File

@ -1,154 +0,0 @@
name: inductor-perf-b200
on:
schedule:
- cron: 0 7 * * 1-6
- cron: 0 7 * * 0
# NB: GitHub has an upper limit of 10 inputs here, so before we can sort it
# out, let try to run torchao cudagraphs_low_precision as part of cudagraphs
workflow_dispatch:
inputs:
training:
description: Run training (on by default)?
required: false
type: boolean
default: true
inference:
description: Run inference (on by default)?
required: false
type: boolean
default: true
default:
description: Run inductor_default?
required: false
type: boolean
default: false
dynamic:
description: Run inductor_dynamic_shapes?
required: false
type: boolean
default: false
cppwrapper:
description: Run inductor_cpp_wrapper?
required: false
type: boolean
default: false
cudagraphs:
description: Run inductor_cudagraphs?
required: false
type: boolean
default: true
freezing_cudagraphs:
description: Run inductor_cudagraphs with freezing for inference?
required: false
type: boolean
default: false
aotinductor:
description: Run aot_inductor for inference?
required: false
type: boolean
default: false
maxautotune:
description: Run inductor_max_autotune?
required: false
type: boolean
default: false
benchmark_configs:
description: The list of configs used the benchmark
required: false
type: string
default: inductor_huggingface_perf_cuda_b200,inductor_timm_perf_cuda_b200,inductor_torchbench_perf_cuda_b200
concurrency:
group: ${{ github.workflow }}-${{ github.event.pull_request.number || github.ref_name }}-${{ github.ref_type == 'branch' && github.sha }}-${{ github.event_name == 'workflow_dispatch' }}-${{ github.event_name == 'schedule' }}
cancel-in-progress: true
permissions:
id-token: write
contents: read
jobs:
get-label-type:
name: get-label-type
uses: pytorch/pytorch/.github/workflows/_runner-determinator.yml@main
if: ${{ (github.event_name != 'schedule' || github.repository == 'pytorch/pytorch') && github.repository_owner == 'pytorch' }}
with:
triggering_actor: ${{ github.triggering_actor }}
issue_owner: ${{ github.event.pull_request.user.login || github.event.issue.user.login }}
curr_branch: ${{ github.head_ref || github.ref_name }}
curr_ref_type: ${{ github.ref_type }}
opt_out_experiments: lf
build:
name: cuda12.8-py3.10-gcc9-sm100
uses: ./.github/workflows/_linux-build.yml
needs: get-label-type
with:
runner_prefix: "${{ needs.get-label-type.outputs.label-type }}"
# Use a bigger runner here because CUDA_ARCH 9.0 is only built for H100
# or newer GPUs, so it doesn't benefit much from existing compiler cache
# from trunk. Also use a memory-intensive runner here because memory is
# usually the bottleneck
runner: linux.12xlarge.memory
build-environment: linux-jammy-cuda12.8-py3.10-gcc9-sm100
docker-image-name: ci-image:pytorch-linux-jammy-cuda12.8-cudnn9-py3-gcc9-inductor-benchmarks
cuda-arch-list: '10.0'
test-matrix: |
{ include: [
{ config: "inductor_huggingface_perf_cuda_b200", shard: 1, num_shards: 1, runner: "linux.dgx.b200" },
{ config: "inductor_timm_perf_cuda_b200", shard: 1, num_shards: 1, runner: "linux.dgx.b200" },
{ config: "inductor_torchbench_perf_cuda_b200", shard: 1, num_shards: 1, runner: "linux.dgx.b200" },
]}
selected-test-configs: ${{ inputs.benchmark_configs }}
build-additional-packages: "vision audio fbgemm torchao"
secrets: inherit
test-periodically:
name: cuda12.8-py3.10-gcc9-sm100
uses: ./.github/workflows/_linux-test.yml
needs: build
if: github.event.schedule == '0 7 * * 1-6'
with:
build-environment: linux-jammy-cuda12.8-py3.10-gcc9-sm100
dashboard-tag: training-true-inference-true-default-true-dynamic-true-cudagraphs-true-cppwrapper-true-aotinductor-true-freezing_cudagraphs-true-cudagraphs_low_precision-true
docker-image: ${{ needs.build.outputs.docker-image }}
test-matrix: ${{ needs.build.outputs.test-matrix }}
aws-role-to-assume: arn:aws:iam::308535385114:role/gha_workflow_s3_and_ecr_read_only
timeout-minutes: 720
disable-monitor: false
monitor-log-interval: 15
monitor-data-collect-interval: 4
secrets: inherit
test-weekly:
name: cuda12.8-py3.10-gcc9-sm100
uses: ./.github/workflows/_linux-test.yml
needs: build
if: github.event.schedule == '0 7 * * 0'
with:
build-environment: linux-jammy-cuda12.8-py3.10-gcc9-sm100
dashboard-tag: training-true-inference-true-default-true-dynamic-true-cudagraphs-true-cppwrapper-true-aotinductor-true-freezing_cudagraphs-true-maxautotune-true-freeze_autotune_cudagraphs-true-cudagraphs_low_precision-true
docker-image: ${{ needs.build.outputs.docker-image }}
test-matrix: ${{ needs.build.outputs.test-matrix }}
timeout-minutes: 1440
aws-role-to-assume: arn:aws:iam::308535385114:role/gha_workflow_s3_and_ecr_read_only
disable-monitor: false
monitor-log-interval: 15
monitor-data-collect-interval: 4
secrets: inherit
test:
name: cuda12.8-py3.10-gcc9-sm100
uses: ./.github/workflows/_linux-test.yml
needs: build
with:
build-environment: linux-jammy-cuda12.8-py3.10-gcc9-sm100
dashboard-tag: training-${{ inputs.training }}-inference-${{ inputs.inference }}-default-${{ inputs.default }}-dynamic-${{ inputs.dynamic }}-cudagraphs-${{ inputs.cudagraphs }}-cppwrapper-${{ inputs.cppwrapper }}-aotinductor-${{ inputs.aotinductor }}-maxautotune-${{ inputs.maxautotune }}-freezing_cudagraphs-${{ inputs.freezing_cudagraphs }}-cudagraphs_low_precision-${{ inputs.cudagraphs }}
docker-image: ${{ needs.build.outputs.docker-image }}
test-matrix: ${{ needs.build.outputs.test-matrix }}
aws-role-to-assume: arn:aws:iam::308535385114:role/gha_workflow_s3_and_ecr_read_only
timeout-minutes: 720
disable-monitor: false
monitor-log-interval: 15
monitor-data-collect-interval: 4
secrets: inherit

View File

@ -2,7 +2,7 @@ name: inductor-perf-nightly-h100
on:
schedule:
- cron: 15 0,12 * * 1-6
- cron: 15 0,4,8,12,16,20 * * 1-6
- cron: 0 7 * * 0
# NB: GitHub has an upper limit of 10 inputs here, so before we can sort it
# out, let try to run torchao cudagraphs_low_precision as part of cudagraphs
@ -126,7 +126,7 @@ jobs:
name: cuda12.8-py3.10-gcc9-sm90
uses: ./.github/workflows/_linux-test.yml
needs: build
if: github.event.schedule == '15 0,12 * * 1-6'
if: github.event.schedule == '15 0,4,8,12,16,20 * * 1-6'
with:
build-environment: linux-jammy-cuda12.8-py3.10-gcc9-sm90
dashboard-tag: training-true-inference-true-default-true-dynamic-true-cudagraphs-true-cppwrapper-true-aotinductor-true-freezing_cudagraphs-true-cudagraphs_low_precision-true

View File

@ -88,23 +88,23 @@ jobs:
docker-image-name: ci-image:pytorch-linux-jammy-rocm-n-py3
test-matrix: |
{ include: [
{ config: "inductor_huggingface_perf_rocm", shard: 1, num_shards: 4, runner: "linux.rocm.gpu.gfx942.2" },
{ config: "inductor_huggingface_perf_rocm", shard: 2, num_shards: 4, runner: "linux.rocm.gpu.gfx942.2" },
{ config: "inductor_huggingface_perf_rocm", shard: 3, num_shards: 4, runner: "linux.rocm.gpu.gfx942.2" },
{ config: "inductor_huggingface_perf_rocm", shard: 4, num_shards: 4, runner: "linux.rocm.gpu.gfx942.2" },
{ config: "inductor_timm_perf_rocm", shard: 1, num_shards: 5, runner: "linux.rocm.gpu.gfx942.2" },
{ config: "inductor_timm_perf_rocm", shard: 2, num_shards: 5, runner: "linux.rocm.gpu.gfx942.2" },
{ config: "inductor_timm_perf_rocm", shard: 3, num_shards: 5, runner: "linux.rocm.gpu.gfx942.2" },
{ config: "inductor_timm_perf_rocm", shard: 4, num_shards: 5, runner: "linux.rocm.gpu.gfx942.2" },
{ config: "inductor_timm_perf_rocm", shard: 5, num_shards: 5, runner: "linux.rocm.gpu.gfx942.2" },
{ config: "inductor_torchbench_perf_rocm", shard: 1, num_shards: 8, runner: "linux.rocm.gpu.gfx942.2" },
{ config: "inductor_torchbench_perf_rocm", shard: 2, num_shards: 8, runner: "linux.rocm.gpu.gfx942.2" },
{ config: "inductor_torchbench_perf_rocm", shard: 3, num_shards: 8, runner: "linux.rocm.gpu.gfx942.2" },
{ config: "inductor_torchbench_perf_rocm", shard: 4, num_shards: 8, runner: "linux.rocm.gpu.gfx942.2" },
{ config: "inductor_torchbench_perf_rocm", shard: 5, num_shards: 8, runner: "linux.rocm.gpu.gfx942.2" },
{ config: "inductor_torchbench_perf_rocm", shard: 6, num_shards: 8, runner: "linux.rocm.gpu.gfx942.2" },
{ config: "inductor_torchbench_perf_rocm", shard: 7, num_shards: 8, runner: "linux.rocm.gpu.gfx942.2" },
{ config: "inductor_torchbench_perf_rocm", shard: 8, num_shards: 8, runner: "linux.rocm.gpu.gfx942.2" },
{ config: "inductor_huggingface_perf_rocm", shard: 1, num_shards: 4, runner: "linux.rocm.gpu.mi300.2" },
{ config: "inductor_huggingface_perf_rocm", shard: 2, num_shards: 4, runner: "linux.rocm.gpu.mi300.2" },
{ config: "inductor_huggingface_perf_rocm", shard: 3, num_shards: 4, runner: "linux.rocm.gpu.mi300.2" },
{ config: "inductor_huggingface_perf_rocm", shard: 4, num_shards: 4, runner: "linux.rocm.gpu.mi300.2" },
{ config: "inductor_timm_perf_rocm", shard: 1, num_shards: 5, runner: "linux.rocm.gpu.mi300.2" },
{ config: "inductor_timm_perf_rocm", shard: 2, num_shards: 5, runner: "linux.rocm.gpu.mi300.2" },
{ config: "inductor_timm_perf_rocm", shard: 3, num_shards: 5, runner: "linux.rocm.gpu.mi300.2" },
{ config: "inductor_timm_perf_rocm", shard: 4, num_shards: 5, runner: "linux.rocm.gpu.mi300.2" },
{ config: "inductor_timm_perf_rocm", shard: 5, num_shards: 5, runner: "linux.rocm.gpu.mi300.2" },
{ config: "inductor_torchbench_perf_rocm", shard: 1, num_shards: 8, runner: "linux.rocm.gpu.mi300.2" },
{ config: "inductor_torchbench_perf_rocm", shard: 2, num_shards: 8, runner: "linux.rocm.gpu.mi300.2" },
{ config: "inductor_torchbench_perf_rocm", shard: 3, num_shards: 8, runner: "linux.rocm.gpu.mi300.2" },
{ config: "inductor_torchbench_perf_rocm", shard: 4, num_shards: 8, runner: "linux.rocm.gpu.mi300.2" },
{ config: "inductor_torchbench_perf_rocm", shard: 5, num_shards: 8, runner: "linux.rocm.gpu.mi300.2" },
{ config: "inductor_torchbench_perf_rocm", shard: 6, num_shards: 8, runner: "linux.rocm.gpu.mi300.2" },
{ config: "inductor_torchbench_perf_rocm", shard: 7, num_shards: 8, runner: "linux.rocm.gpu.mi300.2" },
{ config: "inductor_torchbench_perf_rocm", shard: 8, num_shards: 8, runner: "linux.rocm.gpu.mi300.2" },
]}
secrets: inherit

View File

@ -81,21 +81,21 @@ jobs:
sync-tag: rocm-build
test-matrix: |
{ include: [
{ config: "dynamo_eager_torchbench", shard: 1, num_shards: 2, runner: "linux.rocm.gpu.gfx942.2" },
{ config: "dynamo_eager_torchbench", shard: 2, num_shards: 2, runner: "linux.rocm.gpu.gfx942.2" },
{ config: "dynamo_eager_huggingface", shard: 1, num_shards: 1, runner: "linux.rocm.gpu.gfx942.2" },
{ config: "dynamo_eager_timm", shard: 1, num_shards: 2, runner: "linux.rocm.gpu.gfx942.2" },
{ config: "dynamo_eager_timm", shard: 2, num_shards: 2, runner: "linux.rocm.gpu.gfx942.2" },
{ config: "aot_eager_torchbench", shard: 1, num_shards: 2, runner: "linux.rocm.gpu.gfx942.2" },
{ config: "aot_eager_torchbench", shard: 2, num_shards: 2, runner: "linux.rocm.gpu.gfx942.2" },
{ config: "aot_eager_huggingface", shard: 1, num_shards: 1, runner: "linux.rocm.gpu.gfx942.2" },
{ config: "aot_eager_timm", shard: 1, num_shards: 2, runner: "linux.rocm.gpu.gfx942.2" },
{ config: "aot_eager_timm", shard: 2, num_shards: 2, runner: "linux.rocm.gpu.gfx942.2" },
{ config: "dynamic_aot_eager_torchbench", shard: 1, num_shards: 2, runner: "linux.rocm.gpu.gfx942.2" },
{ config: "dynamic_aot_eager_torchbench", shard: 2, num_shards: 2, runner: "linux.rocm.gpu.gfx942.2" },
{ config: "dynamic_aot_eager_huggingface", shard: 1, num_shards: 1, runner: "linux.rocm.gpu.gfx942.2" },
{ config: "dynamic_aot_eager_timm", shard: 1, num_shards: 2, runner: "linux.rocm.gpu.gfx942.2" },
{ config: "dynamic_aot_eager_timm", shard: 2, num_shards: 2, runner: "linux.rocm.gpu.gfx942.2" },
{ config: "dynamo_eager_torchbench", shard: 1, num_shards: 2, runner: "linux.rocm.gpu.mi300.2" },
{ config: "dynamo_eager_torchbench", shard: 2, num_shards: 2, runner: "linux.rocm.gpu.mi300.2" },
{ config: "dynamo_eager_huggingface", shard: 1, num_shards: 1, runner: "linux.rocm.gpu.mi300.2" },
{ config: "dynamo_eager_timm", shard: 1, num_shards: 2, runner: "linux.rocm.gpu.mi300.2" },
{ config: "dynamo_eager_timm", shard: 2, num_shards: 2, runner: "linux.rocm.gpu.mi300.2" },
{ config: "aot_eager_torchbench", shard: 1, num_shards: 2, runner: "linux.rocm.gpu.mi300.2" },
{ config: "aot_eager_torchbench", shard: 2, num_shards: 2, runner: "linux.rocm.gpu.mi300.2" },
{ config: "aot_eager_huggingface", shard: 1, num_shards: 1, runner: "linux.rocm.gpu.mi300.2" },
{ config: "aot_eager_timm", shard: 1, num_shards: 2, runner: "linux.rocm.gpu.mi300.2" },
{ config: "aot_eager_timm", shard: 2, num_shards: 2, runner: "linux.rocm.gpu.mi300.2" },
{ config: "dynamic_aot_eager_torchbench", shard: 1, num_shards: 2, runner: "linux.rocm.gpu.mi300.2" },
{ config: "dynamic_aot_eager_torchbench", shard: 2, num_shards: 2, runner: "linux.rocm.gpu.mi300.2" },
{ config: "dynamic_aot_eager_huggingface", shard: 1, num_shards: 1, runner: "linux.rocm.gpu.mi300.2" },
{ config: "dynamic_aot_eager_timm", shard: 1, num_shards: 2, runner: "linux.rocm.gpu.mi300.2" },
{ config: "dynamic_aot_eager_timm", shard: 2, num_shards: 2, runner: "linux.rocm.gpu.mi300.2" },
]}
secrets: inherit

View File

@ -47,8 +47,8 @@ jobs:
docker-image-name: ci-image:pytorch-linux-jammy-rocm-n-py3
test-matrix: |
{ include: [
{ config: "inductor", shard: 1, num_shards: 2, runner: "linux.rocm.gpu.gfx942.2" },
{ config: "inductor", shard: 2, num_shards: 2, runner: "linux.rocm.gpu.gfx942.2" },
{ config: "inductor", shard: 1, num_shards: 2, runner: "linux.rocm.gpu.mi300.2" },
{ config: "inductor", shard: 2, num_shards: 2, runner: "linux.rocm.gpu.mi300.2" },
]}
secrets: inherit

View File

@ -28,6 +28,7 @@ jobs:
# than our AWS macos-m1-14 runners
test-matrix: |
{ include: [
{ config: "test_mps", shard: 1, num_shards: 1, runner: "macos-m1-13" },
{ config: "test_mps", shard: 1, num_shards: 1, runner: "macos-m1-14" },
{ config: "test_mps", shard: 1, num_shards: 1, runner: "macos-m2-15" },
]}

View File

@ -75,11 +75,10 @@ jobs:
repo-owner: pytorch
branch: main
pin-folder: .github/ci_commit_pins
# executorch jobs are disabled since it needs some manual work for the hash update
# - repo-name: executorch
# repo-owner: pytorch
# branch: main
# pin-folder: .ci/docker/ci_commit_pins
- repo-name: executorch
repo-owner: pytorch
branch: main
pin-folder: .ci/docker/ci_commit_pins
- repo-name: triton
repo-owner: triton-lang
branch: main
@ -87,7 +86,7 @@ jobs:
- repo-name: vllm
repo-owner: vllm-project
branch: main
pin-folder: .github/ci_commit_pins
pin-folder: .ci/docker/ci_commit_pins
# Allow this to be triggered on either a schedule or on workflow_dispatch to allow for easier testing
if: github.repository_owner == 'pytorch' && (github.event_name == 'schedule' || github.event_name == 'workflow_dispatch')
steps:

View File

@ -59,9 +59,9 @@ jobs:
docker-image-name: ci-image:pytorch-linux-jammy-rocm-n-py3
test-matrix: |
{ include: [
{ config: "distributed", shard: 1, num_shards: 3, runner: "linux.rocm.gpu.gfx942.4", owners: ["module:rocm", "oncall:distributed"] },
{ config: "distributed", shard: 2, num_shards: 3, runner: "linux.rocm.gpu.gfx942.4", owners: ["module:rocm", "oncall:distributed"] },
{ config: "distributed", shard: 3, num_shards: 3, runner: "linux.rocm.gpu.gfx942.4", owners: ["module:rocm", "oncall:distributed"] },
{ config: "distributed", shard: 1, num_shards: 3, runner: "linux.rocm.gpu.mi300.4", owners: ["module:rocm", "oncall:distributed"] },
{ config: "distributed", shard: 2, num_shards: 3, runner: "linux.rocm.gpu.mi300.4", owners: ["module:rocm", "oncall:distributed"] },
{ config: "distributed", shard: 3, num_shards: 3, runner: "linux.rocm.gpu.mi300.4", owners: ["module:rocm", "oncall:distributed"] },
]}
secrets: inherit

View File

@ -51,6 +51,37 @@ jobs:
curr_branch: ${{ github.head_ref || github.ref_name }}
curr_ref_type: ${{ github.ref_type }}
linux-jammy-cuda12_4-py3_10-gcc11-sm89-build:
name: linux-jammy-cuda12.4-py3.10-gcc11-sm89
uses: ./.github/workflows/_linux-build.yml
needs: get-label-type
with:
runner_prefix: "${{ needs.get-label-type.outputs.label-type }}"
build-environment: linux-jammy-cuda12.4-py3.10-gcc11-sm89
docker-image-name: ci-image:pytorch-linux-jammy-cuda12.4-cudnn9-py3-gcc11
cuda-arch-list: 8.9
test-matrix: |
{ include: [
{ config: "default", shard: 1, num_shards: 5, runner: "${{ needs.get-label-type.outputs.label-type }}linux.g6.4xlarge.experimental.nvidia.gpu" },
{ config: "default", shard: 2, num_shards: 5, runner: "${{ needs.get-label-type.outputs.label-type }}linux.g6.4xlarge.experimental.nvidia.gpu" },
{ config: "default", shard: 3, num_shards: 5, runner: "${{ needs.get-label-type.outputs.label-type }}linux.g6.4xlarge.experimental.nvidia.gpu" },
{ config: "default", shard: 4, num_shards: 5, runner: "${{ needs.get-label-type.outputs.label-type }}linux.g6.4xlarge.experimental.nvidia.gpu" },
{ config: "default", shard: 5, num_shards: 5, runner: "${{ needs.get-label-type.outputs.label-type }}linux.g6.4xlarge.experimental.nvidia.gpu" },
]}
secrets: inherit
linux-jammy-cuda12_4-py3_10-gcc11-sm89-test:
name: linux-jammy-cuda12.4-py3.10-gcc11-sm89
uses: ./.github/workflows/_linux-test.yml
needs:
- linux-jammy-cuda12_4-py3_10-gcc11-sm89-build
- target-determination
with:
build-environment: linux-jammy-cuda12.4-py3.10-gcc11-sm89
docker-image: ${{ needs.linux-jammy-cuda12_4-py3_10-gcc11-sm89-build.outputs.docker-image }}
test-matrix: ${{ needs.linux-jammy-cuda12_4-py3_10-gcc11-sm89-build.outputs.test-matrix }}
secrets: inherit
linux-jammy-cuda12_4-py3_10-gcc11-build:
name: linux-jammy-cuda12.4-py3.10-gcc11
uses: ./.github/workflows/_linux-build.yml

View File

@ -292,14 +292,13 @@ jobs:
runner_prefix: "${{ needs.get-label-type.outputs.label-type }}"
build-environment: linux-jammy-cuda12.8-py3.10-gcc11
docker-image-name: ci-image:pytorch-linux-jammy-cuda12.8-cudnn9-py3-gcc11
cuda-arch-list: 8.9
test-matrix: |
{ include: [
{ config: "default", shard: 1, num_shards: 5, runner: "${{ needs.get-label-type.outputs.label-type }}linux.g6.4xlarge.experimental.nvidia.gpu" },
{ config: "default", shard: 2, num_shards: 5, runner: "${{ needs.get-label-type.outputs.label-type }}linux.g6.4xlarge.experimental.nvidia.gpu" },
{ config: "default", shard: 3, num_shards: 5, runner: "${{ needs.get-label-type.outputs.label-type }}linux.g6.4xlarge.experimental.nvidia.gpu" },
{ config: "default", shard: 4, num_shards: 5, runner: "${{ needs.get-label-type.outputs.label-type }}linux.g6.4xlarge.experimental.nvidia.gpu" },
{ config: "default", shard: 5, num_shards: 5, runner: "${{ needs.get-label-type.outputs.label-type }}linux.g6.4xlarge.experimental.nvidia.gpu" },
{ config: "default", shard: 1, num_shards: 5, runner: "${{ needs.get-label-type.outputs.label-type }}linux.4xlarge.nvidia.gpu" },
{ config: "default", shard: 2, num_shards: 5, runner: "${{ needs.get-label-type.outputs.label-type }}linux.4xlarge.nvidia.gpu" },
{ config: "default", shard: 3, num_shards: 5, runner: "${{ needs.get-label-type.outputs.label-type }}linux.4xlarge.nvidia.gpu" },
{ config: "default", shard: 4, num_shards: 5, runner: "${{ needs.get-label-type.outputs.label-type }}linux.4xlarge.nvidia.gpu" },
{ config: "default", shard: 5, num_shards: 5, runner: "${{ needs.get-label-type.outputs.label-type }}linux.4xlarge.nvidia.gpu" },
]}
secrets: inherit
@ -316,6 +315,21 @@ jobs:
test-matrix: ${{ needs.linux-jammy-cuda12_8-py3_10-gcc11-build.outputs.test-matrix }}
secrets: inherit
linux-jammy-py3-clang18-mobile-build:
name: linux-jammy-py3-clang18-mobile-build
uses: ./.github/workflows/_linux-build.yml
needs: get-label-type
with:
runner_prefix: "${{ needs.get-label-type.outputs.label-type }}"
build-environment: linux-jammy-py3-clang12-mobile-build
docker-image-name: ci-image:pytorch-linux-jammy-py3-clang18-asan
build-generates-artifacts: false
test-matrix: |
{ include: [
{ config: "default", shard: 1, num_shards: 1 },
]}
secrets: inherit
linux-jammy-cuda12_8-cudnn9-py3_9-clang12-build:
name: linux-jammy-cuda12.8-cudnn9-py3.9-clang12
uses: ./.github/workflows/_linux-build.yml
@ -403,8 +417,38 @@ jobs:
]}
secrets: inherit
linux-jammy-cuda12_8-py3_10-gcc11-sm89-build:
name: linux-jammy-cuda12.8-py3.10-gcc11-sm89
uses: ./.github/workflows/_linux-build.yml
needs: get-label-type
with:
runner_prefix: "${{ needs.get-label-type.outputs.label-type }}"
build-environment: linux-jammy-cuda12.8-py3.10-gcc11-sm89
docker-image-name: ci-image:pytorch-linux-jammy-cuda12.8-cudnn9-py3-gcc11
cuda-arch-list: 8.9
test-matrix: |
{ include: [
{ config: "default", shard: 1, num_shards: 5, runner: "${{ needs.get-label-type.outputs.label-type }}linux.g6.4xlarge.experimental.nvidia.gpu" },
{ config: "default", shard: 2, num_shards: 5, runner: "${{ needs.get-label-type.outputs.label-type }}linux.g6.4xlarge.experimental.nvidia.gpu" },
{ config: "default", shard: 3, num_shards: 5, runner: "${{ needs.get-label-type.outputs.label-type }}linux.g6.4xlarge.experimental.nvidia.gpu" },
{ config: "default", shard: 4, num_shards: 5, runner: "${{ needs.get-label-type.outputs.label-type }}linux.g6.4xlarge.experimental.nvidia.gpu" },
{ config: "default", shard: 5, num_shards: 5, runner: "${{ needs.get-label-type.outputs.label-type }}linux.g6.4xlarge.experimental.nvidia.gpu" },
]}
secrets: inherit
linux-jammy-cuda12_8-py3_10-gcc11-sm89-test:
name: linux-jammy-cuda12.8-py3.10-gcc11-sm89
uses: ./.github/workflows/_linux-test.yml
needs:
- linux-jammy-cuda12_8-py3_10-gcc11-sm89-build
- target-determination
with:
build-environment: linux-jammy-cuda12.8-py3.10-gcc11-sm89
docker-image: ${{ needs.linux-jammy-cuda12_8-py3_10-gcc11-sm89-build.outputs.docker-image }}
test-matrix: ${{ needs.linux-jammy-cuda12_8-py3_10-gcc11-sm89-build.outputs.test-matrix }}
secrets: inherit
linux-jammy-py3-clang12-executorch-build:
if: false # Docker build needs pin update
name: linux-jammy-py3-clang12-executorch
uses: ./.github/workflows/_linux-build.yml
needs: get-label-type

View File

@ -26,7 +26,7 @@ jobs:
architecture: x64
check-latest: false
cache: pip
- run: pip install pyyaml==6.0.2
- run: pip install pyyaml==6.0
- name: Setup committer id
run: |

View File

@ -48,12 +48,12 @@ jobs:
sync-tag: rocm-build
test-matrix: |
{ include: [
{ config: "default", shard: 1, num_shards: 6, runner: "linux.rocm.gpu.gfx942.2" },
{ config: "default", shard: 2, num_shards: 6, runner: "linux.rocm.gpu.gfx942.2" },
{ config: "default", shard: 3, num_shards: 6, runner: "linux.rocm.gpu.gfx942.2" },
{ config: "default", shard: 4, num_shards: 6, runner: "linux.rocm.gpu.gfx942.2" },
{ config: "default", shard: 5, num_shards: 6, runner: "linux.rocm.gpu.gfx942.2" },
{ config: "default", shard: 6, num_shards: 6, runner: "linux.rocm.gpu.gfx942.2" },
{ config: "default", shard: 1, num_shards: 6, runner: "linux.rocm.gpu.mi300.2" },
{ config: "default", shard: 2, num_shards: 6, runner: "linux.rocm.gpu.mi300.2" },
{ config: "default", shard: 3, num_shards: 6, runner: "linux.rocm.gpu.mi300.2" },
{ config: "default", shard: 4, num_shards: 6, runner: "linux.rocm.gpu.mi300.2" },
{ config: "default", shard: 5, num_shards: 6, runner: "linux.rocm.gpu.mi300.2" },
{ config: "default", shard: 6, num_shards: 6, runner: "linux.rocm.gpu.mi300.2" },
]}
secrets: inherit

View File

@ -1,68 +0,0 @@
name: rocm-mi355
on:
workflow_dispatch:
schedule:
- cron: 30 11,1 * * * # about 4:30am PDT and 6:30pm PDT
concurrency:
group: ${{ github.workflow }}-${{ github.event.pull_request.number || github.ref_name }}-${{ github.ref_type == 'branch' && github.sha }}-${{ github.event_name == 'workflow_dispatch' }}-${{ github.event_name == 'schedule' }}
cancel-in-progress: true
permissions: read-all
jobs:
target-determination:
if: github.repository_owner == 'pytorch'
name: before-test
uses: ./.github/workflows/target_determination.yml
permissions:
id-token: write
contents: read
get-label-type:
name: get-label-type
uses: pytorch/pytorch/.github/workflows/_runner-determinator.yml@main
if: ${{ (github.event_name != 'schedule' || github.repository == 'pytorch/pytorch') && github.repository_owner == 'pytorch' }}
with:
triggering_actor: ${{ github.triggering_actor }}
issue_owner: ${{ github.event.pull_request.user.login || github.event.issue.user.login }}
curr_branch: ${{ github.head_ref || github.ref_name }}
curr_ref_type: ${{ github.ref_type }}
linux-noble-rocm-py3_12-build:
if: ${{ (github.event_name != 'schedule' || github.repository == 'pytorch/pytorch') && github.repository_owner == 'pytorch' }}
name: linux-noble-rocm-py3.12-mi355
uses: ./.github/workflows/_linux-build.yml
needs: get-label-type
with:
runner_prefix: "${{ needs.get-label-type.outputs.label-type }}"
build-environment: linux-noble-rocm-py3.12-mi355
docker-image-name: ci-image:pytorch-linux-noble-rocm-alpha-py3
sync-tag: rocm-build
test-matrix: |
{ include: [
{ config: "default", shard: 1, num_shards: 6, runner: "linux.rocm.gpu.mi355.2" },
{ config: "default", shard: 2, num_shards: 6, runner: "linux.rocm.gpu.mi355.2" },
{ config: "default", shard: 3, num_shards: 6, runner: "linux.rocm.gpu.mi355.2" },
{ config: "default", shard: 4, num_shards: 6, runner: "linux.rocm.gpu.mi355.2" },
{ config: "default", shard: 5, num_shards: 6, runner: "linux.rocm.gpu.mi355.2" },
{ config: "default", shard: 6, num_shards: 6, runner: "linux.rocm.gpu.mi355.2" },
]}
secrets: inherit
linux-noble-rocm-py3_12-test:
permissions:
id-token: write
contents: read
name: linux-noble-rocm-py3.12-mi355
uses: ./.github/workflows/_rocm-test.yml
needs:
- linux-noble-rocm-py3_12-build
- target-determination
with:
build-environment: linux-noble-rocm-py3.12-mi355
docker-image: ${{ needs.linux-noble-rocm-py3_12-build.outputs.docker-image }}
test-matrix: ${{ needs.linux-noble-rocm-py3_12-build.outputs.test-matrix }}
tests-to-include: "test_nn test_torch test_cuda test_ops test_unary_ufuncs test_binary_ufuncs test_autograd inductor/test_torchinductor"
secrets: inherit

View File

@ -10,10 +10,6 @@ concurrency:
group: ${{ github.workflow }}-${{ github.event.pull_request.number || github.ref_name }}-${{ github.ref_type == 'branch' && github.sha }}-${{ github.event_name == 'workflow_dispatch' }}-${{ github.event_name == 'schedule' }}
cancel-in-progress: true
permissions:
id-token: write
contents: read
jobs:
get-default-label-prefix:
if: github.repository_owner == 'pytorch'

View File

@ -94,6 +94,7 @@ jobs:
{ config: "default", shard: 1, num_shards: 3, runner: "macos-m1-stable" },
{ config: "default", shard: 2, num_shards: 3, runner: "macos-m1-stable" },
{ config: "default", shard: 3, num_shards: 3, runner: "macos-m1-stable" },
{ config: "mps", shard: 1, num_shards: 1, runner: "macos-m1-13" },
{ config: "mps", shard: 1, num_shards: 1, runner: "macos-m1-14" },
{ config: "mps", shard: 1, num_shards: 1, runner: "macos-m2-15" },
]}
@ -205,7 +206,7 @@ jobs:
with:
runner_prefix: "${{ needs.get-label-type.outputs.label-type }}"
build-environment: linux-jammy-py3.9-gcc11
docker-image-name: ci-image:pytorch-linux-jammy-py3.9-gcc11-inductor-benchmarks
docker-image-name: ci-image:pytorch-linux-jammy-py3.9-gcc11
test-matrix: |
{ include: [
{ config: "verify_cachebench", shard: 1, num_shards: 1, runner: "${{ needs.get-label-type.outputs.label-type }}linux.2xlarge" },

View File

@ -28,7 +28,7 @@ jobs:
check-latest: false
cache: pip
architecture: x64
- run: pip install pyyaml==6.0.2
- run: pip install pyyaml==6.0
- name: Setup committer id
run: |

View File

@ -25,7 +25,7 @@ jobs:
architecture: x64
check-latest: false
cache: pip
- run: pip install pyyaml==6.0.2
- run: pip install pyyaml==6.0
- name: Setup committer id
run: |

View File

@ -23,7 +23,7 @@ jobs:
with:
repository: pytorch/pytorch
stable-branch: viable/strict
requires: '[\"pull\", \"trunk\", \"lint\", \"linux-binary\", \"linux-aarch64\"]'
requires: '[\"pull\", \"trunk\", \"lint\", \"linux-binary\"]'
secret-bot-token: ${{ secrets.MERGEBOT_TOKEN }}
clickhouse-url: ${{ secrets.CLICKHOUSE_URL }}
clickhouse-username: ${{ secrets.CLICKHOUSE_VIABLESTRICT_USERNAME }}

View File

@ -14,7 +14,6 @@ on:
- inductor-periodic
- rocm
- rocm-mi300
- rocm-mi355
- inductor-micro-benchmark
- inductor-micro-benchmark-x86
- inductor-cu124

View File

@ -1,187 +0,0 @@
name: windows-arm64-build-test
on:
push:
tags:
- ciflow/win-arm64/*
env:
GIT_DEFAULT_BRANCH: ${{ github.event.repository.default_branch }}
PYTHON_VERSION: "3.12"
PYTORCH_ROOT: ${{ github.workspace }}/pytorch
DOWNLOADS_DIR: c:\temp\downloads
DEPENDENCIES_DIR: c:\temp\dependencies
ENABLE_APL: 1
ENABLE_OPENBLAS: 0
BUILD_TYPE: release
permissions:
id-token: write
contents: read
jobs:
build:
# Don't run on forked repos.
if: github.repository_owner == 'pytorch'
runs-on: "windows-11-arm64-preview"
timeout-minutes: 240
steps:
- name: configure aws credentials
id: aws_creds
uses: aws-actions/configure-aws-credentials@v4
with:
role-to-assume: arn:aws:iam::308535385114:role/gha_workflow_sscache
aws-region: us-east-1
role-duration-seconds: 18000
- name: Enable long paths
shell: cmd
run: |
git config --system --get core.longpaths || echo "core.longpaths is not set, setting it now"
git config --system core.longpaths true
- name: Git checkout PyTorch
uses: actions/checkout@v4
with:
path: pytorch
submodules: recursive
- name: Bootstrap Python
shell: cmd
run: |
"pytorch/.ci/pytorch/windows/arm64/bootstrap_python.bat"
- name: Parse ref
id: parse-ref
shell: bash
run: python pytorch/.github/scripts/parse_ref.py
- name: Get workflow job id
shell: bash
id: get-job-id
run: |
set -eux
python pytorch/.github/scripts/get_workflow_job_id.py "${GITHUB_RUN_ID}" "${RUNNER_NAME}"
env:
GITHUB_TOKEN: ${{ secrets.GITHUB_TOKEN }}
- name: Bootstrap APL
shell: cmd
run: |
"pytorch/.ci/pytorch/windows/arm64/bootstrap_apl.bat"
- name: Bootstrap Rust
shell: cmd
run: |
"pytorch/.ci/pytorch/windows/arm64/bootstrap_rust.bat"
- name: Bootstrap sccache
shell: cmd
run: |
"pytorch/.ci/pytorch/windows/arm64/bootstrap_sccache.bat"
- name: Bootstrap Libuv
shell: cmd
run: |
"pytorch/.ci/pytorch/windows/arm64/bootstrap_libuv.bat"
- name: Build
id: build
shell: cmd
env:
PYTORCH_FINAL_PACKAGE_DIR: C:/${{ github.run_id }}/build-results/
BRANCH: ${{ steps.parse-ref.outputs.branch }}
BUILD_WHEEL: 1
MAX_JOBS: 8
PYTHON_VERSION: "3.12"
SCCACHE_BUCKET: "ossci-compiler-cache"
SCCACHE_S3_KEY_PREFIX: ${{ github.workflow }}
SCCACHE_REGION: us-east-1
VC_PRODUCT: "BuildTools"
VC_VERSION: ""
ALPINE_IMAGE: "308535385114.dkr.ecr.us-east-1.amazonaws.com/tool/alpine"
AWS_DEFAULT_REGION: us-east-1
USE_CUDA: '0'
USE_XPU: '0'
OUR_GITHUB_JOB_ID: ${{ steps.get-job-id.outputs.job-id }}
run: |
cd pytorch
call "C:\Program Files\Microsoft Visual Studio\2022\Enterprise\VC\Auxiliary\Build\vcvarsall.bat" arm64
powershell -ExecutionPolicy Bypass -File ".ci/pytorch/win-arm64-build.ps1"
- name: Upload artifacts
uses: actions/upload-artifact@v4.4.0
if: always()
with:
name: torch-wheel-win-arm64-py3-12
retention-days: 14
if-no-files-found: error
path: C:\${{ github.run_id }}\build-results
test:
if: github.repository_owner == 'pytorch'
strategy:
fail-fast: false
runs-on: "windows-11-arm64-preview"
needs: build
steps:
- name: Enable long paths
shell: cmd
run: |
git config --system --get core.longpaths || echo "core.longpaths is not set, setting it now"
git config --system core.longpaths true
- name: Git checkout PyTorch
uses: actions/checkout@v4
with:
path: pytorch
submodules: recursive
- name: Bootstrap Python
shell: cmd
run: |
"pytorch/.ci/pytorch/windows/arm64/bootstrap_python.bat"
- name: Bootstrap Rust
shell: cmd
run: |
"pytorch/.ci/pytorch/windows/arm64/bootstrap_rust.bat"
- name: Get workflow job id
shell: bash
id: get-job-id
run: |
set -eux
python pytorch/.github/scripts/get_workflow_job_id.py "${GITHUB_RUN_ID}" "${RUNNER_NAME}"
env:
GITHUB_TOKEN: ${{ secrets.GITHUB_TOKEN }}
- name: Download Build Artifacts
uses: actions/download-artifact@v4.1.7
with:
name: torch-wheel-win-arm64-py3-12
path: C:\${{ github.run_id }}\build-results
- name: Test
id: test
shell: cmd
env:
USE_CUDA: '0'
INSTALL_WINDOWS_SDK: 1
PYTHON_VERSION: "3.12"
VC_PRODUCT: "BuildTools"
AWS_DEFAULT_REGION: us-east-1
GITHUB_REPOSITORY: ${{ github.repository }}
GITHUB_WORKFLOW: ${{ github.workflow }}
GITHUB_JOB: ${{ github.job }}
GITHUB_RUN_ID: ${{ github.run_id }}
GITHUB_RUN_NUMBER: ${{ github.run_number }}
GITHUB_RUN_ATTEMPT: ${{ github.run_attempt }}
JOB_ID: ${{ steps.get-job-id.outputs.job-id }}
JOB_NAME: ${{ steps.get-job-id.outputs.job-name }}
PYTORCH_FINAL_PACKAGE_DIR: C:/${{ github.run_id }}/build-results/
run: |
mkdir "%PYTORCH_FINAL_PACKAGE_DIR%"
call pytorch/.ci/pytorch/windows/arm64/bootstrap_tests.bat
set GIT_BASH=C:\Program Files\Git\usr\bin\bash.exe
"%GIT_BASH%" -c "bash --noprofile --norc .ci/pytorch/win-arm64-test.sh"

View File

@ -39,16 +39,16 @@ init_command = [
'python3',
'tools/linter/adapters/pip_init.py',
'--dry-run={{DRYRUN}}',
'flake8==7.3.0',
'flake8-bugbear==24.12.12',
'flake8-comprehensions==3.16.0',
'flake8==6.1.0',
'flake8-bugbear==23.3.23',
'flake8-comprehensions==3.15.0',
'flake8-executable==2.1.3',
'flake8-logging-format==2024.24.12',
'flake8-pyi==25.5.0',
'flake8-simplify==0.22.0',
'flake8-logging-format==0.9.0',
'flake8-pyi==23.3.1',
'flake8-simplify==0.19.3',
'mccabe==0.7.0',
'pycodestyle==2.14.0',
'pyflakes==3.4.0',
'pycodestyle==2.11.1',
'pyflakes==3.1.0',
'torchfix==0.4.0 ; python_version >= "3.9" and python_version < "3.13"',
]
@ -158,16 +158,16 @@ init_command = [
'mypy==1.16.0',
'sympy==1.13.3',
'types-requests==2.27.25',
'types-pyyaml==6.0.2',
'types-pyyaml==6.0.1',
'types-tabulate==0.8.8',
'types-protobuf==5.29.1.20250403',
'types-setuptools==79.0.0.20250422',
'types-jinja2==2.11.9',
'types-colorama==0.4.6',
'filelock==3.18.0',
'filelock==3.13.1',
'junitparser==2.1.1',
'rich==14.1.0',
'pyyaml==6.0.2',
'rich==10.9.0',
'pyyaml==6.0.1',
'optree==0.13.0',
'dataclasses-json==0.6.7',
'pandas==2.2.3',
@ -1111,7 +1111,7 @@ init_command = [
'python3',
'tools/linter/adapters/pip_init.py',
'--dry-run={{DRYRUN}}',
'pyyaml==6.0.2',
'PyYAML==6.0.1',
]
[[linter]]
@ -1133,7 +1133,7 @@ init_command = [
'python3',
'tools/linter/adapters/pip_init.py',
'--dry-run={{DRYRUN}}',
'pyyaml==6.0.2',
'PyYAML==6.0.1',
]
[[linter]]
@ -1794,12 +1794,3 @@ include_patterns = [
'torch/header_only_apis.txt',
]
is_formatter = false
[[linter]]
code = "GB_REGISTRY"
include_patterns = ["torch/_dynamo/**/*.py"]
command = [
"python3",
"tools/linter/adapters/gb_registry_linter.py",
]

View File

@ -1,18 +1 @@
- This is the only AGENTS.md, there are no recursive AGENTS.md
- When you are working on a bug, first create a standalone file that
reproduces the bug and verify it fails in the expected way. Use this to
test if your changes work. Once the change is passing, find an appropriate
test file to add the test to and make sure to follow local conventions on
the test file.
- If you are running the real test suite, DO NOT run the entire test suite.
Instead run only a single test case, e.g., 'python test/test_torch.py TestTorch.test_dir'
- Do NOT run setup.py, you do not have a working build environment
- Do NOT run pre-commit, it is not setup
- To run lint, run 'lintrunner -a' (which will autoapply changes). lintrunner
ONLY accepts this flag, do not try to run on individual files.
- Do NOT attempt to install dependencies, you do not have Internet access
- When you are ready to make a PR, do exactly these steps:
- git stash -u
- git reset --hard $(cat /tmp/orig_work.txt) # NB: reset to the LOCAL branch, do NOT fetch
- git stash pop
- Resolve conflicts if necessary

View File

@ -679,7 +679,6 @@ cc_library(
[
"torch/*.h",
"torch/csrc/**/*.h",
"torch/nativert/**/*.h",
"torch/csrc/distributed/c10d/**/*.hpp",
"torch/lib/libshm/*.h",
],

View File

@ -564,7 +564,7 @@ if(MSVC)
set(CMAKE_NINJA_CMCLDEPS_RC OFF)
if(MSVC_Z7_OVERRIDE)
# CMake set debug flags to use /Z7
set(CMAKE_MSVC_DEBUG_INFORMATION_FORMAT "$<$<CONFIG:Debug,RelWithDebInfo>:Embedded>")
set(CMAKE_MSVC_DEBUG_INFORMATION_FORMAT Embedded)
endif()
foreach(
flag_var
@ -872,14 +872,6 @@ cmake_dependent_option(
"USE_CUDA OR USE_ROCM;NOT MSVC"
OFF)
cmake_dependent_option(
USE_FBGEMM_GENAI
"Whether to build FBGEMM GenAI quantized GEMM kernels.\
Will be disabled if not supported by the platform"
OFF
"USE_CUDA OR USE_ROCM"
OFF)
# CAVEAT: Again, Flash Attention2 will error while building for sm52 while Mem
# Eff Attention won't
cmake_dependent_option(
@ -913,10 +905,6 @@ if(USE_FBGEMM)
string(APPEND CMAKE_CXX_FLAGS " -DUSE_FBGEMM")
endif()
if(USE_FBGEMM_GENAI)
string(APPEND CMAKE_CXX_FLAGS " -DUSE_FBGEMM_GENAI")
endif()
if(USE_PYTORCH_QNNPACK)
string(APPEND CMAKE_CXX_FLAGS " -DUSE_PYTORCH_QNNPACK")
endif()

View File

@ -14,6 +14,7 @@
/torch/csrc/autograd/ @albanD @soulitzer
/torch/autograd/ @albanD @soulitzer
/tools/autograd/ @albanD @soulitzer
/torch/header_only_apis.txt @janeyx99
/torch/nn/ @albanD @jbschlosser @mikaylagawarecki
/torch/optim/ @albanD @janeyx99
/test/test_public_bindings.py @albanD
@ -50,12 +51,12 @@ nn/qat/ @jerryzh168
/torch/csrc/distributed/c10d/Ops.* @kwen2501
# ONNX Export
/torch/_dynamo/backends/onnxrt.py @titaiwangms @xadupre @justinchuby
/torch/csrc/jit/passes/onnx.h @titaiwangms @xadupre
/torch/csrc/jit/passes/onnx.cpp @titaiwangms @xadupre
/torch/csrc/jit/passes/onnx/ @titaiwangms @xadupre
/torch/onnx/ @titaiwangms @xadupre @justinchuby
/test/onnx/ @titaiwangms @xadupre @justinchuby
/torch/_dynamo/backends/onnxrt.py @wschin
/torch/csrc/jit/passes/onnx.h @titaiwangms @shubhambhokare1
/torch/csrc/jit/passes/onnx.cpp @titaiwangms @shubhambhokare1
/torch/csrc/jit/passes/onnx/ @titaiwangms @shubhambhokare1
/torch/onnx/ @titaiwangms @shubhambhokare1 @justinchuby @wschin
/test/onnx/ @titaiwangms @shubhambhokare1 @justinchuby @wschin
# CI
/.ci @pytorch/pytorch-dev-infra
@ -195,8 +196,3 @@ torch/backends/cudnn/ @eqy @syed-ahmed
/torch/utils/_cxx_pytree.py @XuehaiPan
/torch/utils/pytree/ @XuehaiPan
/torch/_dynamo/polyfills/pytree.py @XuehaiPan
# Relating to libtorch ABI
/torch/csrc/stable/ @janeyx99 @mikaylagawarecki
/torch/headeronly/ @janeyx99
/torch/header_only_apis.txt @janeyx99

View File

@ -47,6 +47,18 @@ WORKDIR /opt/pytorch
COPY . .
RUN git submodule update --init --recursive
FROM conda as build
ARG CMAKE_VARS
WORKDIR /opt/pytorch
COPY --from=conda /opt/conda /opt/conda
COPY --from=submodule-update /opt/pytorch /opt/pytorch
RUN make triton
RUN --mount=type=cache,target=/opt/ccache \
export eval ${CMAKE_VARS} && \
TORCH_CUDA_ARCH_LIST="7.0 7.2 7.5 8.0 8.6 8.7 8.9 9.0 9.0a" TORCH_NVCC_FLAGS="-Xfatbin -compress-all" \
CMAKE_PREFIX_PATH="$(dirname $(which conda))/../" \
python -m pip install --no-build-isolation -v .
FROM conda as conda-installs
ARG PYTHON_VERSION=3.11
ARG CUDA_PATH=cu121
@ -97,5 +109,4 @@ WORKDIR /workspace
FROM official as dev
# Should override the already installed version from the official-image stage
COPY --from=conda /opt/conda /opt/conda
COPY --from=submodule-update /opt/pytorch /opt/pytorch
COPY --from=build /opt/conda /opt/conda

View File

@ -276,7 +276,7 @@ conda install pkg-config libuv
pip install mkl-static mkl-include
# Add these packages if torch.distributed is needed.
# Distributed package support on Windows is a prototype feature and is subject to changes.
conda install -c conda-forge libuv
conda install -c conda-forge libuv=1.39
```
#### Install PyTorch
@ -294,12 +294,14 @@ Install PyTorch
```bash
export CMAKE_PREFIX_PATH="${CONDA_PREFIX:-'$(dirname $(which conda))/../'}:${CMAKE_PREFIX_PATH}"
python -m pip install -r requirements-build.txt
python -m pip install --no-build-isolation -v -e .
```
**On macOS**
```bash
python -m pip install -r requirements-build.txt
python -m pip install --no-build-isolation -v -e .
```

View File

@ -247,50 +247,6 @@ if(USE_MEM_EFF_ATTENTION)
list(APPEND ATen_ATTENTION_KERNEL_SRCS ${mem_eff_attention_cuda_kernels_cu})
endif()
IF(USE_FBGEMM_GENAI AND USE_ROCM AND NOT "gfx942" IN_LIST PYTORCH_ROCM_ARCH)
message(WARNING "Unsupported ROCM arch for FBGEMM GenAI, will set USE_FBGEMM_GENAI to OFF")
set(USE_FBGEMM_GENAI off)
endif()
# FBGEMM GenAI
IF(USE_FBGEMM_GENAI)
set(FBGEMM_THIRD_PARTY ${PROJECT_SOURCE_DIR}/third_party/fbgemm/external/)
set(FBGEMM_GENAI_DIR ${PROJECT_SOURCE_DIR}/third_party/fbgemm/fbgemm_gpu/experimental/gen_ai/src/quantize)
if(USE_ROCM)
# Only include the kernels we want to build to avoid increasing binary size.
file(GLOB_RECURSE fbgemm_genai_native_rocm_hip
"${FBGEMM_GENAI_DIR}/ck_extensions/fp8_rowwise_grouped/kernels/fp8_rowwise_grouped*.hip"
"${FBGEMM_GENAI_DIR}/ck_extensions/fp8_rowwise_grouped/fp8_rowwise_grouped_gemm.hip")
set_source_files_properties(${fbgemm_genai_native_rocm_hip} PROPERTIES HIP_SOURCE_PROPERTY_FORMAT 1)
# Add additional HIPCC compiler flags for performance
set(FBGEMM_GENAI_EXTRA_HIPCC_FLAGS
-mllvm
-amdgpu-coerce-illegal-types=1
-mllvm
-enable-post-misched=0
-mllvm
-greedy-reverse-local-assignment=1
-fhip-new-launch-api)
hip_add_library(
fbgemm_genai STATIC
${fbgemm_genai_native_rocm_hip}
HIPCC_OPTIONS ${HIP_HCC_FLAGS} ${FBGEMM_GENAI_EXTRA_HIPCC_FLAGS})
set_target_properties(fbgemm_genai PROPERTIES POSITION_INDEPENDENT_CODE ON)
target_compile_definitions(fbgemm_genai PRIVATE FBGEMM_GENAI_NO_EXTENDED_SHAPES)
target_include_directories(fbgemm_genai PUBLIC
# FBGEMM version of Composable Kernel is used due to some customizations
${FBGEMM_THIRD_PARTY}/composable_kernel/include
${FBGEMM_THIRD_PARTY}/composable_kernel/library/include
${FBGEMM_GENAI_DIR}/include/
${FBGEMM_GENAI_DIR}/common/include/
)
endif()
endif()
# XNNPACK
file(GLOB native_xnnpack "native/xnnpack/*.cpp")
@ -439,7 +395,6 @@ if(USE_ROCM)
list(APPEND ATen_HIP_INCLUDE ${CMAKE_CURRENT_SOURCE_DIR}/hip)
list(APPEND ATen_HIP_INCLUDE ${CMAKE_CURRENT_SOURCE_DIR}/../../../third_party/composable_kernel/include)
list(APPEND ATen_HIP_INCLUDE ${CMAKE_CURRENT_SOURCE_DIR}/../../../third_party/composable_kernel/library/include)
list(APPEND ATen_HIP_INCLUDE ${CMAKE_CURRENT_SOURCE_DIR}/../../../third_party/composable_kernel/example/ck_tile/01_fmha)
list(APPEND ATen_HIP_INCLUDE ${CMAKE_CURRENT_BINARY_DIR}/composable_kernel)
list(APPEND ATen_HIP_INCLUDE ${CMAKE_CURRENT_SOURCE_DIR}/../../../third_party/aiter/csrc/include)
_pytorch_rocm_generate_ck_conf()
@ -631,10 +586,17 @@ if(USE_CUDA AND NOT USE_ROCM)
CUDA::cufft_static_nocallback
)
if(NOT BUILD_LAZY_CUDA_LINALG)
list(APPEND ATen_CUDA_DEPENDENCY_LIBS
CUDA::cusolver_static
${CUDAToolkit_LIBRARY_DIR}/libcusolver_lapack_static.a # needed for libcusolver_static
)
if(CUDA_VERSION_MAJOR LESS_EQUAL 11)
list(APPEND ATen_CUDA_DEPENDENCY_LIBS
CUDA::cusolver_static
${CUDAToolkit_LIBRARY_DIR}/liblapack_static.a # needed for libcusolver_static
)
elseif(CUDA_VERSION_MAJOR GREATER_EQUAL 12)
list(APPEND ATen_CUDA_DEPENDENCY_LIBS
CUDA::cusolver_static
${CUDAToolkit_LIBRARY_DIR}/libcusolver_lapack_static.a # needed for libcusolver_static
)
endif()
endif()
else()
list(APPEND ATen_CUDA_DEPENDENCY_LIBS
@ -704,17 +666,21 @@ if(USE_MPS)
if(CAN_COMPILE_METAL)
foreach(SHADER ${native_mps_metal})
cmake_path(GET SHADER STEM TGT_STEM)
string(CONCAT TGT_BASIC ${TGT_STEM} "_31.air")
string(CONCAT TGT_BASIC ${TGT_STEM} "_30.air")
string(CONCAT TGT_BFLOAT ${TGT_STEM} "_31.air")
list(APPEND AIR_BASIC ${TGT_BASIC})
metal_to_air(${SHADER} ${TGT_BASIC} "-std=metal3.1")
list(APPEND AIR_BFLOAT ${TGT_BFLOAT})
metal_to_air(${SHADER} ${TGT_BASIC} "-std=metal3.0")
metal_to_air(${SHADER} ${TGT_BFLOAT} "-std=metal3.1")
endforeach()
air_to_metallib(kernels_basic.metallib ${AIR_BASIC})
air_to_metallib(kernels_bfloat.metallib ${AIR_BFLOAT})
add_custom_command(
COMMAND echo "// $$(date)" > metallib_dummy.cpp
DEPENDS kernels_basic.metallib
DEPENDS kernels_basic.metallib kernels_bfloat.metallib
OUTPUT metallib_dummy.cpp
COMMENT "Updating metallibs timestamp")
add_custom_target(metallibs DEPENDS kernels_basic.metallib metallib_dummy.cpp)
add_custom_target(metallibs DEPENDS kernels_basic.metallib kernels_bfloat.metallib metallib_dummy.cpp)
else()
file(MAKE_DIRECTORY "${CMAKE_CURRENT_BINARY_DIR}/native/mps")
foreach(SHADER ${native_mps_metal})

View File

@ -14,9 +14,7 @@
#include <ATen/cpu/FlushDenormal.h>
#ifdef USE_FBGEMM
C10_DIAGNOSTIC_PUSH_AND_IGNORED_IF_DEFINED("-Wextra-semi")
#include <fbgemm/Fbgemm.h>
C10_DIAGNOSTIC_POP()
#endif // USE_FBGEMM
#if defined(__aarch64__) && !defined(C10_MOBILE)
#include <cpuinfo.h>
@ -334,14 +332,6 @@ void Context::setBenchmarkLimitCuDNN(int b) {
benchmark_limit_cudnn = b;
}
bool Context::immediateMiopen() const {
return immediate_miopen;
}
void Context::setImmediateMiopen(bool b) {
immediate_miopen = b;
}
bool Context::allowTF32CuBLAS() const {
#ifdef USE_ROCM
const auto allow_tf32 = c10::utils::check_env(hipblaslt_allow_tf32);
@ -512,7 +502,7 @@ at::BlasBackend Context::blasPreferredBackend() {
static const std::vector<std::string> archs = {
"gfx90a", "gfx942",
#if ROCM_VERSION >= 60300
"gfx1100", "gfx1101", "gfx1200", "gfx1201", "gfx908",
"gfx1100", "gfx1101", "gfx1200", "gfx1201",
#endif
#if ROCM_VERSION >= 60500
"gfx950"

View File

@ -205,8 +205,6 @@ class TORCH_API Context {
void setBenchmarkCuDNN(bool);
int benchmarkLimitCuDNN() const;
void setBenchmarkLimitCuDNN(int);
bool immediateMiopen() const;
void setImmediateMiopen(bool);
bool deterministicCuDNN() const;
void setDeterministicCuDNN(bool);
bool deterministicMkldnn() const;
@ -442,7 +440,6 @@ class TORCH_API Context {
bool enabled_overrideable = true;
bool allow_fp16_bf16_reduction_mathSDP = false;
bool benchmark_cudnn = false;
bool immediate_miopen = false;
Float32MatmulPrecision float32_matmul_precision =
c10::utils::check_env("TORCH_ALLOW_TF32_CUBLAS_OVERRIDE") == true
? at::Float32MatmulPrecision::HIGH

View File

@ -132,9 +132,6 @@ DLDevice torchDeviceToDLDevice(at::Device device) {
case DeviceType::PrivateUse1:
ctx.device_type = DLDeviceType::kDLExtDev;
break;
case DeviceType::MPS:
ctx.device_type = DLDeviceType::kDLMetal;
break;
default:
TORCH_CHECK_BUFFER(false, "Cannot pack tensors on " + device.str());
}
@ -167,8 +164,6 @@ static Device getATenDevice(DLDeviceType type, c10::DeviceIndex index, void* dat
return at::Device(DeviceType::MAIA, index);
case DLDeviceType::kDLExtDev:
return at::Device(DeviceType::PrivateUse1, index);
case DLDeviceType::kDLMetal:
return at::Device(DeviceType::MPS, index);
default:
TORCH_CHECK_BUFFER(
false, "Unsupported device_type: ", std::to_string(type));

View File

@ -9,36 +9,7 @@
namespace at {
/*
* Design:
* 1. ZeroTensors are regular tensors with TensorOptions, a storage
* pointing to nullptr and a ZeroTensor dispatch key set.
*
* 2. ZeroTensors are immutable. This is done to prevent data race in the case of multithreading
* (when two threads try to read the same zero tensor and materialize it in-place).
*
* 3. ZeroTensor has a boxed fallback that will be dispatched to any ops that don't
* have special ZeroTensor handling. This fallback materializes each ZeroTensor to
* `at::zeros({}, tensor.options()).expand(tensor.sizes())`.
* 4. ZeroTensors are handled above autograd. This is necessary because fallback
* operations are not differentiable.
* - Example: Consider add in the case it was using the fallback: zerotensor_a + b.
* zerotensor_a would be materialized to c=torch.zeros_like(zerotensor_a) after
* passing through the fallback. If this happens above the autograd, then the
* gradients would be populated on c instead of zerotensor_a.
*
* 5. The grad field is always populated with an honest to goodness tensor. This
* materialization of ZeroTensors will happen in:
* - AccumulateGrad for Backward Mode AD.
* - will never be required for ForwardMode AD.
* - This is because if all the tangents were undefined (efficient ZeroTensors),
* no computation will be performed (this is ensured via an existing pre-check).
*
* Today ZeroTensors are primarily used to represent undefined gradients in forward AD,
* it does not perfectly handle NaNs and Infs as we don't check the actual values
* and assume that they are non-zero, non-inf, non-NaN etc.
*/
// TODO: add a note explaining the design decisions
// ZeroTensors are designed to be immutable. Thus, we error out when an in-place operation is performed on ZeroTensors
static void zeroTensorFallback(const c10::OperatorHandle& op, DispatchKeySet dispatch_keys, torch::jit::Stack* stack) {
const auto& arguments = op.schema().arguments();

View File

@ -1 +1,55 @@
#include <torch/headeronly/cpu/vec/intrinsics.h>
#pragma once
#if defined(__GNUC__) && (defined(__x86_64__) || defined(__i386__))
/* GCC or clang-compatible compiler, targeting x86/x86-64 */
#include <x86intrin.h>
#elif defined(__clang__) && (defined(__ARM_NEON__) || defined(__aarch64__))
/* Clang-compatible compiler, targeting arm neon */
#include <arm_neon.h>
#if defined(__ARM_FEATURE_SVE)
/* CLANG-compatible compiler, targeting ARM with SVE */
#include <arm_sve.h>
#endif
#elif defined(_MSC_VER)
/* Microsoft C/C++-compatible compiler */
#include <intrin.h>
#if _MSC_VER <= 1900
#define _mm256_extract_epi64(X, Y) \
(_mm_extract_epi64(_mm256_extractf128_si256(X, Y >> 1), Y % 2))
#define _mm256_extract_epi32(X, Y) \
(_mm_extract_epi32(_mm256_extractf128_si256(X, Y >> 2), Y % 4))
#define _mm256_extract_epi16(X, Y) \
(_mm_extract_epi16(_mm256_extractf128_si256(X, Y >> 3), Y % 8))
#define _mm256_extract_epi8(X, Y) \
(_mm_extract_epi8(_mm256_extractf128_si256(X, Y >> 4), Y % 16))
#endif
#elif defined(__GNUC__) && (defined(__ARM_NEON__) || defined(__aarch64__))
/* GCC-compatible compiler, targeting ARM with NEON */
#include <arm_neon.h>
#if defined(__ARM_FEATURE_SVE)
/* GCC-compatible compiler, targeting ARM with SVE */
#include <arm_sve.h>
#endif
#if defined(MISSING_ARM_VLD1)
#include <ATen/cpu/vec/vec256/missing_vld1_neon.h>
#elif defined(MISSING_ARM_VST1)
#include <ATen/cpu/vec/vec256/missing_vst1_neon.h>
#endif
#elif defined(__GNUC__) && defined(__IWMMXT__)
/* GCC-compatible compiler, targeting ARM with WMMX */
#include <mmintrin.h>
#elif defined(__s390x__)
// targets Z/architecture
// we will include vecintrin later
#elif (defined(__GNUC__) || defined(__xlC__)) && \
(defined(__VEC__) || defined(__ALTIVEC__))
/* XLC or GCC-compatible compiler, targeting PowerPC with VMX/VSX */
#include <altivec.h>
/* We need to undef those tokens defined by <altivec.h> to avoid conflicts
with the C++ types. => Can still use __bool/__vector */
#undef bool
#undef vector
#undef pixel
#elif defined(__GNUC__) && defined(__SPE__)
/* GCC-compatible compiler, targeting PowerPC with SPE */
#include <spe.h>
#endif

View File

@ -5,7 +5,6 @@
#include <ATen/cpu/vec/sve/vec_common_sve.h>
#include <ATen/cpu/vec/sve/vec_float.h>
#include <ATen/cpu/vec/vec_base.h>
#include <c10/util/bit_cast.h>
#include <cmath>
namespace at {
namespace vec {
@ -37,7 +36,7 @@ class Vectorized<BFloat16> {
return VECTOR_WIDTH / sizeof(BFloat16);
}
Vectorized();
Vectorized() {}
Vectorized(svbfloat16_t v) : values(v) {}
Vectorized(int val);
Vectorized(BFloat16 val);
@ -307,11 +306,6 @@ Vectorized<c10::BFloat16> inline operator/(
return binary_operator_via_float(std::divides<Vectorized<float>>(), a, b);
}
inline Vectorized<BFloat16>::Vectorized() {
const short zero = 0;
values = svdup_n_bf16(c10::bit_cast<bfloat16_t>(zero));
}
inline Vectorized<BFloat16>::Vectorized(int val) {
auto vals_f = svdup_n_f32(val);
values = convert_float_bfloat16(vals_f, vals_f);

View File

@ -38,9 +38,7 @@ class Vectorized<double> {
static constexpr size_type size() {
return VECTOR_WIDTH / sizeof(double);
}
Vectorized() {
values = svdup_n_f64(0);
}
Vectorized() {}
Vectorized(svfloat64_t v) : values(v) {}
Vectorized(double val) {
values = svdup_n_f64(val);
@ -587,30 +585,6 @@ Vectorized<double> inline fmadd(
return svmad_f64_x(ptrue, a, b, c);
}
template <>
Vectorized<double> inline fnmadd(
const Vectorized<double>& a,
const Vectorized<double>& b,
const Vectorized<double>& c) {
return svmsb_f64_x(ptrue, a, b, c);
}
template <>
Vectorized<double> inline fmsub(
const Vectorized<double>& a,
const Vectorized<double>& b,
const Vectorized<double>& c) {
return svnmsb_f64_x(ptrue, a, b, c);
}
template <>
Vectorized<double> inline fnmsub(
const Vectorized<double>& a,
const Vectorized<double>& b,
const Vectorized<double>& c) {
return svnmad_f64_x(ptrue, a, b, c);
}
#endif // defined(CPU_CAPABILITY_SVE)
} // namespace CPU_CAPABILITY

View File

@ -38,9 +38,7 @@ class Vectorized<float> {
static constexpr size_type size() {
return VECTOR_WIDTH / sizeof(float);
}
Vectorized() {
values = svdup_n_f32(0);
}
Vectorized() {}
Vectorized(svfloat32_t v) : values(v) {}
Vectorized(float val) {
values = svdup_n_f32(val);
@ -758,30 +756,6 @@ Vectorized<float> inline fmadd(
return svmad_f32_x(ptrue, a, b, c);
}
template <>
Vectorized<float> inline fnmadd(
const Vectorized<float>& a,
const Vectorized<float>& b,
const Vectorized<float>& c) {
return svmsb_f32_x(ptrue, a, b, c);
}
template <>
Vectorized<float> inline fmsub(
const Vectorized<float>& a,
const Vectorized<float>& b,
const Vectorized<float>& c) {
return svnmsb_f32_x(ptrue, a, b, c);
}
template <>
Vectorized<float> inline fnmsub(
const Vectorized<float>& a,
const Vectorized<float>& b,
const Vectorized<float>& c) {
return svnmad_f32_x(ptrue, a, b, c);
}
#endif // defined(CPU_CAPABILITY_SVE)
} // namespace CPU_CAPABILITY

View File

@ -32,9 +32,7 @@ inline namespace CPU_CAPABILITY {
static constexpr size_type size() { \
return vl; \
} \
Vectorized() { \
values = svdup_n_s##bit(0); \
} \
Vectorized() {} \
Vectorized(svint##bit##_t v) : values(v) {} \
Vectorized(int##bit##_t val) { \
values = svdup_n_s##bit(val); \

View File

@ -552,15 +552,6 @@ Vectorized<c10::BFloat16> inline fmadd(
return a * b + c;
}
template <>
Vectorized<c10::BFloat16> inline fnmadd(
const Vectorized<c10::BFloat16>& a,
const Vectorized<c10::BFloat16>& b,
const Vectorized<c10::BFloat16>& c) {
// See NOTE [BF16 FMA] above.
return -a * b + c;
}
template <>
Vectorized<c10::BFloat16> inline fmsub(
const Vectorized<c10::BFloat16>& a,
@ -570,15 +561,6 @@ Vectorized<c10::BFloat16> inline fmsub(
return a * b - c;
}
template <>
Vectorized<c10::BFloat16> inline fnmsub(
const Vectorized<c10::BFloat16>& a,
const Vectorized<c10::BFloat16>& b,
const Vectorized<c10::BFloat16>& c) {
// See NOTE [BF16 FMA] above.
return -a * b - c;
}
#endif // !defined(C10_MOBILE) && defined(__aarch64__)
} // namespace CPU_CAPABILITY

View File

@ -83,9 +83,7 @@ class Vectorized<float> {
static constexpr size_type size() {
return 4;
}
Vectorized() {
values = vmovq_n_f32(0);
}
Vectorized() {}
Vectorized(float32x4_t v) : values(v) {}
Vectorized(float val) : values{vdupq_n_f32(val)} {}
Vectorized(float val0, float val1, float val2, float val3)
@ -584,14 +582,6 @@ Vectorized<float> inline fmadd(
return Vectorized<float>(vfmaq_f32(c, a, b));
}
template <>
Vectorized<float> inline fnmadd(
const Vectorized<float>& a,
const Vectorized<float>& b,
const Vectorized<float>& c) {
return Vectorized<float>(vfmsq_f32(c, a, b));
}
template <>
Vectorized<float> inline fmsub(
const Vectorized<float>& a,
@ -600,14 +590,6 @@ Vectorized<float> inline fmsub(
return Vectorized<float>(vnegq_f32(vfmsq_f32(c, a, b)));
}
template <>
Vectorized<float> inline fnmsub(
const Vectorized<float>& a,
const Vectorized<float>& b,
const Vectorized<float>& c) {
return Vectorized<float>(vnegq_f32(vfmaq_f32(c, a, b)));
}
inline Vectorized<float> Vectorized<float>::erf() const {
// constants
const Vectorized<float> neg_zero_vec(-0.f);

Some files were not shown because too many files have changed in this diff Show More