mirror of
https://github.com/pytorch/pytorch.git
synced 2025-10-29 19:24:55 +08:00
Update base for Update on "[map] support gen_schema for map"
In-place mutation may create inter-loop dependency that breaks the parallelism we have for map so we ban input mutations. [ghstack-poisoned]
This commit is contained in:
@ -438,9 +438,7 @@ def build_torchvision(
|
||||
)
|
||||
build_vars += f"BUILD_VERSION={version}.dev{build_date}"
|
||||
elif build_version is not None:
|
||||
build_vars += (
|
||||
f"BUILD_VERSION={build_version} PYTORCH_VERSION={branch[1:].split('-')[0]}"
|
||||
)
|
||||
build_vars += f"BUILD_VERSION={build_version} PYTORCH_VERSION={branch[1:].split('-', maxsplit=1)[0]}"
|
||||
if host.using_docker():
|
||||
build_vars += " CMAKE_SHARED_LINKER_FLAGS=-Wl,-z,max-page-size=0x10000"
|
||||
|
||||
@ -495,9 +493,7 @@ def build_torchdata(
|
||||
)
|
||||
build_vars += f"BUILD_VERSION={version}.dev{build_date}"
|
||||
elif build_version is not None:
|
||||
build_vars += (
|
||||
f"BUILD_VERSION={build_version} PYTORCH_VERSION={branch[1:].split('-')[0]}"
|
||||
)
|
||||
build_vars += f"BUILD_VERSION={build_version} PYTORCH_VERSION={branch[1:].split('-', maxsplit=1)[0]}"
|
||||
if host.using_docker():
|
||||
build_vars += " CMAKE_SHARED_LINKER_FLAGS=-Wl,-z,max-page-size=0x10000"
|
||||
|
||||
@ -553,9 +549,7 @@ def build_torchtext(
|
||||
)
|
||||
build_vars += f"BUILD_VERSION={version}.dev{build_date}"
|
||||
elif build_version is not None:
|
||||
build_vars += (
|
||||
f"BUILD_VERSION={build_version} PYTORCH_VERSION={branch[1:].split('-')[0]}"
|
||||
)
|
||||
build_vars += f"BUILD_VERSION={build_version} PYTORCH_VERSION={branch[1:].split('-', maxsplit=1)[0]}"
|
||||
if host.using_docker():
|
||||
build_vars += " CMAKE_SHARED_LINKER_FLAGS=-Wl,-z,max-page-size=0x10000"
|
||||
|
||||
@ -613,9 +607,7 @@ def build_torchaudio(
|
||||
)
|
||||
build_vars += f"BUILD_VERSION={version}.dev{build_date}"
|
||||
elif build_version is not None:
|
||||
build_vars += (
|
||||
f"BUILD_VERSION={build_version} PYTORCH_VERSION={branch[1:].split('-')[0]}"
|
||||
)
|
||||
build_vars += f"BUILD_VERSION={build_version} PYTORCH_VERSION={branch[1:].split('-', maxsplit=1)[0]}"
|
||||
if host.using_docker():
|
||||
build_vars += " CMAKE_SHARED_LINKER_FLAGS=-Wl,-z,max-page-size=0x10000"
|
||||
|
||||
|
||||
@ -104,7 +104,6 @@ If your new Docker image needs a library installed from a specific pinned commit
|
||||
```bash
|
||||
pytorch-linux-jammy-cuda12.8-cudnn9-py3.12-gcc11-new1)
|
||||
CUDA_VERSION=12.8.1
|
||||
CUDNN_VERSION=9
|
||||
ANACONDA_PYTHON_VERSION=3.12
|
||||
GCC_VERSION=11
|
||||
VISION=yes
|
||||
|
||||
@ -93,7 +93,6 @@ tag=$(echo $image | awk -F':' '{print $2}')
|
||||
case "$tag" in
|
||||
pytorch-linux-jammy-cuda12.4-cudnn9-py3-gcc11)
|
||||
CUDA_VERSION=12.4
|
||||
CUDNN_VERSION=9
|
||||
ANACONDA_PYTHON_VERSION=3.10
|
||||
GCC_VERSION=11
|
||||
VISION=yes
|
||||
@ -104,7 +103,6 @@ case "$tag" in
|
||||
;;
|
||||
pytorch-linux-jammy-cuda12.8-cudnn9-py3-gcc11)
|
||||
CUDA_VERSION=12.8.1
|
||||
CUDNN_VERSION=9
|
||||
ANACONDA_PYTHON_VERSION=3.10
|
||||
GCC_VERSION=11
|
||||
VISION=yes
|
||||
@ -115,7 +113,6 @@ case "$tag" in
|
||||
;;
|
||||
pytorch-linux-jammy-cuda12.8-cudnn9-py3-gcc9-inductor-benchmarks)
|
||||
CUDA_VERSION=12.8.1
|
||||
CUDNN_VERSION=9
|
||||
ANACONDA_PYTHON_VERSION=3.10
|
||||
GCC_VERSION=9
|
||||
VISION=yes
|
||||
@ -127,7 +124,6 @@ case "$tag" in
|
||||
;;
|
||||
pytorch-linux-jammy-cuda12.8-cudnn9-py3.12-gcc9-inductor-benchmarks)
|
||||
CUDA_VERSION=12.8.1
|
||||
CUDNN_VERSION=9
|
||||
ANACONDA_PYTHON_VERSION=3.12
|
||||
GCC_VERSION=9
|
||||
VISION=yes
|
||||
@ -139,7 +135,6 @@ case "$tag" in
|
||||
;;
|
||||
pytorch-linux-jammy-cuda12.8-cudnn9-py3.13-gcc9-inductor-benchmarks)
|
||||
CUDA_VERSION=12.8.1
|
||||
CUDNN_VERSION=9
|
||||
ANACONDA_PYTHON_VERSION=3.13
|
||||
GCC_VERSION=9
|
||||
VISION=yes
|
||||
@ -149,20 +144,8 @@ case "$tag" in
|
||||
TRITON=yes
|
||||
INDUCTOR_BENCHMARKS=yes
|
||||
;;
|
||||
pytorch-linux-jammy-cuda12.6-cudnn9-py3-gcc9)
|
||||
CUDA_VERSION=12.6.3
|
||||
CUDNN_VERSION=9
|
||||
ANACONDA_PYTHON_VERSION=3.10
|
||||
GCC_VERSION=9
|
||||
VISION=yes
|
||||
KATEX=yes
|
||||
UCX_COMMIT=${_UCX_COMMIT}
|
||||
UCC_COMMIT=${_UCC_COMMIT}
|
||||
TRITON=yes
|
||||
;;
|
||||
pytorch-linux-jammy-cuda12.8-cudnn9-py3.12-gcc11-vllm)
|
||||
CUDA_VERSION=12.8.1
|
||||
CUDNN_VERSION=9
|
||||
ANACONDA_PYTHON_VERSION=3.12
|
||||
GCC_VERSION=11
|
||||
VISION=yes
|
||||
@ -171,45 +154,8 @@ case "$tag" in
|
||||
UCC_COMMIT=${_UCC_COMMIT}
|
||||
TRITON=yes
|
||||
;;
|
||||
pytorch-linux-jammy-cuda12.6-cudnn9-py3-gcc9-inductor-benchmarks)
|
||||
CUDA_VERSION=12.6
|
||||
CUDNN_VERSION=9
|
||||
ANACONDA_PYTHON_VERSION=3.10
|
||||
GCC_VERSION=9
|
||||
VISION=yes
|
||||
KATEX=yes
|
||||
UCX_COMMIT=${_UCX_COMMIT}
|
||||
UCC_COMMIT=${_UCC_COMMIT}
|
||||
TRITON=yes
|
||||
INDUCTOR_BENCHMARKS=yes
|
||||
;;
|
||||
pytorch-linux-jammy-cuda12.6-cudnn9-py3.12-gcc9-inductor-benchmarks)
|
||||
CUDA_VERSION=12.6
|
||||
CUDNN_VERSION=9
|
||||
ANACONDA_PYTHON_VERSION=3.12
|
||||
GCC_VERSION=9
|
||||
VISION=yes
|
||||
KATEX=yes
|
||||
UCX_COMMIT=${_UCX_COMMIT}
|
||||
UCC_COMMIT=${_UCC_COMMIT}
|
||||
TRITON=yes
|
||||
INDUCTOR_BENCHMARKS=yes
|
||||
;;
|
||||
pytorch-linux-jammy-cuda12.6-cudnn9-py3.13-gcc9-inductor-benchmarks)
|
||||
CUDA_VERSION=12.6
|
||||
CUDNN_VERSION=9
|
||||
ANACONDA_PYTHON_VERSION=3.13
|
||||
GCC_VERSION=9
|
||||
VISION=yes
|
||||
KATEX=yes
|
||||
UCX_COMMIT=${_UCX_COMMIT}
|
||||
UCC_COMMIT=${_UCC_COMMIT}
|
||||
TRITON=yes
|
||||
INDUCTOR_BENCHMARKS=yes
|
||||
;;
|
||||
pytorch-linux-jammy-cuda12.8-cudnn9-py3-gcc9)
|
||||
CUDA_VERSION=12.8.1
|
||||
CUDNN_VERSION=9
|
||||
ANACONDA_PYTHON_VERSION=3.10
|
||||
GCC_VERSION=9
|
||||
VISION=yes
|
||||
@ -230,19 +176,7 @@ case "$tag" in
|
||||
VISION=yes
|
||||
TRITON=yes
|
||||
;;
|
||||
pytorch-linux-jammy-py3.11-clang12)
|
||||
ANACONDA_PYTHON_VERSION=3.11
|
||||
CLANG_VERSION=12
|
||||
VISION=yes
|
||||
TRITON=yes
|
||||
;;
|
||||
pytorch-linux-jammy-py3.9-gcc9)
|
||||
ANACONDA_PYTHON_VERSION=3.9
|
||||
GCC_VERSION=9
|
||||
VISION=yes
|
||||
TRITON=yes
|
||||
;;
|
||||
pytorch-linux-jammy-rocm-n-py3 | pytorch-linux-noble-rocm-n-py3)
|
||||
pytorch-linux-jammy-rocm-n-py3 | pytorch-linux-jammy-rocm-n-py3-benchmarks | pytorch-linux-noble-rocm-n-py3)
|
||||
if [[ $tag =~ "jammy" ]]; then
|
||||
ANACONDA_PYTHON_VERSION=3.10
|
||||
else
|
||||
@ -256,7 +190,9 @@ case "$tag" in
|
||||
KATEX=yes
|
||||
UCX_COMMIT=${_UCX_COMMIT}
|
||||
UCC_COMMIT=${_UCC_COMMIT}
|
||||
INDUCTOR_BENCHMARKS=yes
|
||||
if [[ $tag =~ "benchmarks" ]]; then
|
||||
INDUCTOR_BENCHMARKS=yes
|
||||
fi
|
||||
;;
|
||||
pytorch-linux-noble-rocm-alpha-py3)
|
||||
ANACONDA_PYTHON_VERSION=3.12
|
||||
@ -268,7 +204,6 @@ case "$tag" in
|
||||
KATEX=yes
|
||||
UCX_COMMIT=${_UCX_COMMIT}
|
||||
UCC_COMMIT=${_UCC_COMMIT}
|
||||
INDUCTOR_BENCHMARKS=yes
|
||||
PYTORCH_ROCM_ARCH="gfx90a;gfx942;gfx950"
|
||||
;;
|
||||
pytorch-linux-jammy-xpu-2025.0-py3)
|
||||
@ -299,7 +234,6 @@ case "$tag" in
|
||||
pytorch-linux-jammy-cuda12.8-cudnn9-py3.9-clang12)
|
||||
ANACONDA_PYTHON_VERSION=3.9
|
||||
CUDA_VERSION=12.8.1
|
||||
CUDNN_VERSION=9
|
||||
CLANG_VERSION=12
|
||||
VISION=yes
|
||||
TRITON=yes
|
||||
@ -378,7 +312,6 @@ case "$tag" in
|
||||
fi
|
||||
if [[ "$image" == *cuda* ]]; then
|
||||
extract_version_from_image_name cuda CUDA_VERSION
|
||||
extract_version_from_image_name cudnn CUDNN_VERSION
|
||||
fi
|
||||
if [[ "$image" == *rocm* ]]; then
|
||||
extract_version_from_image_name rocm ROCM_VERSION
|
||||
@ -430,9 +363,6 @@ docker build \
|
||||
--build-arg "PYTHON_VERSION=${PYTHON_VERSION}" \
|
||||
--build-arg "GCC_VERSION=${GCC_VERSION}" \
|
||||
--build-arg "CUDA_VERSION=${CUDA_VERSION}" \
|
||||
--build-arg "CUDNN_VERSION=${CUDNN_VERSION}" \
|
||||
--build-arg "TENSORRT_VERSION=${TENSORRT_VERSION}" \
|
||||
--build-arg "GRADLE_VERSION=${GRADLE_VERSION}" \
|
||||
--build-arg "NINJA_VERSION=${NINJA_VERSION:-}" \
|
||||
--build-arg "KATEX=${KATEX:-}" \
|
||||
--build-arg "ROCM_VERSION=${ROCM_VERSION:-}" \
|
||||
|
||||
@ -1 +1 @@
|
||||
11ec6354315768a85da41032535e3b7b99c5f706
|
||||
f7888497a1eb9e98d4c07537f0d0bcfe180d1363
|
||||
|
||||
@ -66,8 +66,9 @@ function do_cpython_build {
|
||||
ln -s pip3 ${prefix}/bin/pip
|
||||
fi
|
||||
# install setuptools since python 3.12 is required to use distutils
|
||||
${prefix}/bin/pip install wheel==0.45.1 setuptools==80.9.0
|
||||
local abi_tag=$(${prefix}/bin/python -c "from wheel.pep425tags import get_abbr_impl, get_impl_ver, get_abi_tag; print('{0}{1}-{2}'.format(get_abbr_impl(), get_impl_ver(), get_abi_tag()))")
|
||||
# packaging is needed to create symlink since wheel no longer provides needed information
|
||||
${prefix}/bin/pip install packaging==25.0 wheel==0.45.1 setuptools==80.9.0
|
||||
local abi_tag=$(${prefix}/bin/python -c "from packaging.tags import interpreter_name, interpreter_version; import sysconfig ; from sysconfig import get_config_var; print('{0}{1}-{0}{1}{2}'.format(interpreter_name(), interpreter_version(), 't' if sysconfig.get_config_var('Py_GIL_DISABLED') else ''))")
|
||||
ln -sf ${prefix} /opt/python/${abi_tag}
|
||||
}
|
||||
|
||||
|
||||
@ -68,8 +68,8 @@ function install_nvshmem {
|
||||
# download, unpack, install
|
||||
wget -q "${url}"
|
||||
tar xf "${filename}.tar.gz"
|
||||
cp -a "libnvshmem/include/"* /usr/local/include/
|
||||
cp -a "libnvshmem/lib/"* /usr/local/lib/
|
||||
cp -a "libnvshmem/include/"* /usr/local/cuda/include/
|
||||
cp -a "libnvshmem/lib/"* /usr/local/cuda/lib64/
|
||||
|
||||
# cleanup
|
||||
cd ..
|
||||
|
||||
@ -1,26 +0,0 @@
|
||||
#!/bin/bash
|
||||
|
||||
if [[ -n "${CUDNN_VERSION}" ]]; then
|
||||
# cuDNN license: https://developer.nvidia.com/cudnn/license_agreement
|
||||
mkdir tmp_cudnn
|
||||
pushd tmp_cudnn
|
||||
if [[ ${CUDA_VERSION:0:4} == "12.9" || ${CUDA_VERSION:0:4} == "12.8" ]]; then
|
||||
CUDNN_NAME="cudnn-linux-x86_64-9.10.2.21_cuda12-archive"
|
||||
elif [[ ${CUDA_VERSION:0:4} == "12.6" ]]; then
|
||||
CUDNN_NAME="cudnn-linux-x86_64-9.10.2.21_cuda12-archive"
|
||||
elif [[ ${CUDA_VERSION:0:4} == "12.4" ]]; then
|
||||
CUDNN_NAME="cudnn-linux-x86_64-9.10.2.21_cuda12-archive"
|
||||
elif [[ ${CUDA_VERSION:0:2} == "11" ]]; then
|
||||
CUDNN_NAME="cudnn-linux-x86_64-9.1.0.70_cuda11-archive"
|
||||
else
|
||||
print "Unsupported CUDA version ${CUDA_VERSION}"
|
||||
exit 1
|
||||
fi
|
||||
curl --retry 3 -OLs https://developer.download.nvidia.com/compute/cudnn/redist/cudnn/linux-x86_64/${CUDNN_NAME}.tar.xz
|
||||
tar xf ${CUDNN_NAME}.tar.xz
|
||||
cp -a ${CUDNN_NAME}/include/* /usr/local/cuda/include/
|
||||
cp -a ${CUDNN_NAME}/lib/* /usr/local/cuda/lib64/
|
||||
popd
|
||||
rm -rf tmp_cudnn
|
||||
ldconfig
|
||||
fi
|
||||
@ -15,11 +15,37 @@ function install_timm() {
|
||||
commit=$(get_pinned_commit timm)
|
||||
|
||||
pip_install "git+https://github.com/huggingface/pytorch-image-models@${commit}"
|
||||
# Clean up
|
||||
conda_run pip uninstall -y torch torchvision triton
|
||||
}
|
||||
|
||||
function install_torchbench() {
|
||||
local commit
|
||||
commit=$(get_pinned_commit torchbench)
|
||||
git clone https://github.com/pytorch/benchmark torchbench
|
||||
pushd torchbench
|
||||
git checkout "$commit"
|
||||
|
||||
python install.py --continue_on_fail
|
||||
|
||||
# TODO (huydhn): transformers-4.44.2 added by https://github.com/pytorch/benchmark/pull/2488
|
||||
# is regressing speedup metric. This needs to be investigated further
|
||||
pip install transformers==4.38.1
|
||||
|
||||
echo "Print all dependencies after TorchBench is installed"
|
||||
python -mpip freeze
|
||||
popd
|
||||
|
||||
chown -R jenkins torchbench
|
||||
}
|
||||
|
||||
# Pango is needed for weasyprint which is needed for doctr
|
||||
conda_install pango
|
||||
|
||||
# Stable packages are ok here, just to satisfy TorchBench check
|
||||
pip_install torch torchvision torchaudio --index-url https://download.pytorch.org/whl/cu128
|
||||
|
||||
install_torchbench
|
||||
install_huggingface
|
||||
install_timm
|
||||
|
||||
# Clean up
|
||||
conda_run pip uninstall -y torch torchvision torchaudio triton torchao
|
||||
|
||||
@ -30,7 +30,7 @@ EOF
|
||||
|
||||
# we want the patch version of 6.4 instead
|
||||
if [[ $(ver $ROCM_VERSION) -eq $(ver 6.4) ]]; then
|
||||
ROCM_VERSION="${ROCM_VERSION}.1"
|
||||
ROCM_VERSION="${ROCM_VERSION}.2"
|
||||
fi
|
||||
|
||||
# Default url values
|
||||
@ -85,16 +85,19 @@ EOF
|
||||
# CI no longer builds for ROCm 6.3, but
|
||||
# ROCm 6.4 did not yet fix the regression, also HIP branch names are different
|
||||
if [[ $(ver $ROCM_VERSION) -ge $(ver 6.4) ]] && [[ $(ver $ROCM_VERSION) -lt $(ver 7.0) ]]; then
|
||||
if [[ $(ver $ROCM_VERSION) -eq $(ver 6.4.1) ]]; then
|
||||
HIP_BRANCH=release/rocm-rel-6.4
|
||||
CLR_HASH=606bc820b4b1f315d135da02a1f0b176ca50a92c # branch release/rocm-rel-6.4.1-statco-hotfix
|
||||
if [[ $(ver $ROCM_VERSION) -eq $(ver 6.4.2) ]]; then
|
||||
HIP_TAG=rocm-6.4.2
|
||||
CLR_HASH=74d78ba3ac4bac235d02bcb48511c30b5cfdd457 # branch release/rocm-rel-6.4.2-statco-hotfix
|
||||
elif [[ $(ver $ROCM_VERSION) -eq $(ver 6.4.1) ]]; then
|
||||
HIP_TAG=rocm-6.4.1
|
||||
CLR_HASH=efe6c35790b9206923bfeed1209902feff37f386 # branch release/rocm-rel-6.4.1-statco-hotfix
|
||||
elif [[ $(ver $ROCM_VERSION) -eq $(ver 6.4) ]]; then
|
||||
HIP_BRANCH=release/rocm-rel-6.4
|
||||
HIP_TAG=rocm-6.4.0
|
||||
CLR_HASH=600f5b0d2baed94d5121e2174a9de0851b040b0c # branch release/rocm-rel-6.4-statco-hotfix
|
||||
fi
|
||||
# clr build needs CppHeaderParser but can only find it using conda's python
|
||||
python -m pip install CppHeaderParser
|
||||
git clone https://github.com/ROCm/HIP -b $HIP_BRANCH
|
||||
git clone https://github.com/ROCm/HIP -b $HIP_TAG
|
||||
HIP_COMMON_DIR=$(readlink -f HIP)
|
||||
git clone https://github.com/jeffdaily/clr
|
||||
pushd clr
|
||||
|
||||
@ -34,18 +34,27 @@ function install_ubuntu() {
|
||||
|
||||
# The xpu-smi packages
|
||||
apt-get install -y flex bison xpu-smi
|
||||
# Compute and Media Runtimes
|
||||
apt-get install -y \
|
||||
intel-opencl-icd intel-level-zero-gpu level-zero \
|
||||
intel-media-va-driver-non-free libmfx1 libmfxgen1 libvpl2 \
|
||||
libegl-mesa0 libegl1-mesa libegl1-mesa-dev libgbm1 libgl1-mesa-dev libgl1-mesa-dri \
|
||||
libglapi-mesa libgles2-mesa-dev libglx-mesa0 libigdgmm12 libxatracker2 mesa-va-drivers \
|
||||
mesa-vdpau-drivers mesa-vulkan-drivers va-driver-all vainfo hwinfo clinfo
|
||||
if [[ "${XPU_DRIVER_TYPE,,}" == "rolling" ]]; then
|
||||
apt-get install -y intel-ocloc
|
||||
|
||||
if [[ "${XPU_DRIVER_TYPE,,}" == "lts" ]]; then
|
||||
# Compute and Media Runtimes
|
||||
apt-get install -y \
|
||||
intel-opencl-icd intel-level-zero-gpu level-zero \
|
||||
intel-media-va-driver-non-free libmfx1 libmfxgen1 libvpl2 \
|
||||
libegl-mesa0 libegl1-mesa libegl1-mesa-dev libgbm1 libgl1-mesa-dev libgl1-mesa-dri \
|
||||
libglapi-mesa libgles2-mesa-dev libglx-mesa0 libigdgmm12 libxatracker2 mesa-va-drivers \
|
||||
mesa-vdpau-drivers mesa-vulkan-drivers va-driver-all vainfo hwinfo clinfo
|
||||
# Development Packages
|
||||
apt-get install -y libigc-dev intel-igc-cm libigdfcl-dev libigfxcmrt-dev level-zero-dev
|
||||
else # rolling driver
|
||||
apt-get install -y \
|
||||
intel-opencl-icd libze-intel-gpu1 libze1 \
|
||||
intel-media-va-driver-non-free libmfx-gen1 libvpl2 \
|
||||
libegl-mesa0 libegl1-mesa libegl1-mesa-dev libgbm1 libgl1-mesa-dev libgl1-mesa-dri \
|
||||
libglapi-mesa libglx-mesa0 libigdgmm12 libxatracker2 mesa-va-drivers \
|
||||
mesa-vdpau-drivers mesa-vulkan-drivers va-driver-all vainfo hwinfo clinfo intel-ocloc
|
||||
apt-get install -y libigc-dev intel-igc-cm libigdfcl-dev libigfxcmrt-dev libze-dev
|
||||
fi
|
||||
# Development Packages
|
||||
apt-get install -y libigc-dev intel-igc-cm libigdfcl-dev libigfxcmrt-dev level-zero-dev
|
||||
|
||||
# Install Intel Support Packages
|
||||
apt-get install -y ${XPU_PACKAGES}
|
||||
|
||||
@ -130,11 +139,11 @@ function install_sles() {
|
||||
|
||||
}
|
||||
|
||||
# Default use GPU driver LTS releases
|
||||
XPU_DRIVER_VERSION="/lts/2350"
|
||||
if [[ "${XPU_DRIVER_TYPE,,}" == "rolling" ]]; then
|
||||
# Use GPU driver rolling releases
|
||||
XPU_DRIVER_VERSION=""
|
||||
# Default use GPU driver rolling releases
|
||||
XPU_DRIVER_VERSION=""
|
||||
if [[ "${XPU_DRIVER_TYPE,,}" == "lts" ]]; then
|
||||
# Use GPU driver LTS releases
|
||||
XPU_DRIVER_VERSION="/lts/2350"
|
||||
fi
|
||||
|
||||
# Default use Intel® oneAPI Deep Learning Essentials 2025.0
|
||||
|
||||
@ -41,7 +41,7 @@ case ${DOCKER_TAG_PREFIX} in
|
||||
rocm*)
|
||||
# we want the patch version of 6.4 instead
|
||||
if [[ $(ver $GPU_ARCH_VERSION) -eq $(ver 6.4) ]]; then
|
||||
GPU_ARCH_VERSION="${GPU_ARCH_VERSION}.1"
|
||||
GPU_ARCH_VERSION="${GPU_ARCH_VERSION}.2"
|
||||
fi
|
||||
BASE_TARGET=rocm
|
||||
GPU_IMAGE=rocm/dev-ubuntu-22.04:${GPU_ARCH_VERSION}-complete
|
||||
|
||||
@ -77,7 +77,7 @@ case ${image} in
|
||||
manylinux2_28-builder:rocm*)
|
||||
# we want the patch version of 6.4 instead
|
||||
if [[ $(ver $GPU_ARCH_VERSION) -eq $(ver 6.4) ]]; then
|
||||
GPU_ARCH_VERSION="${GPU_ARCH_VERSION}.1"
|
||||
GPU_ARCH_VERSION="${GPU_ARCH_VERSION}.2"
|
||||
fi
|
||||
TARGET=rocm_final
|
||||
MANY_LINUX_VERSION="2_28"
|
||||
|
||||
@ -63,11 +63,12 @@ lark==0.12.0
|
||||
#Pinned versions: 0.12.0
|
||||
#test that import:
|
||||
|
||||
librosa>=0.6.2 ; python_version < "3.11"
|
||||
librosa==0.10.2 ; python_version == "3.12"
|
||||
librosa>=0.6.2 ; python_version < "3.11" and platform_machine != "s390x"
|
||||
librosa==0.10.2 ; python_version == "3.12" and platform_machine != "s390x"
|
||||
#Description: A python package for music and audio analysis
|
||||
#Pinned versions: >=0.6.2
|
||||
#test that import: test_spectral_ops.py
|
||||
#librosa depends on numba; disable it for s390x while numba is disabled too
|
||||
|
||||
#mkl #this breaks linux-bionic-rocm4.5-py3.7
|
||||
#Description: Intel oneAPI Math Kernel Library
|
||||
@ -110,14 +111,15 @@ ninja==1.11.1.3
|
||||
#Pinned versions: 1.11.1.3
|
||||
#test that import: run_test.py, test_cpp_extensions_aot.py,test_determination.py
|
||||
|
||||
numba==0.49.0 ; python_version < "3.9"
|
||||
numba==0.55.2 ; python_version == "3.9"
|
||||
numba==0.55.2 ; python_version == "3.10"
|
||||
numba==0.60.0 ; python_version == "3.12"
|
||||
numba==0.49.0 ; python_version < "3.9" and platform_machine != "s390x"
|
||||
numba==0.55.2 ; python_version == "3.9" and platform_machine != "s390x"
|
||||
numba==0.55.2 ; python_version == "3.10" and platform_machine != "s390x"
|
||||
numba==0.60.0 ; python_version == "3.12" and platform_machine != "s390x"
|
||||
#Description: Just-In-Time Compiler for Numerical Functions
|
||||
#Pinned versions: 0.54.1, 0.49.0, <=0.49.1
|
||||
#test that import: test_numba_integration.py
|
||||
#For numba issue see https://github.com/pytorch/pytorch/issues/51511
|
||||
#Need release > 0.61.2 for s390x due to https://github.com/numba/numba/pull/10073
|
||||
|
||||
#numpy
|
||||
#Description: Provides N-dimensional arrays and linear algebra
|
||||
@ -221,9 +223,9 @@ pygments==2.15.0
|
||||
#Pinned versions: 2.12.0
|
||||
#test that import: the doctests
|
||||
|
||||
#PyYAML
|
||||
#pyyaml
|
||||
#Description: data serialization format
|
||||
#Pinned versions:
|
||||
#Pinned versions: 6.0.2
|
||||
#test that import:
|
||||
|
||||
#requests
|
||||
@ -233,7 +235,7 @@ pygments==2.15.0
|
||||
|
||||
#rich
|
||||
#Description: rich text and beautiful formatting in the terminal
|
||||
#Pinned versions: 10.9.0
|
||||
#Pinned versions: 14.1.0
|
||||
#test that import:
|
||||
|
||||
scikit-image==0.19.3 ; python_version < "3.10"
|
||||
@ -307,7 +309,7 @@ pytest-cpp==2.3.0
|
||||
#Pinned versions: 2.3.0
|
||||
#test that import:
|
||||
|
||||
z3-solver==4.15.1.0
|
||||
z3-solver==4.15.1.0 ; platform_machine != "s390x"
|
||||
#Description: The Z3 Theorem Prover Project
|
||||
#Pinned versions:
|
||||
#test that import:
|
||||
@ -361,7 +363,6 @@ pwlf==2.2.1
|
||||
#Pinned versions: 2.2.1
|
||||
#test that import: test_sac_estimator.py
|
||||
|
||||
|
||||
# To build PyTorch itself
|
||||
pyyaml
|
||||
pyzstd
|
||||
|
||||
@ -1,7 +1,7 @@
|
||||
sphinx==5.3.0
|
||||
#Description: This is used to generate PyTorch docs
|
||||
#Pinned versions: 5.3.0
|
||||
-e git+https://github.com/pytorch/pytorch_sphinx_theme.git@pytorch_sphinx_theme2#egg=pytorch_sphinx_theme2
|
||||
-e git+https://github.com/pytorch/pytorch_sphinx_theme.git@722b7e6f9ca512fcc526ad07d62b3d28c50bb6cd#egg=pytorch_sphinx_theme2
|
||||
|
||||
# TODO: sphinxcontrib.katex 0.9.0 adds a local KaTeX server to speed up pre-rendering
|
||||
# but it doesn't seem to work and hangs around idly. The initial thought that it is probably
|
||||
@ -50,8 +50,8 @@ IPython==8.12.0
|
||||
#Pinned versions: 8.12.0
|
||||
|
||||
myst-nb==0.17.2
|
||||
#Description: This is used to generate PyTorch functorch docs
|
||||
#Pinned versions: 0.13.2
|
||||
#Description: This is used to generate PyTorch functorch and torch.compile docs.
|
||||
#Pinned versions: 0.17.2
|
||||
|
||||
# The following are required to build torch.distributed.elastic.rendezvous.etcd* docs
|
||||
python-etcd==0.4.5
|
||||
@ -59,4 +59,3 @@ sphinx-copybutton==0.5.0
|
||||
sphinx-design==0.4.0
|
||||
sphinxcontrib-mermaid==1.0.0
|
||||
myst-parser==0.18.1
|
||||
myst-nb
|
||||
|
||||
@ -98,8 +98,9 @@ COPY ./common/install_inductor_benchmark_deps.sh install_inductor_benchmark_deps
|
||||
COPY ./common/common_utils.sh common_utils.sh
|
||||
COPY ci_commit_pins/huggingface.txt huggingface.txt
|
||||
COPY ci_commit_pins/timm.txt timm.txt
|
||||
COPY ci_commit_pins/torchbench.txt torchbench.txt
|
||||
RUN if [ -n "${INDUCTOR_BENCHMARKS}" ]; then bash ./install_inductor_benchmark_deps.sh; fi
|
||||
RUN rm install_inductor_benchmark_deps.sh common_utils.sh timm.txt huggingface.txt
|
||||
RUN rm install_inductor_benchmark_deps.sh common_utils.sh timm.txt huggingface.txt torchbench.txt
|
||||
|
||||
# (optional) Install non-default Ninja version
|
||||
ARG NINJA_VERSION
|
||||
|
||||
@ -98,8 +98,9 @@ COPY ./common/install_inductor_benchmark_deps.sh install_inductor_benchmark_deps
|
||||
COPY ./common/common_utils.sh common_utils.sh
|
||||
COPY ci_commit_pins/huggingface.txt huggingface.txt
|
||||
COPY ci_commit_pins/timm.txt timm.txt
|
||||
COPY ci_commit_pins/torchbench.txt torchbench.txt
|
||||
RUN if [ -n "${INDUCTOR_BENCHMARKS}" ]; then bash ./install_inductor_benchmark_deps.sh; fi
|
||||
RUN rm install_inductor_benchmark_deps.sh common_utils.sh timm.txt huggingface.txt
|
||||
RUN rm install_inductor_benchmark_deps.sh common_utils.sh timm.txt huggingface.txt torchbench.txt
|
||||
|
||||
ARG TRITON
|
||||
ARG TRITON_CPU
|
||||
|
||||
@ -138,28 +138,11 @@ fi
|
||||
|
||||
echo "Calling setup.py bdist at $(date)"
|
||||
|
||||
if [[ "$USE_SPLIT_BUILD" == "true" ]]; then
|
||||
echo "Calling setup.py bdist_wheel for split build (BUILD_LIBTORCH_WHL)"
|
||||
time EXTRA_CAFFE2_CMAKE_FLAGS=${EXTRA_CAFFE2_CMAKE_FLAGS[@]} \
|
||||
BUILD_LIBTORCH_WHL=1 BUILD_PYTHON_ONLY=0 \
|
||||
time CMAKE_ARGS=${CMAKE_ARGS[@]} \
|
||||
EXTRA_CAFFE2_CMAKE_FLAGS=${EXTRA_CAFFE2_CMAKE_FLAGS[@]} \
|
||||
BUILD_LIBTORCH_CPU_WITH_DEBUG=$BUILD_DEBUG_INFO \
|
||||
USE_NCCL=${USE_NCCL} USE_RCCL=${USE_RCCL} USE_KINETO=${USE_KINETO} \
|
||||
python setup.py bdist_wheel -d /tmp/$WHEELHOUSE_DIR
|
||||
echo "Finished setup.py bdist_wheel for split build (BUILD_LIBTORCH_WHL)"
|
||||
echo "Calling setup.py bdist_wheel for split build (BUILD_PYTHON_ONLY)"
|
||||
time EXTRA_CAFFE2_CMAKE_FLAGS=${EXTRA_CAFFE2_CMAKE_FLAGS[@]} \
|
||||
BUILD_LIBTORCH_WHL=0 BUILD_PYTHON_ONLY=1 \
|
||||
BUILD_LIBTORCH_CPU_WITH_DEBUG=$BUILD_DEBUG_INFO \
|
||||
USE_NCCL=${USE_NCCL} USE_RCCL=${USE_RCCL} USE_KINETO=${USE_KINETO} \
|
||||
CMAKE_FRESH=1 python setup.py bdist_wheel -d /tmp/$WHEELHOUSE_DIR
|
||||
echo "Finished setup.py bdist_wheel for split build (BUILD_PYTHON_ONLY)"
|
||||
else
|
||||
time CMAKE_ARGS=${CMAKE_ARGS[@]} \
|
||||
EXTRA_CAFFE2_CMAKE_FLAGS=${EXTRA_CAFFE2_CMAKE_FLAGS[@]} \
|
||||
BUILD_LIBTORCH_CPU_WITH_DEBUG=$BUILD_DEBUG_INFO \
|
||||
USE_NCCL=${USE_NCCL} USE_RCCL=${USE_RCCL} USE_KINETO=${USE_KINETO} \
|
||||
python setup.py bdist_wheel -d /tmp/$WHEELHOUSE_DIR
|
||||
fi
|
||||
echo "Finished setup.py bdist at $(date)"
|
||||
|
||||
# Build libtorch packages
|
||||
@ -272,10 +255,6 @@ ls /tmp/$WHEELHOUSE_DIR
|
||||
mkdir -p "/$WHEELHOUSE_DIR"
|
||||
mv /tmp/$WHEELHOUSE_DIR/torch*linux*.whl /$WHEELHOUSE_DIR/
|
||||
|
||||
if [[ "$USE_SPLIT_BUILD" == "true" ]]; then
|
||||
mv /tmp/$WHEELHOUSE_DIR/torch_no_python*.whl /$WHEELHOUSE_DIR/ || true
|
||||
fi
|
||||
|
||||
if [[ -n "$BUILD_PYTHONLESS" ]]; then
|
||||
mkdir -p /$LIBTORCH_HOUSE_DIR
|
||||
mv /tmp/$LIBTORCH_HOUSE_DIR/*.zip /$LIBTORCH_HOUSE_DIR
|
||||
@ -452,16 +431,8 @@ if [[ -z "$BUILD_PYTHONLESS" ]]; then
|
||||
pushd $PYTORCH_ROOT/test
|
||||
|
||||
# Install the wheel for this Python version
|
||||
if [[ "$USE_SPLIT_BUILD" == "true" ]]; then
|
||||
pip uninstall -y "$TORCH_NO_PYTHON_PACKAGE_NAME" || true
|
||||
fi
|
||||
|
||||
pip uninstall -y "$TORCH_PACKAGE_NAME"
|
||||
|
||||
if [[ "$USE_SPLIT_BUILD" == "true" ]]; then
|
||||
pip install "$TORCH_NO_PYTHON_PACKAGE_NAME" --no-index -f /$WHEELHOUSE_DIR --no-dependencies -v
|
||||
fi
|
||||
|
||||
pip install "$TORCH_PACKAGE_NAME" --no-index -f /$WHEELHOUSE_DIR --no-dependencies -v
|
||||
|
||||
# Print info on the libraries installed in this wheel
|
||||
|
||||
@ -194,7 +194,7 @@ ROCBLAS_LIB_SRC=$ROCM_HOME/lib/rocblas/library
|
||||
ROCBLAS_LIB_DST=lib/rocblas/library
|
||||
ROCBLAS_ARCH_SPECIFIC_FILES=$(ls $ROCBLAS_LIB_SRC | grep -E $ARCH)
|
||||
ROCBLAS_OTHER_FILES=$(ls $ROCBLAS_LIB_SRC | grep -v gfx)
|
||||
ROCBLAS_LIB_FILES=($ROCBLAS_ARCH_SPECIFIC_FILES $OTHER_FILES)
|
||||
ROCBLAS_LIB_FILES=($ROCBLAS_ARCH_SPECIFIC_FILES $ROCBLAS_OTHER_FILES)
|
||||
|
||||
# hipblaslt library files
|
||||
HIPBLASLT_LIB_SRC=$ROCM_HOME/lib/hipblaslt/library
|
||||
|
||||
@ -1,34 +0,0 @@
|
||||
#!/usr/bin/env bash
|
||||
# DO NOT ADD 'set -x' not to reveal CircleCI secret context environment variables
|
||||
set -eu -o pipefail
|
||||
|
||||
# This script uses linux host toolchain + mobile build options in order to
|
||||
# build & test mobile libtorch without having to setup Android/iOS
|
||||
# toolchain/simulator.
|
||||
|
||||
# shellcheck source=./common.sh
|
||||
source "$(dirname "${BASH_SOURCE[0]}")/common.sh"
|
||||
# shellcheck source=./common-build.sh
|
||||
source "$(dirname "${BASH_SOURCE[0]}")/common-build.sh"
|
||||
|
||||
# Install torch & torchvision - used to download & trace test model.
|
||||
# Ideally we should use the libtorch built on the PR so that backward
|
||||
# incompatible changes won't break this script - but it will significantly slow
|
||||
# down mobile CI jobs.
|
||||
# Here we install nightly instead of stable so that we have an option to
|
||||
# temporarily skip mobile CI jobs on BC-breaking PRs until they are in nightly.
|
||||
retry pip install --pre torch torchvision \
|
||||
-f https://download.pytorch.org/whl/nightly/cpu/torch_nightly.html \
|
||||
--progress-bar off
|
||||
|
||||
# Run end-to-end process of building mobile library, linking into the predictor
|
||||
# binary, and running forward pass with a real model.
|
||||
if [[ "$BUILD_ENVIRONMENT" == *-mobile-custom-build-static* ]]; then
|
||||
TEST_CUSTOM_BUILD_STATIC=1 test/mobile/custom_build/build.sh
|
||||
elif [[ "$BUILD_ENVIRONMENT" == *-mobile-lightweight-dispatch* ]]; then
|
||||
test/mobile/lightweight_dispatch/build.sh
|
||||
else
|
||||
TEST_DEFAULT_BUILD=1 test/mobile/custom_build/build.sh
|
||||
fi
|
||||
|
||||
print_sccache_stats
|
||||
@ -11,10 +11,6 @@ source "$(dirname "${BASH_SOURCE[0]}")/common.sh"
|
||||
# shellcheck source=./common-build.sh
|
||||
source "$(dirname "${BASH_SOURCE[0]}")/common-build.sh"
|
||||
|
||||
if [[ "$BUILD_ENVIRONMENT" == *-mobile-*build* ]]; then
|
||||
exec "$(dirname "${BASH_SOURCE[0]}")/build-mobile.sh" "$@"
|
||||
fi
|
||||
|
||||
echo "Python version:"
|
||||
python --version
|
||||
|
||||
@ -54,9 +50,6 @@ if [[ ${BUILD_ENVIRONMENT} == *"parallelnative"* ]]; then
|
||||
export ATEN_THREADING=NATIVE
|
||||
fi
|
||||
|
||||
# Enable LLVM dependency for TensorExpr testing
|
||||
export USE_LLVM=/opt/llvm
|
||||
export LLVM_DIR=/opt/llvm/lib/cmake/llvm
|
||||
|
||||
if ! which conda; then
|
||||
# In ROCm CIs, we are doing cross compilation on build machines with
|
||||
@ -124,26 +117,8 @@ if [[ "$BUILD_ENVIRONMENT" == *libtorch* ]]; then
|
||||
fi
|
||||
|
||||
# Use special scripts for Android builds
|
||||
if [[ "${BUILD_ENVIRONMENT}" == *-android* ]]; then
|
||||
export ANDROID_NDK=/opt/ndk
|
||||
build_args=()
|
||||
if [[ "${BUILD_ENVIRONMENT}" == *-arm-v7a* ]]; then
|
||||
build_args+=("-DANDROID_ABI=armeabi-v7a")
|
||||
elif [[ "${BUILD_ENVIRONMENT}" == *-arm-v8a* ]]; then
|
||||
build_args+=("-DANDROID_ABI=arm64-v8a")
|
||||
elif [[ "${BUILD_ENVIRONMENT}" == *-x86_32* ]]; then
|
||||
build_args+=("-DANDROID_ABI=x86")
|
||||
elif [[ "${BUILD_ENVIRONMENT}" == *-x86_64* ]]; then
|
||||
build_args+=("-DANDROID_ABI=x86_64")
|
||||
fi
|
||||
if [[ "${BUILD_ENVIRONMENT}" == *vulkan* ]]; then
|
||||
build_args+=("-DUSE_VULKAN=ON")
|
||||
fi
|
||||
build_args+=("-DUSE_LITE_INTERPRETER_PROFILER=OFF")
|
||||
exec ./scripts/build_android.sh "${build_args[@]}" "$@"
|
||||
fi
|
||||
|
||||
if [[ "$BUILD_ENVIRONMENT" != *android* && "$BUILD_ENVIRONMENT" == *vulkan* ]]; then
|
||||
if [[ "$BUILD_ENVIRONMENT" == *vulkan* ]]; then
|
||||
export USE_VULKAN=1
|
||||
# shellcheck disable=SC1091
|
||||
source /var/lib/jenkins/vulkansdk/setup-env.sh
|
||||
@ -198,7 +173,7 @@ fi
|
||||
|
||||
# We only build FlashAttention files for CUDA 8.0+, and they require large amounts of
|
||||
# memory to build and will OOM
|
||||
if [[ "$BUILD_ENVIRONMENT" == *cuda* ]] && [[ 1 -eq $(echo "${TORCH_CUDA_ARCH_LIST} >= 8.0" | bc) ]]; then
|
||||
if [[ "$BUILD_ENVIRONMENT" == *cuda* ]] && echo "${TORCH_CUDA_ARCH_LIST}" | tr ' ' '\n' | sed 's/$/>= 8.0/' | bc | grep -q 1; then
|
||||
export BUILD_CUSTOM_STEP="ninja -C build flash_attention -j 2"
|
||||
fi
|
||||
|
||||
@ -214,7 +189,6 @@ if [[ "$BUILD_ENVIRONMENT" == *-clang*-asan* ]]; then
|
||||
export USE_ASAN=1
|
||||
export REL_WITH_DEB_INFO=1
|
||||
export UBSAN_FLAGS="-fno-sanitize-recover=all"
|
||||
unset USE_LLVM
|
||||
fi
|
||||
|
||||
if [[ "${BUILD_ENVIRONMENT}" == *no-ops* ]]; then
|
||||
@ -225,7 +199,7 @@ if [[ "${BUILD_ENVIRONMENT}" == *-pch* ]]; then
|
||||
export USE_PRECOMPILED_HEADERS=1
|
||||
fi
|
||||
|
||||
if [[ "${BUILD_ENVIRONMENT}" != *android* && "${BUILD_ENVIRONMENT}" != *cuda* ]]; then
|
||||
if [[ "${BUILD_ENVIRONMENT}" != *cuda* ]]; then
|
||||
export BUILD_STATIC_RUNTIME_BENCHMARK=ON
|
||||
fi
|
||||
|
||||
@ -287,22 +261,13 @@ else
|
||||
|
||||
WERROR=1 python setup.py clean
|
||||
|
||||
if [[ "$USE_SPLIT_BUILD" == "true" ]]; then
|
||||
python3 tools/packaging/split_wheel.py bdist_wheel
|
||||
else
|
||||
WERROR=1 python setup.py bdist_wheel
|
||||
fi
|
||||
WERROR=1 python setup.py bdist_wheel
|
||||
else
|
||||
python setup.py clean
|
||||
if [[ "$BUILD_ENVIRONMENT" == *xla* ]]; then
|
||||
source .ci/pytorch/install_cache_xla.sh
|
||||
fi
|
||||
if [[ "$USE_SPLIT_BUILD" == "true" ]]; then
|
||||
echo "USE_SPLIT_BUILD cannot be used with xla or rocm"
|
||||
exit 1
|
||||
else
|
||||
python setup.py bdist_wheel
|
||||
fi
|
||||
python setup.py bdist_wheel
|
||||
fi
|
||||
pip_install_whl "$(echo dist/*.whl)"
|
||||
|
||||
|
||||
@ -229,7 +229,6 @@ function install_torchrec_and_fbgemm() {
|
||||
|
||||
pip_install tabulate # needed for newer fbgemm
|
||||
pip_install patchelf # needed for rocm fbgemm
|
||||
pushd /tmp
|
||||
|
||||
local wheel_dir=dist/fbgemm_gpu
|
||||
local found_whl=0
|
||||
@ -245,7 +244,7 @@ function install_torchrec_and_fbgemm() {
|
||||
if [ "${found_whl}" == "0" ]; then
|
||||
git clone --recursive https://github.com/pytorch/fbgemm
|
||||
pushd fbgemm/fbgemm_gpu
|
||||
git checkout "${fbgemm_commit}"
|
||||
git checkout "${fbgemm_commit}" --recurse-submodules
|
||||
python setup.py bdist_wheel \
|
||||
--build-variant=rocm \
|
||||
-DHIP_ROOT_DIR="${ROCM_PATH}" \
|
||||
@ -264,7 +263,6 @@ function install_torchrec_and_fbgemm() {
|
||||
done
|
||||
|
||||
rm -rf fbgemm
|
||||
popd
|
||||
else
|
||||
pip_build_and_install "git+https://github.com/pytorch/torchrec.git@${torchrec_commit}" dist/torchrec
|
||||
pip_build_and_install "git+https://github.com/pytorch/FBGEMM.git@${fbgemm_commit}#subdirectory=fbgemm_gpu" dist/fbgemm_gpu
|
||||
@ -283,30 +281,6 @@ function clone_pytorch_xla() {
|
||||
fi
|
||||
}
|
||||
|
||||
function checkout_install_torchbench() {
|
||||
local commit
|
||||
commit=$(get_pinned_commit torchbench)
|
||||
git clone https://github.com/pytorch/benchmark torchbench
|
||||
pushd torchbench
|
||||
git checkout "$commit"
|
||||
|
||||
if [ "$1" ]; then
|
||||
python install.py --continue_on_fail models "$@"
|
||||
else
|
||||
# Occasionally the installation may fail on one model but it is ok to continue
|
||||
# to install and test other models
|
||||
python install.py --continue_on_fail
|
||||
fi
|
||||
|
||||
# TODO (huydhn): transformers-4.44.2 added by https://github.com/pytorch/benchmark/pull/2488
|
||||
# is regressing speedup metric. This needs to be investigated further
|
||||
pip install transformers==4.38.1
|
||||
|
||||
echo "Print all dependencies after TorchBench is installed"
|
||||
python -mpip freeze
|
||||
popd
|
||||
}
|
||||
|
||||
function install_torchao() {
|
||||
local commit
|
||||
commit=$(get_pinned_commit torchao)
|
||||
|
||||
@ -1,123 +0,0 @@
|
||||
from datetime import datetime, timedelta, timezone
|
||||
from tempfile import mkdtemp
|
||||
|
||||
from cryptography import x509
|
||||
from cryptography.hazmat.primitives import hashes, serialization
|
||||
from cryptography.hazmat.primitives.asymmetric import rsa
|
||||
from cryptography.x509.oid import NameOID
|
||||
|
||||
|
||||
temp_dir = mkdtemp()
|
||||
print(temp_dir)
|
||||
|
||||
|
||||
def genrsa(path):
|
||||
key = rsa.generate_private_key(
|
||||
public_exponent=65537,
|
||||
key_size=2048,
|
||||
)
|
||||
with open(path, "wb") as f:
|
||||
f.write(
|
||||
key.private_bytes(
|
||||
encoding=serialization.Encoding.PEM,
|
||||
format=serialization.PrivateFormat.TraditionalOpenSSL,
|
||||
encryption_algorithm=serialization.NoEncryption(),
|
||||
)
|
||||
)
|
||||
return key
|
||||
|
||||
|
||||
def create_cert(path, C, ST, L, O, key):
|
||||
subject = issuer = x509.Name(
|
||||
[
|
||||
x509.NameAttribute(NameOID.COUNTRY_NAME, C),
|
||||
x509.NameAttribute(NameOID.STATE_OR_PROVINCE_NAME, ST),
|
||||
x509.NameAttribute(NameOID.LOCALITY_NAME, L),
|
||||
x509.NameAttribute(NameOID.ORGANIZATION_NAME, O),
|
||||
]
|
||||
)
|
||||
cert = (
|
||||
x509.CertificateBuilder()
|
||||
.subject_name(subject)
|
||||
.issuer_name(issuer)
|
||||
.public_key(key.public_key())
|
||||
.serial_number(x509.random_serial_number())
|
||||
.not_valid_before(datetime.now(timezone.utc))
|
||||
.not_valid_after(
|
||||
# Our certificate will be valid for 10 days
|
||||
datetime.now(timezone.utc) + timedelta(days=10)
|
||||
)
|
||||
.add_extension(
|
||||
x509.BasicConstraints(ca=True, path_length=None),
|
||||
critical=True,
|
||||
)
|
||||
.sign(key, hashes.SHA256())
|
||||
)
|
||||
# Write our certificate out to disk.
|
||||
with open(path, "wb") as f:
|
||||
f.write(cert.public_bytes(serialization.Encoding.PEM))
|
||||
return cert
|
||||
|
||||
|
||||
def create_req(path, C, ST, L, O, key):
|
||||
csr = (
|
||||
x509.CertificateSigningRequestBuilder()
|
||||
.subject_name(
|
||||
x509.Name(
|
||||
[
|
||||
# Provide various details about who we are.
|
||||
x509.NameAttribute(NameOID.COUNTRY_NAME, C),
|
||||
x509.NameAttribute(NameOID.STATE_OR_PROVINCE_NAME, ST),
|
||||
x509.NameAttribute(NameOID.LOCALITY_NAME, L),
|
||||
x509.NameAttribute(NameOID.ORGANIZATION_NAME, O),
|
||||
]
|
||||
)
|
||||
)
|
||||
.sign(key, hashes.SHA256())
|
||||
)
|
||||
with open(path, "wb") as f:
|
||||
f.write(csr.public_bytes(serialization.Encoding.PEM))
|
||||
return csr
|
||||
|
||||
|
||||
def sign_certificate_request(path, csr_cert, ca_cert, private_ca_key):
|
||||
cert = (
|
||||
x509.CertificateBuilder()
|
||||
.subject_name(csr_cert.subject)
|
||||
.issuer_name(ca_cert.subject)
|
||||
.public_key(csr_cert.public_key())
|
||||
.serial_number(x509.random_serial_number())
|
||||
.not_valid_before(datetime.now(timezone.utc))
|
||||
.not_valid_after(
|
||||
# Our certificate will be valid for 10 days
|
||||
datetime.now(timezone.utc) + timedelta(days=10)
|
||||
# Sign our certificate with our private key
|
||||
)
|
||||
.sign(private_ca_key, hashes.SHA256())
|
||||
)
|
||||
with open(path, "wb") as f:
|
||||
f.write(cert.public_bytes(serialization.Encoding.PEM))
|
||||
return cert
|
||||
|
||||
|
||||
ca_key = genrsa(temp_dir + "/ca.key")
|
||||
ca_cert = create_cert(
|
||||
temp_dir + "/ca.pem",
|
||||
"US",
|
||||
"New York",
|
||||
"New York",
|
||||
"Gloo Certificate Authority",
|
||||
ca_key,
|
||||
)
|
||||
|
||||
pkey = genrsa(temp_dir + "/pkey.key")
|
||||
csr = create_req(
|
||||
temp_dir + "/csr.csr",
|
||||
"US",
|
||||
"California",
|
||||
"San Francisco",
|
||||
"Gloo Testing Company",
|
||||
pkey,
|
||||
)
|
||||
|
||||
cert = sign_certificate_request(temp_dir + "/cert.pem", csr, ca_cert, ca_key)
|
||||
@ -157,6 +157,29 @@ test_jit_hooks() {
|
||||
assert_git_not_dirty
|
||||
}
|
||||
|
||||
# Shellcheck doesn't like it when you pass no arguments to a function
|
||||
# that can take args. See https://www.shellcheck.net/wiki/SC2120
|
||||
# shellcheck disable=SC2120
|
||||
checkout_install_torchbench() {
|
||||
local commit
|
||||
commit=$(cat .ci/docker/ci_commit_pins/torchbench.txt)
|
||||
git clone https://github.com/pytorch/benchmark torchbench
|
||||
pushd torchbench
|
||||
git checkout "$commit"
|
||||
|
||||
if [ "$1" ]; then
|
||||
python install.py --continue_on_fail models "$@"
|
||||
else
|
||||
# Occasionally the installation may fail on one model but it is ok to continue
|
||||
# to install and test other models
|
||||
python install.py --continue_on_fail
|
||||
fi
|
||||
|
||||
echo "Print all dependencies after TorchBench is installed"
|
||||
python -mpip freeze
|
||||
popd
|
||||
}
|
||||
|
||||
torchbench_setup_macos() {
|
||||
git clone --recursive https://github.com/pytorch/vision torchvision
|
||||
git clone --recursive https://github.com/pytorch/audio torchaudio
|
||||
@ -179,8 +202,6 @@ torchbench_setup_macos() {
|
||||
USE_OPENMP=0 python setup.py develop
|
||||
popd
|
||||
|
||||
# Shellcheck doesn't like it when you pass no arguments to a function that can take args. See https://www.shellcheck.net/wiki/SC2120
|
||||
# shellcheck disable=SC2119,SC2120
|
||||
checkout_install_torchbench
|
||||
}
|
||||
|
||||
|
||||
@ -1,18 +0,0 @@
|
||||
#!/bin/bash
|
||||
|
||||
CREATE_TEST_CERT="$(dirname "${BASH_SOURCE[0]}")/create_test_cert.py"
|
||||
TMP_CERT_DIR=$(python "$CREATE_TEST_CERT")
|
||||
|
||||
openssl verify -CAfile "${TMP_CERT_DIR}/ca.pem" "${TMP_CERT_DIR}/cert.pem"
|
||||
|
||||
export GLOO_DEVICE_TRANSPORT=TCP_TLS
|
||||
export GLOO_DEVICE_TRANSPORT_TCP_TLS_PKEY=${TMP_CERT_DIR}/pkey.key
|
||||
export GLOO_DEVICE_TRANSPORT_TCP_TLS_CERT=${TMP_CERT_DIR}/cert.pem
|
||||
export GLOO_DEVICE_TRANSPORT_TCP_TLS_CA_FILE=${TMP_CERT_DIR}/ca.pem
|
||||
|
||||
time python test/run_test.py --include distributed/test_c10d_gloo --verbose -- ProcessGroupGlooTest
|
||||
|
||||
unset GLOO_DEVICE_TRANSPORT
|
||||
unset GLOO_DEVICE_TRANSPORT_TCP_TLS_PKEY
|
||||
unset GLOO_DEVICE_TRANSPORT_TCP_TLS_CERT
|
||||
unset GLOO_DEVICE_TRANSPORT_TCP_TLS_CA_FILE
|
||||
@ -385,6 +385,29 @@ def smoke_test_compile(device: str = "cpu") -> None:
|
||||
x_pt2 = torch.compile(model, mode="max-autotune")(x)
|
||||
|
||||
|
||||
def smoke_test_nvshmem() -> None:
|
||||
if not torch.cuda.is_available():
|
||||
print("CUDA is not available, skipping NVSHMEM test")
|
||||
return
|
||||
|
||||
# Check if NVSHMEM is compiled in current build
|
||||
try:
|
||||
from torch._C._distributed_c10d import _is_nvshmem_available
|
||||
except ImportError:
|
||||
# Not built with NVSHMEM support.
|
||||
# torch is not compiled with NVSHMEM prior to 2.9
|
||||
if torch.__version__ < "2.9":
|
||||
return
|
||||
else:
|
||||
# After 2.9: NVSHMEM is expected to be compiled in current build
|
||||
raise RuntimeError("torch not compiled with NVSHMEM") from None
|
||||
|
||||
print("torch compiled with NVSHMEM")
|
||||
|
||||
# Check if NVSHMEM is available on current system.
|
||||
print(f"NVSHMEM available at run time: {_is_nvshmem_available()}")
|
||||
|
||||
|
||||
def smoke_test_modules():
|
||||
cwd = os.getcwd()
|
||||
for module in MODULES:
|
||||
@ -479,6 +502,8 @@ def main() -> None:
|
||||
options.pypi_pkg_check,
|
||||
)
|
||||
|
||||
smoke_test_nvshmem()
|
||||
|
||||
|
||||
if __name__ == "__main__":
|
||||
main()
|
||||
|
||||
@ -462,7 +462,7 @@ test_inductor_aoti() {
|
||||
# rebuild with the build cache with `BUILD_AOT_INDUCTOR_TEST` enabled
|
||||
/usr/bin/env CMAKE_FRESH=1 BUILD_AOT_INDUCTOR_TEST=1 "${BUILD_COMMAND[@]}"
|
||||
|
||||
/usr/bin/env "${TEST_ENVS[@]}" python test/run_test.py --cpp --verbose -i cpp/test_aoti_abi_check cpp/test_aoti_inference -dist=loadfile
|
||||
/usr/bin/env "${TEST_ENVS[@]}" python test/run_test.py --cpp --verbose -i cpp/test_aoti_abi_check cpp/test_aoti_inference cpp/test_vec_half_AVX2 -dist=loadfile
|
||||
}
|
||||
|
||||
test_inductor_cpp_wrapper_shard() {
|
||||
@ -627,6 +627,8 @@ test_perf_for_dashboard() {
|
||||
device=cuda_a10g
|
||||
elif [[ "${TEST_CONFIG}" == *h100* ]]; then
|
||||
device=cuda_h100
|
||||
elif [[ "${TEST_CONFIG}" == *b200* ]]; then
|
||||
device=cuda_b200
|
||||
elif [[ "${TEST_CONFIG}" == *rocm* ]]; then
|
||||
device=rocm
|
||||
fi
|
||||
@ -801,6 +803,16 @@ test_dynamo_benchmark() {
|
||||
if [[ "${TEST_CONFIG}" == *perf_compare* ]]; then
|
||||
test_single_dynamo_benchmark "training" "$suite" "$shard_id" --training --amp "$@"
|
||||
elif [[ "${TEST_CONFIG}" == *perf* ]]; then
|
||||
# TODO (huydhn): Just smoke test some sample models
|
||||
if [[ "${TEST_CONFIG}" == *b200* ]]; then
|
||||
if [[ "${suite}" == "huggingface" ]]; then
|
||||
export TORCHBENCH_ONLY_MODELS="DistillGPT2"
|
||||
elif [[ "${suite}" == "timm_models" ]]; then
|
||||
export TORCHBENCH_ONLY_MODELS="inception_v3"
|
||||
elif [[ "${suite}" == "torchbench" ]]; then
|
||||
export TORCHBENCH_ONLY_MODELS="hf_Bert"
|
||||
fi
|
||||
fi
|
||||
test_single_dynamo_benchmark "dashboard" "$suite" "$shard_id" "$@"
|
||||
else
|
||||
if [[ "${TEST_CONFIG}" == *cpu* ]]; then
|
||||
@ -928,12 +940,6 @@ test_torchbench_gcp_smoketest(){
|
||||
popd
|
||||
}
|
||||
|
||||
test_python_gloo_with_tls() {
|
||||
source "$(dirname "${BASH_SOURCE[0]}")/run_glootls_test.sh"
|
||||
assert_git_not_dirty
|
||||
}
|
||||
|
||||
|
||||
test_aten() {
|
||||
# Test ATen
|
||||
# The following test(s) of ATen have already been skipped by caffe2 in rocm environment:
|
||||
@ -980,6 +986,8 @@ test_without_numpy() {
|
||||
if [[ "${TEST_CONFIG}" == *dynamo_wrapped* ]]; then
|
||||
python -c "import sys;sys.path.insert(0, 'fake_numpy');import torch;torch.compile(lambda x:print(x))('Hello World')"
|
||||
fi
|
||||
# Regression test for https://github.com/pytorch/pytorch/pull/157734 (torch.onnx should be importable without numpy)
|
||||
python -c "import sys;sys.path.insert(0, 'fake_numpy');import torch; import torch.onnx"
|
||||
popd
|
||||
}
|
||||
|
||||
@ -1043,20 +1051,10 @@ test_libtorch_api() {
|
||||
mkdir -p $TEST_REPORTS_DIR
|
||||
|
||||
OMP_NUM_THREADS=2 TORCH_CPP_TEST_MNIST_PATH="${MNIST_DIR}" "$TORCH_BIN_DIR"/test_api --gtest_filter='-IMethodTest.*' --gtest_output=xml:$TEST_REPORTS_DIR/test_api.xml
|
||||
"$TORCH_BIN_DIR"/test_tensorexpr --gtest_output=xml:$TEST_REPORTS_DIR/test_tensorexpr.xml
|
||||
else
|
||||
# Exclude IMethodTest that relies on torch::deploy, which will instead be ran in test_deploy
|
||||
OMP_NUM_THREADS=2 TORCH_CPP_TEST_MNIST_PATH="${MNIST_DIR}" python test/run_test.py --cpp --verbose -i cpp/test_api -k "not IMethodTest"
|
||||
|
||||
# On s390x, pytorch is built without llvm.
|
||||
# Even if it would be built with llvm, llvm currently doesn't support used features on s390x and
|
||||
# test fails with errors like:
|
||||
# JIT session error: Unsupported target machine architecture in ELF object pytorch-jitted-objectbuffer
|
||||
# unknown file: Failure
|
||||
# C++ exception with description "valOrErr INTERNAL ASSERT FAILED at "/var/lib/jenkins/workspace/torch/csrc/jit/tensorexpr/llvm_jit.h":34, please report a bug to PyTorch. Unexpected failure in LLVM JIT: Failed to materialize symbols: { (main, { func }) }
|
||||
if [[ "${BUILD_ENVIRONMENT}" != *s390x* ]]; then
|
||||
python test/run_test.py --cpp --verbose -i cpp/test_tensorexpr
|
||||
fi
|
||||
fi
|
||||
|
||||
# quantization is not fully supported on s390x yet
|
||||
@ -1324,10 +1322,13 @@ EOF
|
||||
|
||||
# Step 2. Make sure that the public API test "test_correct_module_names" fails when an existing
|
||||
# file is modified to introduce an invalid public API function.
|
||||
EXISTING_FILEPATH="${TORCH_INSTALL_DIR}/nn/parameter.py"
|
||||
# The filepath here must not have __all__ defined in it, otherwise the test will pass.
|
||||
# If your PR introduces __all__ to torch/cuda/streams.py please point this to another file
|
||||
# that does not have __all__ defined.
|
||||
EXISTING_FILEPATH="${TORCH_INSTALL_DIR}/cuda/streams.py"
|
||||
cp -v "${EXISTING_FILEPATH}" "${EXISTING_FILEPATH}.orig"
|
||||
echo "${BAD_PUBLIC_FUNC}" >> "${EXISTING_FILEPATH}"
|
||||
invalid_api="torch.nn.parameter.new_public_func"
|
||||
invalid_api="torch.cuda.streams.new_public_func"
|
||||
echo "Appended an invalid public API function to existing file ${EXISTING_FILEPATH}..."
|
||||
|
||||
check_public_api_test_fails \
|
||||
@ -1561,7 +1562,7 @@ test_executorch() {
|
||||
test_linux_aarch64() {
|
||||
python test/run_test.py --include test_modules test_mkldnn test_mkldnn_fusion test_openmp test_torch test_dynamic_shapes \
|
||||
test_transformers test_multiprocessing test_numpy_interop test_autograd test_binary_ufuncs test_complex test_spectral_ops \
|
||||
test_foreach test_reductions test_unary_ufuncs test_tensor_creation_ops test_ops test_cpp_extensions_open_device_registration \
|
||||
test_foreach test_reductions test_unary_ufuncs test_tensor_creation_ops test_ops \
|
||||
--shard "$SHARD_NUMBER" "$NUM_TEST_SHARDS" --verbose
|
||||
|
||||
# Dynamo tests
|
||||
@ -1673,13 +1674,11 @@ elif [[ "${TEST_CONFIG}" == *timm* ]]; then
|
||||
elif [[ "${TEST_CONFIG}" == cachebench ]]; then
|
||||
install_torchaudio
|
||||
install_torchvision
|
||||
checkout_install_torchbench nanogpt BERT_pytorch resnet50 hf_T5 llama moco
|
||||
PYTHONPATH=$(pwd)/torchbench test_cachebench
|
||||
PYTHONPATH=/torchbench test_cachebench
|
||||
elif [[ "${TEST_CONFIG}" == verify_cachebench ]]; then
|
||||
install_torchaudio
|
||||
install_torchvision
|
||||
checkout_install_torchbench nanogpt
|
||||
PYTHONPATH=$(pwd)/torchbench test_verify_cachebench
|
||||
PYTHONPATH=/torchbench test_verify_cachebench
|
||||
elif [[ "${TEST_CONFIG}" == *torchbench* ]]; then
|
||||
install_torchaudio
|
||||
install_torchvision
|
||||
@ -1688,28 +1687,22 @@ elif [[ "${TEST_CONFIG}" == *torchbench* ]]; then
|
||||
# https://github.com/opencv/opencv-python/issues/885
|
||||
pip_install opencv-python==4.8.0.74
|
||||
if [[ "${TEST_CONFIG}" == *inductor_torchbench_smoketest_perf* ]]; then
|
||||
checkout_install_torchbench hf_Bert hf_Albert timm_vision_transformer
|
||||
PYTHONPATH=$(pwd)/torchbench test_inductor_torchbench_smoketest_perf
|
||||
PYTHONPATH=/torchbench test_inductor_torchbench_smoketest_perf
|
||||
elif [[ "${TEST_CONFIG}" == *inductor_torchbench_cpu_smoketest_perf* ]]; then
|
||||
checkout_install_torchbench timm_vision_transformer phlippe_densenet basic_gnn_edgecnn \
|
||||
llama_v2_7b_16h resnet50 timm_efficientnet mobilenet_v3_large timm_resnest \
|
||||
functorch_maml_omniglot yolov3 mobilenet_v2 resnext50_32x4d densenet121 mnasnet1_0
|
||||
PYTHONPATH=$(pwd)/torchbench test_inductor_torchbench_cpu_smoketest_perf
|
||||
PYTHONPATH=/torchbench test_inductor_torchbench_cpu_smoketest_perf
|
||||
elif [[ "${TEST_CONFIG}" == *torchbench_gcp_smoketest* ]]; then
|
||||
checkout_install_torchbench
|
||||
TORCHBENCHPATH=$(pwd)/torchbench test_torchbench_gcp_smoketest
|
||||
TORCHBENCHPATH=/torchbench test_torchbench_gcp_smoketest
|
||||
else
|
||||
checkout_install_torchbench
|
||||
# Do this after checkout_install_torchbench to ensure we clobber any
|
||||
# nightlies that torchbench may pull in
|
||||
if [[ "${TEST_CONFIG}" != *cpu* ]]; then
|
||||
install_torchrec_and_fbgemm
|
||||
fi
|
||||
PYTHONPATH=$(pwd)/torchbench test_dynamo_benchmark torchbench "$id"
|
||||
PYTHONPATH=/torchbench test_dynamo_benchmark torchbench "$id"
|
||||
fi
|
||||
elif [[ "${TEST_CONFIG}" == *inductor_cpp_wrapper* ]]; then
|
||||
install_torchvision
|
||||
PYTHONPATH=$(pwd)/torchbench test_inductor_cpp_wrapper_shard "$SHARD_NUMBER"
|
||||
PYTHONPATH=/torchbench test_inductor_cpp_wrapper_shard "$SHARD_NUMBER"
|
||||
if [[ "$SHARD_NUMBER" -eq "1" ]]; then
|
||||
test_inductor_aoti
|
||||
fi
|
||||
|
||||
@ -192,9 +192,6 @@ retry brew install libomp
|
||||
# For USE_DISTRIBUTED=1 on macOS, need libuv, which is build as part of tensorpipe submodule
|
||||
export USE_DISTRIBUTED=1
|
||||
|
||||
if [[ -n "$CROSS_COMPILE_ARM64" ]]; then
|
||||
export CMAKE_OSX_ARCHITECTURES=arm64
|
||||
fi
|
||||
export USE_MKLDNN=OFF
|
||||
export USE_QNNPACK=OFF
|
||||
export BUILD_TEST=OFF
|
||||
@ -202,16 +199,7 @@ export BUILD_TEST=OFF
|
||||
pushd "$pytorch_rootdir"
|
||||
echo "Calling setup.py bdist_wheel at $(date)"
|
||||
|
||||
if [[ "$USE_SPLIT_BUILD" == "true" ]]; then
|
||||
echo "Calling setup.py bdist_wheel for split build (BUILD_LIBTORCH_WHL)"
|
||||
BUILD_LIBTORCH_WHL=1 BUILD_PYTHON_ONLY=0 python setup.py bdist_wheel -d "$whl_tmp_dir"
|
||||
echo "Finished setup.py bdist_wheel for split build (BUILD_LIBTORCH_WHL)"
|
||||
echo "Calling setup.py bdist_wheel for split build (BUILD_PYTHON_ONLY)"
|
||||
BUILD_LIBTORCH_WHL=0 BUILD_PYTHON_ONLY=1 CMAKE_FRESH=1 python setup.py bdist_wheel -d "$whl_tmp_dir"
|
||||
echo "Finished setup.py bdist_wheel for split build (BUILD_PYTHON_ONLY)"
|
||||
else
|
||||
python setup.py bdist_wheel -d "$whl_tmp_dir"
|
||||
fi
|
||||
python setup.py bdist_wheel -d "$whl_tmp_dir"
|
||||
|
||||
echo "Finished setup.py bdist_wheel at $(date)"
|
||||
|
||||
|
||||
@ -65,16 +65,8 @@ fi
|
||||
|
||||
if [[ "$PACKAGE_TYPE" != libtorch ]]; then
|
||||
if [[ "\$BUILD_ENVIRONMENT" != *s390x* ]]; then
|
||||
if [[ "$USE_SPLIT_BUILD" == "true" ]]; then
|
||||
pkg_no_python="$(ls -1 /final_pkgs/torch_no_python* | sort |tail -1)"
|
||||
pkg_torch="$(ls -1 /final_pkgs/torch-* | sort |tail -1)"
|
||||
# todo: after folder is populated use the pypi_pkg channel instead
|
||||
pip install "\$pkg_no_python" "\$pkg_torch" --index-url "https://download.pytorch.org/whl/\${CHANNEL}/${DESIRED_CUDA}_pypi_pkg"
|
||||
retry pip install -q numpy protobuf typing-extensions
|
||||
else
|
||||
pip install "\$pkg" --index-url "https://download.pytorch.org/whl/\${CHANNEL}/${DESIRED_CUDA}"
|
||||
retry pip install -q numpy protobuf typing-extensions
|
||||
fi
|
||||
pip install "\$pkg" --index-url "https://download.pytorch.org/whl/\${CHANNEL}/${DESIRED_CUDA}"
|
||||
retry pip install -q numpy protobuf typing-extensions
|
||||
else
|
||||
pip install "\$pkg"
|
||||
retry pip install -q numpy protobuf typing-extensions
|
||||
|
||||
@ -134,7 +134,6 @@ export DESIRED_PYTHON="${DESIRED_PYTHON:-}"
|
||||
export DESIRED_CUDA="$DESIRED_CUDA"
|
||||
export LIBTORCH_VARIANT="${LIBTORCH_VARIANT:-}"
|
||||
export BUILD_PYTHONLESS="${BUILD_PYTHONLESS:-}"
|
||||
export USE_SPLIT_BUILD="${USE_SPLIT_BUILD:-}"
|
||||
if [[ "${OSTYPE}" == "msys" ]]; then
|
||||
export LIBTORCH_CONFIG="${LIBTORCH_CONFIG:-}"
|
||||
if [[ "${LIBTORCH_CONFIG:-}" == 'debug' ]]; then
|
||||
|
||||
@ -23,10 +23,6 @@ if [[ "${DRY_RUN}" = "disabled" ]]; then
|
||||
AWS_S3_CP="aws s3 cp"
|
||||
fi
|
||||
|
||||
if [[ "${USE_SPLIT_BUILD:-false}" == "true" ]]; then
|
||||
UPLOAD_SUBFOLDER="${UPLOAD_SUBFOLDER}_pypi_pkg"
|
||||
fi
|
||||
|
||||
# this is special build with all dependencies packaged
|
||||
if [[ ${BUILD_NAME} == *-full* ]]; then
|
||||
UPLOAD_SUBFOLDER="${UPLOAD_SUBFOLDER}_full"
|
||||
|
||||
4
.flake8
4
.flake8
@ -7,12 +7,12 @@ max-line-length = 120
|
||||
# C408 ignored because we like the dict keyword argument syntax
|
||||
# E501 is not flexible enough, we're using B950 instead
|
||||
ignore =
|
||||
E203,E305,E402,E501,E704,E721,E741,F405,F841,F999,W503,W504,C408,E302,W291,E303,
|
||||
E203,E305,E402,E501,E704,E721,E741,F405,F841,F999,W503,W504,C408,E302,W291,E303,F824,
|
||||
# shebang has extra meaning in fbcode lints, so I think it's not worth trying
|
||||
# to line this up with executable bit
|
||||
EXE001,
|
||||
# these ignores are from flake8-bugbear; please fix!
|
||||
B007,B008,B017,B019,B023,B028,B903,B904,B905,B906,B907
|
||||
B007,B008,B017,B019,B023,B028,B903,B904,B905,B906,B907,B908,B910
|
||||
# these ignores are from flake8-comprehensions; please fix!
|
||||
C407,
|
||||
# these ignores are from flake8-logging-format; please fix!
|
||||
|
||||
10
.github/actionlint.yaml
vendored
10
.github/actionlint.yaml
vendored
@ -53,16 +53,12 @@ self-hosted-runner:
|
||||
- linux.rocm.gpu.mi250
|
||||
- linux.rocm.gpu.2
|
||||
- linux.rocm.gpu.4
|
||||
# MI300 runners
|
||||
- linux.rocm.gpu.mi300.2
|
||||
- linux.rocm.gpu.mi300.4
|
||||
# gfx942 runners
|
||||
- linux.rocm.gpu.gfx942.2
|
||||
- linux.rocm.gpu.gfx942.4
|
||||
- rocm-docker
|
||||
# Repo-specific Apple hosted runners
|
||||
- macos-m1-ultra
|
||||
- macos-m2-14
|
||||
# Org wise AWS `mac2.metal` runners (2020 Mac mini hardware powered by Apple silicon M1 processors)
|
||||
- macos-m1-stable
|
||||
- macos-m1-13
|
||||
- macos-m1-14
|
||||
# GitHub-hosted MacOS runners
|
||||
- macos-latest-xlarge
|
||||
|
||||
78
.github/actions/build-android/action.yml
vendored
78
.github/actions/build-android/action.yml
vendored
@ -1,78 +0,0 @@
|
||||
name: build android
|
||||
|
||||
description: build android for a specific arch
|
||||
|
||||
inputs:
|
||||
arch:
|
||||
description: arch to build
|
||||
required: true
|
||||
arch-for-build-env:
|
||||
description: |
|
||||
arch to pass to build environment.
|
||||
This is currently different than the arch name we use elsewhere, which
|
||||
should be fixed.
|
||||
required: true
|
||||
github-secret:
|
||||
description: github token
|
||||
required: true
|
||||
build-environment:
|
||||
required: true
|
||||
description: Top-level label for what's being built/tested.
|
||||
docker-image:
|
||||
required: true
|
||||
description: Name of the base docker image to build with.
|
||||
branch:
|
||||
required: true
|
||||
description: What branch we are building on.
|
||||
outputs:
|
||||
container_id:
|
||||
description: Docker container identifier used to build the artifacts
|
||||
value: ${{ steps.build.outputs.container_id }}
|
||||
|
||||
runs:
|
||||
using: composite
|
||||
steps:
|
||||
- name: Build-${{ inputs.arch }}
|
||||
id: build
|
||||
shell: bash
|
||||
env:
|
||||
BRANCH: ${{ inputs.branch }}
|
||||
BUILD_ENVIRONMENT: pytorch-linux-xenial-py3-clang5-android-ndk-r19c-${{ inputs.arch-for-build-env }}-build"
|
||||
AWS_DEFAULT_REGION: us-east-1
|
||||
PR_NUMBER: ${{ github.event.pull_request.number }}
|
||||
SHA1: ${{ github.event.pull_request.head.sha || github.sha }}
|
||||
SCCACHE_BUCKET: ossci-compiler-cache-circleci-v2
|
||||
SCCACHE_REGION: us-east-1
|
||||
DOCKER_IMAGE: ${{ inputs.docker-image }}
|
||||
MATRIX_ARCH: ${{ inputs.arch }}
|
||||
run: |
|
||||
# detached container should get cleaned up by teardown_ec2_linux
|
||||
set -exo pipefail
|
||||
export container_name
|
||||
container_name=$(docker run \
|
||||
-e BUILD_ENVIRONMENT \
|
||||
-e MAX_JOBS="$(nproc --ignore=2)" \
|
||||
-e AWS_DEFAULT_REGION \
|
||||
-e PR_NUMBER \
|
||||
-e SHA1 \
|
||||
-e BRANCH \
|
||||
-e SCCACHE_BUCKET \
|
||||
-e SCCACHE_REGION \
|
||||
-e SKIP_SCCACHE_INITIALIZATION=1 \
|
||||
--env-file="/tmp/github_env_${GITHUB_RUN_ID}" \
|
||||
--security-opt seccomp=unconfined \
|
||||
--cap-add=SYS_PTRACE \
|
||||
--tty \
|
||||
--detach \
|
||||
--user jenkins \
|
||||
-w /var/lib/jenkins/workspace \
|
||||
"${DOCKER_IMAGE}"
|
||||
)
|
||||
git submodule sync && git submodule update -q --init --recursive --depth 1
|
||||
docker cp "${GITHUB_WORKSPACE}/." "${container_name}:/var/lib/jenkins/workspace"
|
||||
(echo "sudo chown -R jenkins . && .ci/pytorch/build.sh && find ${BUILD_ROOT} -type f -name "*.a" -or -name "*.o" -delete" | docker exec -u jenkins -i "${container_name}" bash) 2>&1
|
||||
|
||||
# Copy install binaries back
|
||||
mkdir -p "${GITHUB_WORKSPACE}/build_android_install_${MATRIX_ARCH}"
|
||||
docker cp "${container_name}:/var/lib/jenkins/workspace/build_android/install" "${GITHUB_WORKSPACE}/build_android_install_${MATRIX_ARCH}"
|
||||
echo "container_id=${container_name}" >> "${GITHUB_OUTPUT}"
|
||||
@ -70,7 +70,7 @@ runs:
|
||||
set -eux
|
||||
# PyYAML 6.0 doesn't work with MacOS x86 anymore
|
||||
# This must run on Python-3.7 (AmazonLinux2) so can't use request=3.32.2
|
||||
python3 -m pip install requests==2.27.1 pyyaml==6.0.1
|
||||
python3 -m pip install requests==2.27.1 pyyaml==6.0.2
|
||||
|
||||
- name: Parse ref
|
||||
id: parse-ref
|
||||
|
||||
@ -24,7 +24,6 @@ runs:
|
||||
-e PYTORCH_FINAL_PACKAGE_DIR \
|
||||
-e PYTORCH_ROOT \
|
||||
-e SKIP_ALL_TESTS \
|
||||
-e USE_SPLIT_BUILD \
|
||||
--tty \
|
||||
--detach \
|
||||
-v "${GITHUB_WORKSPACE}/pytorch:/pytorch" \
|
||||
|
||||
2
.github/ci_commit_pins/audio.txt
vendored
2
.github/ci_commit_pins/audio.txt
vendored
@ -1 +1 @@
|
||||
b6a3368a45aaafe05f1a6a9f10c68adc5e944d9e
|
||||
e500f0cf88bc57ffd8b0029033da305eef24ae25
|
||||
|
||||
2
.github/ci_commit_pins/vllm.txt
vendored
2
.github/ci_commit_pins/vllm.txt
vendored
@ -1 +1 @@
|
||||
b77c7d327f2a463bb9ef8be36f30e920bc066502
|
||||
35afe1b30b154114dc2ee8329e12f8cf3fe9f576
|
||||
|
||||
2
.github/ci_commit_pins/xla.txt
vendored
2
.github/ci_commit_pins/xla.txt
vendored
@ -1 +1 @@
|
||||
1c00dea2c9adb2137903c86b4191e8c247f8fda9
|
||||
095faec1e7b6cc47220181e74ae9cde2605f9b00
|
||||
|
||||
19
.github/merge_rules.yaml
vendored
19
.github/merge_rules.yaml
vendored
@ -131,21 +131,6 @@
|
||||
- Lint
|
||||
- pull
|
||||
|
||||
- name: Mobile
|
||||
patterns:
|
||||
- ios/**
|
||||
- android/**
|
||||
- test/mobile/**
|
||||
approved_by:
|
||||
- linbinyu
|
||||
- IvanKobzarev
|
||||
- dreiss
|
||||
- raziel
|
||||
mandatory_checks_name:
|
||||
- EasyCLA
|
||||
- Lint
|
||||
- pull
|
||||
|
||||
- name: PrimTorch
|
||||
patterns:
|
||||
- torch/_meta_registrations.py
|
||||
@ -503,6 +488,10 @@
|
||||
- torch/_dynamo/**
|
||||
- torch/csrc/dynamo/**
|
||||
- test/dynamo/**
|
||||
- test/dynamo_expected_failures/**
|
||||
- test/dynamo_skips/**
|
||||
- test/inductor_expected_failures/**
|
||||
- test/inductor_skips/**
|
||||
approved_by:
|
||||
- guilhermeleobas
|
||||
mandatory_checks_name:
|
||||
|
||||
6
.github/requirements-gha-cache.txt
vendored
6
.github/requirements-gha-cache.txt
vendored
@ -7,9 +7,9 @@
|
||||
# .ci/docker/requirements-ci.txt
|
||||
boto3==1.35.42
|
||||
jinja2==3.1.6
|
||||
lintrunner==0.10.7
|
||||
lintrunner==0.12.7
|
||||
ninja==1.10.0.post1
|
||||
nvidia-ml-py==11.525.84
|
||||
pyyaml==6.0
|
||||
pyyaml==6.0.2
|
||||
requests==2.32.4
|
||||
rich==10.9.0
|
||||
rich==14.1.0
|
||||
|
||||
@ -2,7 +2,7 @@ boto3==1.35.42
|
||||
cmake==3.27.*
|
||||
expecttest==0.3.0
|
||||
fbscribelogger==0.1.7
|
||||
filelock==3.6.0
|
||||
filelock==3.18.0
|
||||
hypothesis==6.56.4
|
||||
librosa>=0.6.2
|
||||
mpmath==1.3.0
|
||||
|
||||
18
.github/scripts/generate_binary_build_matrix.py
vendored
18
.github/scripts/generate_binary_build_matrix.py
vendored
@ -193,7 +193,7 @@ LIBTORCH_CONTAINER_IMAGES: dict[str, str] = {
|
||||
"cpu": "libtorch-cxx11-builder:cpu",
|
||||
}
|
||||
|
||||
FULL_PYTHON_VERSIONS = ["3.9", "3.10", "3.11", "3.12", "3.13", "3.13t"]
|
||||
FULL_PYTHON_VERSIONS = ["3.9", "3.10", "3.11", "3.12", "3.13", "3.13t", "3.14", "3.14t"]
|
||||
|
||||
|
||||
def translate_desired_cuda(gpu_arch_type: str, gpu_arch_version: str) -> str:
|
||||
@ -273,7 +273,6 @@ def generate_wheels_matrix(
|
||||
os: str,
|
||||
arches: Optional[list[str]] = None,
|
||||
python_versions: Optional[list[str]] = None,
|
||||
use_split_build: bool = False,
|
||||
) -> list[dict[str, str]]:
|
||||
package_type = "wheel"
|
||||
if os == "linux" or os == "linux-aarch64" or os == "linux-s390x":
|
||||
@ -315,15 +314,11 @@ def generate_wheels_matrix(
|
||||
# TODO: Enable python 3.13t on cpu-s390x
|
||||
if gpu_arch_type == "cpu-s390x" and python_version == "3.13t":
|
||||
continue
|
||||
|
||||
if use_split_build and (
|
||||
arch_version not in ["12.6", "12.8", "12.9", "cpu"] or os != "linux"
|
||||
# TODO: Enable python 3.14 on non linux OSes
|
||||
if os != "linux" and (
|
||||
python_version == "3.14" or python_version == "3.14t"
|
||||
):
|
||||
raise RuntimeError(
|
||||
"Split build is only supported on linux with cuda 12* and cpu.\n"
|
||||
f"Currently attempting to build on arch version {arch_version} and os {os}.\n"
|
||||
"Please modify the matrix generation to exclude this combination."
|
||||
)
|
||||
continue
|
||||
|
||||
# cuda linux wheels require PYTORCH_EXTRA_INSTALL_REQUIREMENTS to install
|
||||
|
||||
@ -339,7 +334,6 @@ def generate_wheels_matrix(
|
||||
"gpu_arch_type": gpu_arch_type,
|
||||
"gpu_arch_version": gpu_arch_version,
|
||||
"desired_cuda": desired_cuda,
|
||||
"use_split_build": "True" if use_split_build else "False",
|
||||
"container_image": WHEEL_CONTAINER_IMAGES[arch_version].split(
|
||||
":"
|
||||
)[0],
|
||||
@ -372,7 +366,6 @@ def generate_wheels_matrix(
|
||||
"desired_cuda": translate_desired_cuda(
|
||||
gpu_arch_type, gpu_arch_version
|
||||
),
|
||||
"use_split_build": "True" if use_split_build else "False",
|
||||
"container_image": WHEEL_CONTAINER_IMAGES[
|
||||
arch_version
|
||||
].split(":")[0],
|
||||
@ -395,7 +388,6 @@ def generate_wheels_matrix(
|
||||
"desired_cuda": translate_desired_cuda(
|
||||
gpu_arch_type, gpu_arch_version
|
||||
),
|
||||
"use_split_build": "True" if use_split_build else "False",
|
||||
"container_image": WHEEL_CONTAINER_IMAGES[arch_version].split(
|
||||
":"
|
||||
)[0],
|
||||
|
||||
42
.github/scripts/generate_ci_workflows.py
vendored
42
.github/scripts/generate_ci_workflows.py
vendored
@ -59,9 +59,7 @@ class BinaryBuildWorkflow:
|
||||
is_scheduled: str = ""
|
||||
branches: str = "nightly"
|
||||
# Mainly for macos
|
||||
cross_compile_arm64: bool = False
|
||||
macos_runner: str = "macos-14-xlarge"
|
||||
use_split_build: bool = False
|
||||
# Mainly used for libtorch builds
|
||||
build_variant: str = ""
|
||||
|
||||
@ -72,9 +70,6 @@ class BinaryBuildWorkflow:
|
||||
for item in [self.os, "binary", self.package_type, self.build_variant]
|
||||
if item != ""
|
||||
)
|
||||
if self.use_split_build:
|
||||
# added to distinguish concurrency groups
|
||||
self.build_environment += "-split"
|
||||
|
||||
def generate_workflow_file(self, workflow_template: jinja2.Template) -> None:
|
||||
output_file_path = (
|
||||
@ -117,21 +112,6 @@ LINUX_BINARY_BUILD_WORFKLOWS = [
|
||||
isolated_workflow=True,
|
||||
),
|
||||
),
|
||||
# See https://github.com/pytorch/pytorch/issues/138750
|
||||
# BinaryBuildWorkflow(
|
||||
# os=OperatingSystem.LINUX,
|
||||
# package_type="manywheel",
|
||||
# build_configs=generate_binary_build_matrix.generate_wheels_matrix(
|
||||
# OperatingSystem.LINUX,
|
||||
# use_split_build=True,
|
||||
# arches=["11.8", "12.1", "12.4", "cpu"],
|
||||
# ),
|
||||
# ciflow_config=CIFlowConfig(
|
||||
# labels={LABEL_CIFLOW_BINARIES, LABEL_CIFLOW_BINARIES_WHEEL},
|
||||
# isolated_workflow=True,
|
||||
# ),
|
||||
# use_split_build=True,
|
||||
# ),
|
||||
BinaryBuildWorkflow(
|
||||
os=OperatingSystem.LINUX,
|
||||
package_type="libtorch",
|
||||
@ -175,27 +155,11 @@ LINUX_BINARY_SMOKE_WORKFLOWS = [
|
||||
package_type="manywheel",
|
||||
build_configs=generate_binary_build_matrix.generate_wheels_matrix(
|
||||
OperatingSystem.LINUX,
|
||||
arches=["12.6", "12.8", "12.9"],
|
||||
python_versions=["3.9"],
|
||||
arches=["12.8"],
|
||||
python_versions=["3.12"],
|
||||
),
|
||||
branches="main",
|
||||
),
|
||||
# See https://github.com/pytorch/pytorch/issues/138750
|
||||
# BinaryBuildWorkflow(
|
||||
# os=OperatingSystem.LINUX,
|
||||
# package_type="manywheel",
|
||||
# build_configs=generate_binary_build_matrix.generate_wheels_matrix(
|
||||
# OperatingSystem.LINUX,
|
||||
# arches=["11.8", "12.1", "12.4"],
|
||||
# python_versions=["3.9"],
|
||||
# use_split_build=True,
|
||||
# ),
|
||||
# ciflow_config=CIFlowConfig(
|
||||
# labels={LABEL_CIFLOW_PERIODIC},
|
||||
# ),
|
||||
# branches="main",
|
||||
# use_split_build=True,
|
||||
# ),
|
||||
BinaryBuildWorkflow(
|
||||
os=OperatingSystem.LINUX,
|
||||
package_type="libtorch",
|
||||
@ -338,7 +302,6 @@ MACOS_BINARY_BUILD_WORKFLOWS = [
|
||||
generate_binary_build_matrix.RELEASE,
|
||||
libtorch_variants=["shared-with-deps"],
|
||||
),
|
||||
cross_compile_arm64=False,
|
||||
macos_runner="macos-14-xlarge",
|
||||
ciflow_config=CIFlowConfig(
|
||||
labels={LABEL_CIFLOW_BINARIES, LABEL_CIFLOW_BINARIES_LIBTORCH},
|
||||
@ -351,7 +314,6 @@ MACOS_BINARY_BUILD_WORKFLOWS = [
|
||||
build_configs=generate_binary_build_matrix.generate_wheels_matrix(
|
||||
OperatingSystem.MACOS_ARM64
|
||||
),
|
||||
cross_compile_arm64=False,
|
||||
macos_runner="macos-14-xlarge",
|
||||
ciflow_config=CIFlowConfig(
|
||||
labels={LABEL_CIFLOW_BINARIES, LABEL_CIFLOW_BINARIES_WHEEL},
|
||||
|
||||
2
.github/scripts/lintrunner.sh
vendored
2
.github/scripts/lintrunner.sh
vendored
@ -2,7 +2,7 @@
|
||||
set -ex
|
||||
|
||||
# Use uv to speed up lintrunner init
|
||||
python3 -m pip install uv==0.1.45 setuptools
|
||||
python3 -m pip install -U uv==0.8.* setuptools
|
||||
|
||||
CACHE_DIRECTORY="/tmp/.lintbin"
|
||||
# Try to recover the cached binaries
|
||||
|
||||
7
.github/scripts/runner_determinator.py
vendored
7
.github/scripts/runner_determinator.py
vendored
@ -262,7 +262,12 @@ def is_exception_branch(branch: str) -> bool:
|
||||
"""
|
||||
Branches that get opted out of experiments by default, until they're explicitly enabled.
|
||||
"""
|
||||
return branch.split("/")[0] in {"main", "nightly", "release", "landchecks"}
|
||||
return branch.split("/", maxsplit=1)[0] in {
|
||||
"main",
|
||||
"nightly",
|
||||
"release",
|
||||
"landchecks",
|
||||
}
|
||||
|
||||
|
||||
def load_yaml(yaml_text: str) -> Any:
|
||||
|
||||
4
.github/scripts/trymerge.py
vendored
4
.github/scripts/trymerge.py
vendored
@ -1891,7 +1891,9 @@ def validate_revert(
|
||||
else pr.get_comment_by_id(comment_id)
|
||||
)
|
||||
if comment.editor_login is not None:
|
||||
raise PostCommentError("Don't want to revert based on edited command")
|
||||
raise PostCommentError(
|
||||
"Halting the revert as the revert comment has been edited."
|
||||
)
|
||||
author_association = comment.author_association
|
||||
author_login = comment.author_login
|
||||
allowed_reverters = ["COLLABORATOR", "MEMBER", "OWNER"]
|
||||
|
||||
@ -47,9 +47,6 @@ env:
|
||||
GITHUB_TOKEN: ${{ secrets.GITHUB_TOKEN }}
|
||||
PR_NUMBER: ${{ github.event.pull_request.number }}
|
||||
SKIP_ALL_TESTS: 0
|
||||
{%- if cross_compile_arm64 %}
|
||||
CROSS_COMPILE_ARM64: 1
|
||||
{% endif %}
|
||||
!{{ common.concurrency(build_environment) }}
|
||||
|
||||
jobs:
|
||||
|
||||
5
.github/templates/upload.yml.j2
vendored
5
.github/templates/upload.yml.j2
vendored
@ -25,11 +25,6 @@
|
||||
DOCKER_IMAGE: !{{ config["container_image"] }}
|
||||
DOCKER_IMAGE_TAG_PREFIX: !{{ config["container_image_tag_prefix"] }}
|
||||
{%- endif %}
|
||||
{%- if config["package_type"] == "manywheel" %}
|
||||
{%- if config.use_split_build is defined %}
|
||||
use_split_build: !{{ config["use_split_build"] }}
|
||||
{%- endif %}
|
||||
{%- endif %}
|
||||
{%- if config["package_type"] == "libtorch" %}
|
||||
{%- if config["libtorch_config"] %}
|
||||
LIBTORCH_CONFIG: !{{ config["libtorch_config"] }}
|
||||
|
||||
10
.github/workflows/_binary-build-linux.yml
vendored
10
.github/workflows/_binary-build-linux.yml
vendored
@ -26,13 +26,6 @@ on:
|
||||
default: 240
|
||||
type: number
|
||||
description: timeout for the job
|
||||
use_split_build:
|
||||
description: |
|
||||
[Experimental] Build a libtorch only wheel and build pytorch such that
|
||||
are built from the libtorch wheel.
|
||||
required: false
|
||||
type: boolean
|
||||
default: false
|
||||
ALPINE_IMAGE:
|
||||
required: false
|
||||
type: string
|
||||
@ -117,7 +110,6 @@ jobs:
|
||||
PR_NUMBER: ${{ github.event.pull_request.number }}
|
||||
PYTORCH_FINAL_PACKAGE_DIR: /artifacts
|
||||
SHA1: ${{ github.event.pull_request.head.sha || github.sha }}
|
||||
USE_SPLIT_BUILD: ${{ inputs.use_split_build }}
|
||||
steps:
|
||||
- name: Make the env permanent during this workflow (but not the secrets)
|
||||
shell: bash
|
||||
@ -142,7 +134,6 @@ jobs:
|
||||
echo "PR_NUMBER=${{ env.PR_NUMBER }}"
|
||||
echo "PYTORCH_FINAL_PACKAGE_DIR=${{ env.PYTORCH_FINAL_PACKAGE_DIR }}"
|
||||
echo "SHA1=${{ env.SHA1 }}"
|
||||
echo "USE_SPLIT_BUILD=${{ env.use_split_build }}"
|
||||
} >> "${GITHUB_ENV} }}"
|
||||
|
||||
- name: List the env
|
||||
@ -261,7 +252,6 @@ jobs:
|
||||
-e PYTORCH_ROOT \
|
||||
-e SKIP_ALL_TESTS \
|
||||
-e PYTORCH_EXTRA_INSTALL_REQUIREMENTS \
|
||||
-e USE_SPLIT_BUILD \
|
||||
--tty \
|
||||
--detach \
|
||||
-v "${GITHUB_WORKSPACE}/pytorch:/pytorch" \
|
||||
|
||||
9
.github/workflows/_binary-test-linux.yml
vendored
9
.github/workflows/_binary-test-linux.yml
vendored
@ -64,13 +64,6 @@ on:
|
||||
required: true
|
||||
type: string
|
||||
description: Hardware to run this job on. Valid values are linux.4xlarge, linux.4xlarge.nvidia.gpu, linux.arm64.2xlarge, and linux.rocm.gpu
|
||||
use_split_build:
|
||||
description: |
|
||||
[Experimental] Build a libtorch only wheel and build pytorch such that
|
||||
are built from the libtorch wheel.
|
||||
required: false
|
||||
type: boolean
|
||||
default: false
|
||||
secrets:
|
||||
github-token:
|
||||
required: true
|
||||
@ -104,7 +97,6 @@ jobs:
|
||||
PR_NUMBER: ${{ github.event.pull_request.number }}
|
||||
PYTORCH_FINAL_PACKAGE_DIR: /artifacts
|
||||
SHA1: ${{ github.event.pull_request.head.sha || github.sha }}
|
||||
USE_SPLIT_BUILD: ${{ inputs.use_split_build }}
|
||||
steps:
|
||||
- name: Make the env permanent during this workflow (but not the secrets)
|
||||
shell: bash
|
||||
@ -129,7 +121,6 @@ jobs:
|
||||
echo "PR_NUMBER=${{ env.PR_NUMBER }}"
|
||||
echo "PYTORCH_FINAL_PACKAGE_DIR=${{ env.PYTORCH_FINAL_PACKAGE_DIR }}"
|
||||
echo "SHA1=${{ env.SHA1 }}"
|
||||
echo "USE_SPLIT_BUILD=${{ env.USE_SPLIT_BUILD }}"
|
||||
} >> "${GITHUB_ENV} }}"
|
||||
|
||||
- name: "[FB EMPLOYEES] Enable SSH (Click me for login details)"
|
||||
|
||||
8
.github/workflows/_binary-upload.yml
vendored
8
.github/workflows/_binary-upload.yml
vendored
@ -51,13 +51,6 @@ on:
|
||||
required: false
|
||||
type: string
|
||||
description: Desired python version
|
||||
use_split_build:
|
||||
description: |
|
||||
[Experimental] Build a libtorch only wheel and build pytorch such that
|
||||
are built from the libtorch wheel.
|
||||
required: false
|
||||
type: boolean
|
||||
default: false
|
||||
secrets:
|
||||
github-token:
|
||||
required: true
|
||||
@ -86,7 +79,6 @@ jobs:
|
||||
PR_NUMBER: ${{ github.event.pull_request.number }}
|
||||
PYTORCH_FINAL_PACKAGE_DIR: /artifacts
|
||||
SHA1: ${{ github.event.pull_request.head.sha || github.sha }}
|
||||
USE_SPLIT_BUILD: ${{ inputs.use_split_build }}
|
||||
steps:
|
||||
- name: Checkout PyTorch
|
||||
uses: pytorch/pytorch/.github/actions/checkout-pytorch@main
|
||||
|
||||
1
.github/workflows/_linux-build.yml
vendored
1
.github/workflows/_linux-build.yml
vendored
@ -306,7 +306,6 @@ jobs:
|
||||
-e OUR_GITHUB_JOB_ID \
|
||||
-e HUGGING_FACE_HUB_TOKEN \
|
||||
-e SCRIBE_GRAPHQL_ACCESS_TOKEN \
|
||||
-e USE_SPLIT_BUILD \
|
||||
-e BUILD_ADDITIONAL_PACKAGES \
|
||||
--memory="${TOTAL_AVAILABLE_MEMORY_IN_GB%.*}g" \
|
||||
--memory-swap="${TOTAL_MEMORY_WITH_SWAP}g" \
|
||||
|
||||
20
.github/workflows/_linux-test.yml
vendored
20
.github/workflows/_linux-test.yml
vendored
@ -96,7 +96,7 @@ jobs:
|
||||
steps:
|
||||
- name: Setup SSH (Click me for login details)
|
||||
uses: pytorch/test-infra/.github/actions/setup-ssh@main
|
||||
if: ${{ matrix.runner != 'B200' && inputs.build-environment != 'linux-s390x-binary-manywheel' }}
|
||||
if: ${{ !contains(matrix.runner, 'b200') && inputs.build-environment != 'linux-s390x-binary-manywheel' }}
|
||||
with:
|
||||
github-secret: ${{ secrets.GITHUB_TOKEN }}
|
||||
instructions: |
|
||||
@ -109,7 +109,7 @@ jobs:
|
||||
no-sudo: true
|
||||
|
||||
- name: Setup Python
|
||||
if: matrix.runner == 'B200'
|
||||
if: contains(matrix.runner, 'b200')
|
||||
uses: actions/setup-python@a26af69be951a213d495a4c3e4e4022e16d87065 # v5.6.0
|
||||
with:
|
||||
python-version: '3.12'
|
||||
@ -117,7 +117,7 @@ jobs:
|
||||
|
||||
- name: Setup Linux
|
||||
uses: ./.github/actions/setup-linux
|
||||
if: inputs.build-environment != 'linux-s390x-binary-manywheel' && matrix.runner != 'B200'
|
||||
if: inputs.build-environment != 'linux-s390x-binary-manywheel' && !contains(matrix.runner, 'b200')
|
||||
|
||||
- name: configure aws credentials
|
||||
if: ${{ inputs.aws-role-to-assume != '' && inputs.build-environment != 'linux-s390x-binary-manywheel' }}
|
||||
@ -128,7 +128,7 @@ jobs:
|
||||
aws-region: us-east-1
|
||||
|
||||
- name: Login to Amazon ECR
|
||||
if: ${{ inputs.aws-role-to-assume != '' && matrix.runner == 'B200' }}
|
||||
if: ${{ inputs.aws-role-to-assume != '' && contains(matrix.runner, 'b200') }}
|
||||
id: login-ecr
|
||||
continue-on-error: true
|
||||
uses: aws-actions/amazon-ecr-login@062b18b96a7aff071d4dc91bc00c4c1a7945b076 # v2.0.1
|
||||
@ -166,17 +166,17 @@ jobs:
|
||||
uses: pytorch/test-infra/.github/actions/setup-nvidia@main
|
||||
with:
|
||||
driver-version: ${{ matrix.config == 'legacy_nvidia_driver' && '525.105.17' || '570.133.07' }}
|
||||
if: ${{ contains(inputs.build-environment, 'cuda') && !contains(matrix.config, 'nogpu') && steps.check_container_runner.outputs.IN_CONTAINER_RUNNER == 'false' && matrix.runner != 'B200' }}
|
||||
if: ${{ contains(inputs.build-environment, 'cuda') && !contains(matrix.config, 'nogpu') && steps.check_container_runner.outputs.IN_CONTAINER_RUNNER == 'false' && !contains(matrix.runner, 'b200') }}
|
||||
|
||||
- name: Setup GPU_FLAG for docker run
|
||||
id: setup-gpu-flag
|
||||
run: echo "GPU_FLAG=--gpus all -e NVIDIA_DRIVER_CAPABILITIES=all" >> "${GITHUB_ENV}"
|
||||
if: ${{ contains(inputs.build-environment, 'cuda') && !contains(matrix.config, 'nogpu') && (steps.check_container_runner.outputs.IN_CONTAINER_RUNNER == 'true' || matrix.runner == 'B200') }}
|
||||
if: ${{ contains(inputs.build-environment, 'cuda') && !contains(matrix.config, 'nogpu') && (steps.check_container_runner.outputs.IN_CONTAINER_RUNNER == 'true' || contains(matrix.runner, 'b200')) }}
|
||||
|
||||
- name: Setup SCCACHE_SERVER_PORT environment for docker run when on container
|
||||
id: setup-sscache-port-flag
|
||||
run: echo "SCCACHE_SERVER_PORT_DOCKER_FLAG=-e SCCACHE_SERVER_PORT=$((RUNNER_UID + 4226))" >> "${GITHUB_ENV}"
|
||||
if: ${{ steps.check_container_runner.outputs.IN_CONTAINER_RUNNER == 'true' && matrix.runner != 'B200' }}
|
||||
if: ${{ steps.check_container_runner.outputs.IN_CONTAINER_RUNNER == 'true' && !contains(matrix.runner, 'b200') }}
|
||||
|
||||
- name: Lock NVIDIA A100 40GB Frequency
|
||||
run: |
|
||||
@ -277,8 +277,8 @@ jobs:
|
||||
NO_TD: ${{ steps.keep-going.outputs.ci-no-td }}
|
||||
TD_DISTRIBUTED: ${{ steps.keep-going.outputs.ci-td-distributed }}
|
||||
# Do not set SCCACHE_S3_KEY_PREFIX to share the cache between all build jobs
|
||||
SCCACHE_BUCKET: ${{ matrix.runner != 'B200' && 'ossci-compiler-cache-circleci-v2' || '' }}
|
||||
SCCACHE_REGION: ${{ matrix.runner != 'B200' && 'us-east-1' || '' }}
|
||||
SCCACHE_BUCKET: ${{ !contains(matrix.runner, 'b200') && 'ossci-compiler-cache-circleci-v2' || '' }}
|
||||
SCCACHE_REGION: ${{ !contains(matrix.runner, 'b200') && 'us-east-1' || '' }}
|
||||
SHM_SIZE: ${{ contains(inputs.build-environment, 'cuda') && '2g' || '1g' }}
|
||||
DOCKER_IMAGE: ${{ inputs.docker-image }}
|
||||
XLA_CUDA: ${{ contains(inputs.build-environment, 'xla') && '0' || '' }}
|
||||
@ -403,7 +403,7 @@ jobs:
|
||||
job_identifier: ${{ github.workflow }}_${{ inputs.build-environment }}
|
||||
|
||||
- name: Authenticate with AWS
|
||||
if: ${{ matrix.runner == 'B200' }}
|
||||
if: ${{ contains(matrix.runner, 'b200') }}
|
||||
uses: aws-actions/configure-aws-credentials@ececac1a45f3b08a01d2dd070d28d111c5fe6722 # v4.1.0
|
||||
with:
|
||||
role-to-assume: arn:aws:iam::308535385114:role/gha_workflow_upload-benchmark-results
|
||||
|
||||
4
.github/workflows/_rocm-test.yml
vendored
4
.github/workflows/_rocm-test.yml
vendored
@ -269,8 +269,8 @@ jobs:
|
||||
# copy test results back to the mounted workspace, needed sudo, resulting permissions were correct
|
||||
docker exec -t "${{ env.CONTAINER_NAME }}" sh -c "cd ../pytorch && sudo cp -R test/test-reports ../workspace/test"
|
||||
|
||||
- name: Change permissions (only needed for MI300 and MI355 kubernetes runners for now)
|
||||
if: ${{ always() && steps.test.conclusion && (contains(matrix.runner, 'mi300') || contains(matrix.runner, 'mi355')) }}
|
||||
- name: Change permissions (only needed for kubernetes runners for now)
|
||||
if: ${{ always() && steps.test.conclusion && (contains(matrix.runner, 'gfx942') || contains(matrix.runner, 'mi355')) }}
|
||||
run: |
|
||||
docker exec -t "${{ env.CONTAINER_NAME }}" sh -c "sudo chown -R 1001:1001 test"
|
||||
|
||||
|
||||
8
.github/workflows/build-triton-wheel.yml
vendored
8
.github/workflows/build-triton-wheel.yml
vendored
@ -50,7 +50,7 @@ jobs:
|
||||
strategy:
|
||||
fail-fast: false
|
||||
matrix:
|
||||
py_vers: [ "3.9", "3.10", "3.11", "3.12", "3.13", "3.13t" ]
|
||||
py_vers: [ "3.9", "3.10", "3.11", "3.12", "3.13", "3.13t", "3.14", "3.14t" ]
|
||||
device: ["cuda", "rocm", "xpu", "aarch64"]
|
||||
docker-image: ["pytorch/manylinux2_28-builder:cpu"]
|
||||
include:
|
||||
@ -126,6 +126,12 @@ jobs:
|
||||
3.13t)
|
||||
PYTHON_EXECUTABLE=/opt/python/cp313-cp313t/bin/python
|
||||
;;
|
||||
3.14)
|
||||
PYTHON_EXECUTABLE=/opt/python/cp314-cp314/bin/python
|
||||
;;
|
||||
3.14t)
|
||||
PYTHON_EXECUTABLE=/opt/python/cp314-cp314t/bin/python
|
||||
;;
|
||||
*)
|
||||
echo "Unsupported python version ${PY_VERS}"
|
||||
exit 1
|
||||
|
||||
3
.github/workflows/check-labels.yml
vendored
3
.github/workflows/check-labels.yml
vendored
@ -34,7 +34,8 @@ jobs:
|
||||
contents: read
|
||||
pull-requests: write
|
||||
name: Check labels
|
||||
if: github.repository_owner == 'pytorch'
|
||||
# Disabling the job until https://github.com/pytorch/pytorch/issues/159825 is resolved
|
||||
if: github.repository_owner == 'pytorch' && false
|
||||
runs-on: linux.24_04.4x
|
||||
steps:
|
||||
- name: Checkout PyTorch
|
||||
|
||||
@ -7,7 +7,8 @@ on:
|
||||
|
||||
jobs:
|
||||
ghstack-mergeability-check:
|
||||
if: github.repository_owner == 'pytorch'
|
||||
# Disabling the job until https://github.com/pytorch/pytorch/issues/159825 is resolved
|
||||
if: github.repository_owner == 'pytorch' && false
|
||||
runs-on: ubuntu-latest
|
||||
steps:
|
||||
- uses: actions/checkout@11bd71901bbe5b1630ceea73d27597364c9af683 # v4.2.2
|
||||
@ -56,7 +57,7 @@ jobs:
|
||||
cache: pip
|
||||
architecture: x64
|
||||
|
||||
- run: pip install pyyaml==6.0
|
||||
- run: pip install pyyaml==6.0.2
|
||||
shell: bash
|
||||
|
||||
- name: Verify mergeability
|
||||
|
||||
2
.github/workflows/cherry-pick.yml
vendored
2
.github/workflows/cherry-pick.yml
vendored
@ -26,7 +26,7 @@ jobs:
|
||||
cache: pip
|
||||
|
||||
# Not the direct dependencies but the script uses trymerge
|
||||
- run: pip install pyyaml==6.0
|
||||
- run: pip install pyyaml==6.0.2
|
||||
|
||||
- name: Setup committer id
|
||||
run: |
|
||||
|
||||
9
.github/workflows/docker-builds.yml
vendored
9
.github/workflows/docker-builds.yml
vendored
@ -51,21 +51,17 @@ jobs:
|
||||
docker-image-name: [
|
||||
pytorch-linux-jammy-cuda12.8-cudnn9-py3-gcc11,
|
||||
pytorch-linux-jammy-cuda12.8-cudnn9-py3.12-gcc11-vllm,
|
||||
pytorch-linux-jammy-cuda12.6-cudnn9-py3-gcc9-inductor-benchmarks,
|
||||
pytorch-linux-jammy-cuda12.6-cudnn9-py3.12-gcc9-inductor-benchmarks,
|
||||
pytorch-linux-jammy-cuda12.6-cudnn9-py3.13-gcc9-inductor-benchmarks,
|
||||
pytorch-linux-jammy-cuda12.8-cudnn9-py3-gcc9-inductor-benchmarks,
|
||||
pytorch-linux-jammy-cuda12.8-cudnn9-py3.12-gcc9-inductor-benchmarks,
|
||||
pytorch-linux-jammy-cuda12.8-cudnn9-py3.13-gcc9-inductor-benchmarks,
|
||||
pytorch-linux-jammy-cuda12.8-cudnn9-py3-gcc9,
|
||||
pytorch-linux-jammy-cuda12.4-cudnn9-py3-gcc11,
|
||||
pytorch-linux-jammy-py3.9-clang12,
|
||||
pytorch-linux-jammy-py3.11-clang12,
|
||||
pytorch-linux-jammy-py3.12-clang12,
|
||||
pytorch-linux-jammy-py3.13-clang12,
|
||||
pytorch-linux-jammy-rocm-n-py3,
|
||||
pytorch-linux-noble-rocm-n-py3,
|
||||
pytorch-linux-noble-rocm-alpha-py3,
|
||||
pytorch-linux-jammy-rocm-n-py3-benchmarks,
|
||||
pytorch-linux-jammy-cuda12.8-cudnn9-py3.9-clang12,
|
||||
pytorch-linux-jammy-py3.9-gcc11,
|
||||
pytorch-linux-jammy-py3.9-gcc11-inductor-benchmarks,
|
||||
@ -76,7 +72,8 @@ jobs:
|
||||
pytorch-linux-jammy-py3-clang12-onnx,
|
||||
pytorch-linux-jammy-linter,
|
||||
pytorch-linux-jammy-cuda12.8-cudnn9-py3.9-linter,
|
||||
pytorch-linux-jammy-py3-clang12-executorch,
|
||||
# Executorch pin needs update
|
||||
# pytorch-linux-jammy-py3-clang12-executorch,
|
||||
pytorch-linux-jammy-py3.12-triton-cpu
|
||||
]
|
||||
include:
|
||||
|
||||
2
.github/workflows/docker-release.yml
vendored
2
.github/workflows/docker-release.yml
vendored
@ -144,7 +144,7 @@ jobs:
|
||||
run: |
|
||||
make -f docker.Makefile "${BUILD_IMAGE_TYPE}-image"
|
||||
- name: Push nightly tags
|
||||
if: ${{ github.event.ref == 'refs/heads/nightly' && matrix.image_type == 'runtime' && matrix.build_platforms == 'linux/amd4' }}
|
||||
if: ${{ github.event.ref == 'refs/heads/nightly' && matrix.image_type == 'runtime' && matrix.platform == 'linux/amd4' }}
|
||||
run: |
|
||||
PYTORCH_DOCKER_TAG="${PYTORCH_VERSION}-cuda${CUDA_VERSION_SHORT}-cudnn${CUDNN_VERSION}-runtime"
|
||||
CUDA_SUFFIX="-cu${CUDA_VERSION}"
|
||||
|
||||
30
.github/workflows/generated-linux-aarch64-binary-manywheel-nightly.yml
generated
vendored
30
.github/workflows/generated-linux-aarch64-binary-manywheel-nightly.yml
generated
vendored
@ -60,7 +60,6 @@ jobs:
|
||||
GPU_ARCH_TYPE: cpu-aarch64
|
||||
DOCKER_IMAGE: manylinux2_28_aarch64-builder
|
||||
DOCKER_IMAGE_TAG_PREFIX: cpu-aarch64
|
||||
use_split_build: False
|
||||
DESIRED_PYTHON: "3.9"
|
||||
runner_prefix: "${{ needs.get-label-type.outputs.label-type }}"
|
||||
runs_on: linux.arm64.m7g.4xlarge.ephemeral
|
||||
@ -84,7 +83,6 @@ jobs:
|
||||
GPU_ARCH_TYPE: cpu-aarch64
|
||||
DOCKER_IMAGE: manylinux2_28_aarch64-builder
|
||||
DOCKER_IMAGE_TAG_PREFIX: cpu-aarch64
|
||||
use_split_build: False
|
||||
DESIRED_PYTHON: "3.9"
|
||||
build_name: manywheel-py3_9-cpu-aarch64
|
||||
build_environment: linux-aarch64-binary-manywheel
|
||||
@ -108,7 +106,6 @@ jobs:
|
||||
GPU_ARCH_TYPE: cpu-aarch64
|
||||
DOCKER_IMAGE: manylinux2_28_aarch64-builder
|
||||
DOCKER_IMAGE_TAG_PREFIX: cpu-aarch64
|
||||
use_split_build: False
|
||||
DESIRED_PYTHON: "3.9"
|
||||
build_name: manywheel-py3_9-cpu-aarch64
|
||||
secrets:
|
||||
@ -129,7 +126,6 @@ jobs:
|
||||
GPU_ARCH_TYPE: cuda-aarch64
|
||||
DOCKER_IMAGE: manylinuxaarch64-builder
|
||||
DOCKER_IMAGE_TAG_PREFIX: cuda12.9
|
||||
use_split_build: False
|
||||
DESIRED_PYTHON: "3.9"
|
||||
runner_prefix: "${{ needs.get-label-type.outputs.label-type }}"
|
||||
runs_on: linux.arm64.m7g.4xlarge.ephemeral
|
||||
@ -156,7 +152,6 @@ jobs:
|
||||
GPU_ARCH_TYPE: cuda-aarch64
|
||||
DOCKER_IMAGE: manylinuxaarch64-builder
|
||||
DOCKER_IMAGE_TAG_PREFIX: cuda12.9
|
||||
use_split_build: False
|
||||
DESIRED_PYTHON: "3.9"
|
||||
build_name: manywheel-py3_9-cuda-aarch64-12_9
|
||||
secrets:
|
||||
@ -176,7 +171,6 @@ jobs:
|
||||
GPU_ARCH_TYPE: cpu-aarch64
|
||||
DOCKER_IMAGE: manylinux2_28_aarch64-builder
|
||||
DOCKER_IMAGE_TAG_PREFIX: cpu-aarch64
|
||||
use_split_build: False
|
||||
DESIRED_PYTHON: "3.10"
|
||||
runner_prefix: "${{ needs.get-label-type.outputs.label-type }}"
|
||||
runs_on: linux.arm64.m7g.4xlarge.ephemeral
|
||||
@ -200,7 +194,6 @@ jobs:
|
||||
GPU_ARCH_TYPE: cpu-aarch64
|
||||
DOCKER_IMAGE: manylinux2_28_aarch64-builder
|
||||
DOCKER_IMAGE_TAG_PREFIX: cpu-aarch64
|
||||
use_split_build: False
|
||||
DESIRED_PYTHON: "3.10"
|
||||
build_name: manywheel-py3_10-cpu-aarch64
|
||||
build_environment: linux-aarch64-binary-manywheel
|
||||
@ -224,7 +217,6 @@ jobs:
|
||||
GPU_ARCH_TYPE: cpu-aarch64
|
||||
DOCKER_IMAGE: manylinux2_28_aarch64-builder
|
||||
DOCKER_IMAGE_TAG_PREFIX: cpu-aarch64
|
||||
use_split_build: False
|
||||
DESIRED_PYTHON: "3.10"
|
||||
build_name: manywheel-py3_10-cpu-aarch64
|
||||
secrets:
|
||||
@ -245,7 +237,6 @@ jobs:
|
||||
GPU_ARCH_TYPE: cuda-aarch64
|
||||
DOCKER_IMAGE: manylinuxaarch64-builder
|
||||
DOCKER_IMAGE_TAG_PREFIX: cuda12.9
|
||||
use_split_build: False
|
||||
DESIRED_PYTHON: "3.10"
|
||||
runner_prefix: "${{ needs.get-label-type.outputs.label-type }}"
|
||||
runs_on: linux.arm64.m7g.4xlarge.ephemeral
|
||||
@ -272,7 +263,6 @@ jobs:
|
||||
GPU_ARCH_TYPE: cuda-aarch64
|
||||
DOCKER_IMAGE: manylinuxaarch64-builder
|
||||
DOCKER_IMAGE_TAG_PREFIX: cuda12.9
|
||||
use_split_build: False
|
||||
DESIRED_PYTHON: "3.10"
|
||||
build_name: manywheel-py3_10-cuda-aarch64-12_9
|
||||
secrets:
|
||||
@ -292,7 +282,6 @@ jobs:
|
||||
GPU_ARCH_TYPE: cpu-aarch64
|
||||
DOCKER_IMAGE: manylinux2_28_aarch64-builder
|
||||
DOCKER_IMAGE_TAG_PREFIX: cpu-aarch64
|
||||
use_split_build: False
|
||||
DESIRED_PYTHON: "3.11"
|
||||
runner_prefix: "${{ needs.get-label-type.outputs.label-type }}"
|
||||
runs_on: linux.arm64.m7g.4xlarge.ephemeral
|
||||
@ -316,7 +305,6 @@ jobs:
|
||||
GPU_ARCH_TYPE: cpu-aarch64
|
||||
DOCKER_IMAGE: manylinux2_28_aarch64-builder
|
||||
DOCKER_IMAGE_TAG_PREFIX: cpu-aarch64
|
||||
use_split_build: False
|
||||
DESIRED_PYTHON: "3.11"
|
||||
build_name: manywheel-py3_11-cpu-aarch64
|
||||
build_environment: linux-aarch64-binary-manywheel
|
||||
@ -340,7 +328,6 @@ jobs:
|
||||
GPU_ARCH_TYPE: cpu-aarch64
|
||||
DOCKER_IMAGE: manylinux2_28_aarch64-builder
|
||||
DOCKER_IMAGE_TAG_PREFIX: cpu-aarch64
|
||||
use_split_build: False
|
||||
DESIRED_PYTHON: "3.11"
|
||||
build_name: manywheel-py3_11-cpu-aarch64
|
||||
secrets:
|
||||
@ -361,7 +348,6 @@ jobs:
|
||||
GPU_ARCH_TYPE: cuda-aarch64
|
||||
DOCKER_IMAGE: manylinuxaarch64-builder
|
||||
DOCKER_IMAGE_TAG_PREFIX: cuda12.9
|
||||
use_split_build: False
|
||||
DESIRED_PYTHON: "3.11"
|
||||
runner_prefix: "${{ needs.get-label-type.outputs.label-type }}"
|
||||
runs_on: linux.arm64.m7g.4xlarge.ephemeral
|
||||
@ -388,7 +374,6 @@ jobs:
|
||||
GPU_ARCH_TYPE: cuda-aarch64
|
||||
DOCKER_IMAGE: manylinuxaarch64-builder
|
||||
DOCKER_IMAGE_TAG_PREFIX: cuda12.9
|
||||
use_split_build: False
|
||||
DESIRED_PYTHON: "3.11"
|
||||
build_name: manywheel-py3_11-cuda-aarch64-12_9
|
||||
secrets:
|
||||
@ -408,7 +393,6 @@ jobs:
|
||||
GPU_ARCH_TYPE: cpu-aarch64
|
||||
DOCKER_IMAGE: manylinux2_28_aarch64-builder
|
||||
DOCKER_IMAGE_TAG_PREFIX: cpu-aarch64
|
||||
use_split_build: False
|
||||
DESIRED_PYTHON: "3.12"
|
||||
runner_prefix: "${{ needs.get-label-type.outputs.label-type }}"
|
||||
runs_on: linux.arm64.m7g.4xlarge.ephemeral
|
||||
@ -432,7 +416,6 @@ jobs:
|
||||
GPU_ARCH_TYPE: cpu-aarch64
|
||||
DOCKER_IMAGE: manylinux2_28_aarch64-builder
|
||||
DOCKER_IMAGE_TAG_PREFIX: cpu-aarch64
|
||||
use_split_build: False
|
||||
DESIRED_PYTHON: "3.12"
|
||||
build_name: manywheel-py3_12-cpu-aarch64
|
||||
build_environment: linux-aarch64-binary-manywheel
|
||||
@ -456,7 +439,6 @@ jobs:
|
||||
GPU_ARCH_TYPE: cpu-aarch64
|
||||
DOCKER_IMAGE: manylinux2_28_aarch64-builder
|
||||
DOCKER_IMAGE_TAG_PREFIX: cpu-aarch64
|
||||
use_split_build: False
|
||||
DESIRED_PYTHON: "3.12"
|
||||
build_name: manywheel-py3_12-cpu-aarch64
|
||||
secrets:
|
||||
@ -477,7 +459,6 @@ jobs:
|
||||
GPU_ARCH_TYPE: cuda-aarch64
|
||||
DOCKER_IMAGE: manylinuxaarch64-builder
|
||||
DOCKER_IMAGE_TAG_PREFIX: cuda12.9
|
||||
use_split_build: False
|
||||
DESIRED_PYTHON: "3.12"
|
||||
runner_prefix: "${{ needs.get-label-type.outputs.label-type }}"
|
||||
runs_on: linux.arm64.m7g.4xlarge.ephemeral
|
||||
@ -504,7 +485,6 @@ jobs:
|
||||
GPU_ARCH_TYPE: cuda-aarch64
|
||||
DOCKER_IMAGE: manylinuxaarch64-builder
|
||||
DOCKER_IMAGE_TAG_PREFIX: cuda12.9
|
||||
use_split_build: False
|
||||
DESIRED_PYTHON: "3.12"
|
||||
build_name: manywheel-py3_12-cuda-aarch64-12_9
|
||||
secrets:
|
||||
@ -524,7 +504,6 @@ jobs:
|
||||
GPU_ARCH_TYPE: cpu-aarch64
|
||||
DOCKER_IMAGE: manylinux2_28_aarch64-builder
|
||||
DOCKER_IMAGE_TAG_PREFIX: cpu-aarch64
|
||||
use_split_build: False
|
||||
DESIRED_PYTHON: "3.13"
|
||||
runner_prefix: "${{ needs.get-label-type.outputs.label-type }}"
|
||||
runs_on: linux.arm64.m7g.4xlarge.ephemeral
|
||||
@ -548,7 +527,6 @@ jobs:
|
||||
GPU_ARCH_TYPE: cpu-aarch64
|
||||
DOCKER_IMAGE: manylinux2_28_aarch64-builder
|
||||
DOCKER_IMAGE_TAG_PREFIX: cpu-aarch64
|
||||
use_split_build: False
|
||||
DESIRED_PYTHON: "3.13"
|
||||
build_name: manywheel-py3_13-cpu-aarch64
|
||||
build_environment: linux-aarch64-binary-manywheel
|
||||
@ -572,7 +550,6 @@ jobs:
|
||||
GPU_ARCH_TYPE: cpu-aarch64
|
||||
DOCKER_IMAGE: manylinux2_28_aarch64-builder
|
||||
DOCKER_IMAGE_TAG_PREFIX: cpu-aarch64
|
||||
use_split_build: False
|
||||
DESIRED_PYTHON: "3.13"
|
||||
build_name: manywheel-py3_13-cpu-aarch64
|
||||
secrets:
|
||||
@ -593,7 +570,6 @@ jobs:
|
||||
GPU_ARCH_TYPE: cuda-aarch64
|
||||
DOCKER_IMAGE: manylinuxaarch64-builder
|
||||
DOCKER_IMAGE_TAG_PREFIX: cuda12.9
|
||||
use_split_build: False
|
||||
DESIRED_PYTHON: "3.13"
|
||||
runner_prefix: "${{ needs.get-label-type.outputs.label-type }}"
|
||||
runs_on: linux.arm64.m7g.4xlarge.ephemeral
|
||||
@ -620,7 +596,6 @@ jobs:
|
||||
GPU_ARCH_TYPE: cuda-aarch64
|
||||
DOCKER_IMAGE: manylinuxaarch64-builder
|
||||
DOCKER_IMAGE_TAG_PREFIX: cuda12.9
|
||||
use_split_build: False
|
||||
DESIRED_PYTHON: "3.13"
|
||||
build_name: manywheel-py3_13-cuda-aarch64-12_9
|
||||
secrets:
|
||||
@ -640,7 +615,6 @@ jobs:
|
||||
GPU_ARCH_TYPE: cpu-aarch64
|
||||
DOCKER_IMAGE: manylinux2_28_aarch64-builder
|
||||
DOCKER_IMAGE_TAG_PREFIX: cpu-aarch64
|
||||
use_split_build: False
|
||||
DESIRED_PYTHON: "3.13t"
|
||||
runner_prefix: "${{ needs.get-label-type.outputs.label-type }}"
|
||||
runs_on: linux.arm64.m7g.4xlarge.ephemeral
|
||||
@ -664,7 +638,6 @@ jobs:
|
||||
GPU_ARCH_TYPE: cpu-aarch64
|
||||
DOCKER_IMAGE: manylinux2_28_aarch64-builder
|
||||
DOCKER_IMAGE_TAG_PREFIX: cpu-aarch64
|
||||
use_split_build: False
|
||||
DESIRED_PYTHON: "3.13t"
|
||||
build_name: manywheel-py3_13t-cpu-aarch64
|
||||
build_environment: linux-aarch64-binary-manywheel
|
||||
@ -688,7 +661,6 @@ jobs:
|
||||
GPU_ARCH_TYPE: cpu-aarch64
|
||||
DOCKER_IMAGE: manylinux2_28_aarch64-builder
|
||||
DOCKER_IMAGE_TAG_PREFIX: cpu-aarch64
|
||||
use_split_build: False
|
||||
DESIRED_PYTHON: "3.13t"
|
||||
build_name: manywheel-py3_13t-cpu-aarch64
|
||||
secrets:
|
||||
@ -709,7 +681,6 @@ jobs:
|
||||
GPU_ARCH_TYPE: cuda-aarch64
|
||||
DOCKER_IMAGE: manylinuxaarch64-builder
|
||||
DOCKER_IMAGE_TAG_PREFIX: cuda12.9
|
||||
use_split_build: False
|
||||
DESIRED_PYTHON: "3.13t"
|
||||
runner_prefix: "${{ needs.get-label-type.outputs.label-type }}"
|
||||
runs_on: linux.arm64.m7g.4xlarge.ephemeral
|
||||
@ -736,7 +707,6 @@ jobs:
|
||||
GPU_ARCH_TYPE: cuda-aarch64
|
||||
DOCKER_IMAGE: manylinuxaarch64-builder
|
||||
DOCKER_IMAGE_TAG_PREFIX: cuda12.9
|
||||
use_split_build: False
|
||||
DESIRED_PYTHON: "3.13t"
|
||||
build_name: manywheel-py3_13t-cuda-aarch64-12_9
|
||||
secrets:
|
||||
|
||||
110
.github/workflows/generated-linux-binary-manywheel-main.yml
generated
vendored
110
.github/workflows/generated-linux-binary-manywheel-main.yml
generated
vendored
@ -42,54 +42,7 @@ jobs:
|
||||
issue_owner: ${{ github.event.pull_request.user.login || github.event.issue.user.login }}
|
||||
curr_branch: ${{ github.head_ref || github.ref_name }}
|
||||
curr_ref_type: ${{ github.ref_type }}
|
||||
manywheel-py3_9-cuda12_6-build:
|
||||
if: ${{ github.repository_owner == 'pytorch' }}
|
||||
uses: ./.github/workflows/_binary-build-linux.yml
|
||||
needs: get-label-type
|
||||
with:
|
||||
PYTORCH_ROOT: /pytorch
|
||||
PACKAGE_TYPE: manywheel
|
||||
# TODO: This is a legacy variable that we eventually want to get rid of in
|
||||
# favor of GPU_ARCH_VERSION
|
||||
DESIRED_CUDA: cu126
|
||||
GPU_ARCH_VERSION: 12.6
|
||||
GPU_ARCH_TYPE: cuda
|
||||
DOCKER_IMAGE: manylinux2_28-builder
|
||||
DOCKER_IMAGE_TAG_PREFIX: cuda12.6
|
||||
use_split_build: False
|
||||
DESIRED_PYTHON: "3.9"
|
||||
runner_prefix: "${{ needs.get-label-type.outputs.label-type }}"
|
||||
build_name: manywheel-py3_9-cuda12_6
|
||||
build_environment: linux-binary-manywheel
|
||||
PYTORCH_EXTRA_INSTALL_REQUIREMENTS: nvidia-cuda-nvrtc-cu12==12.6.77; platform_system == 'Linux' and platform_machine == 'x86_64' | nvidia-cuda-runtime-cu12==12.6.77; platform_system == 'Linux' and platform_machine == 'x86_64' | nvidia-cuda-cupti-cu12==12.6.80; platform_system == 'Linux' and platform_machine == 'x86_64' | nvidia-cudnn-cu12==9.10.2.21; platform_system == 'Linux' and platform_machine == 'x86_64' | nvidia-cublas-cu12==12.6.4.1; platform_system == 'Linux' and platform_machine == 'x86_64' | nvidia-cufft-cu12==11.3.0.4; platform_system == 'Linux' and platform_machine == 'x86_64' | nvidia-curand-cu12==10.3.7.77; platform_system == 'Linux' and platform_machine == 'x86_64' | nvidia-cusolver-cu12==11.7.1.2; platform_system == 'Linux' and platform_machine == 'x86_64' | nvidia-cusparse-cu12==12.5.4.2; platform_system == 'Linux' and platform_machine == 'x86_64' | nvidia-cusparselt-cu12==0.7.1; platform_system == 'Linux' and platform_machine == 'x86_64' | nvidia-nccl-cu12==2.27.5; platform_system == 'Linux' and platform_machine == 'x86_64' | nvidia-nvshmem-cu12==3.3.9; platform_system == 'Linux' and platform_machine == 'x86_64' | nvidia-nvtx-cu12==12.6.77; platform_system == 'Linux' and platform_machine == 'x86_64' | nvidia-nvjitlink-cu12==12.6.85; platform_system == 'Linux' and platform_machine == 'x86_64' | nvidia-cufile-cu12==1.11.1.6; platform_system == 'Linux' and platform_machine == 'x86_64'
|
||||
secrets:
|
||||
github-token: ${{ secrets.GITHUB_TOKEN }}
|
||||
manywheel-py3_9-cuda12_6-test: # Testing
|
||||
if: ${{ github.repository_owner == 'pytorch' }}
|
||||
needs:
|
||||
- manywheel-py3_9-cuda12_6-build
|
||||
- get-label-type
|
||||
uses: ./.github/workflows/_binary-test-linux.yml
|
||||
with:
|
||||
PYTORCH_ROOT: /pytorch
|
||||
PACKAGE_TYPE: manywheel
|
||||
# TODO: This is a legacy variable that we eventually want to get rid of in
|
||||
# favor of GPU_ARCH_VERSION
|
||||
DESIRED_CUDA: cu126
|
||||
GPU_ARCH_VERSION: 12.6
|
||||
GPU_ARCH_TYPE: cuda
|
||||
DOCKER_IMAGE: manylinux2_28-builder
|
||||
DOCKER_IMAGE_TAG_PREFIX: cuda12.6
|
||||
use_split_build: False
|
||||
DESIRED_PYTHON: "3.9"
|
||||
build_name: manywheel-py3_9-cuda12_6
|
||||
build_environment: linux-binary-manywheel
|
||||
runner_prefix: "${{ needs.get-label-type.outputs.label-type }}"
|
||||
runs_on: linux.4xlarge.nvidia.gpu # for other cuda versions, we use 4xlarge runner
|
||||
secrets:
|
||||
github-token: ${{ secrets.GITHUB_TOKEN }}
|
||||
|
||||
manywheel-py3_9-cuda12_8-build:
|
||||
manywheel-py3_12-cuda12_8-build:
|
||||
if: ${{ github.repository_owner == 'pytorch' }}
|
||||
uses: ./.github/workflows/_binary-build-linux.yml
|
||||
needs: get-label-type
|
||||
@ -103,18 +56,17 @@ jobs:
|
||||
GPU_ARCH_TYPE: cuda
|
||||
DOCKER_IMAGE: manylinux2_28-builder
|
||||
DOCKER_IMAGE_TAG_PREFIX: cuda12.8
|
||||
use_split_build: False
|
||||
DESIRED_PYTHON: "3.9"
|
||||
DESIRED_PYTHON: "3.12"
|
||||
runner_prefix: "${{ needs.get-label-type.outputs.label-type }}"
|
||||
build_name: manywheel-py3_9-cuda12_8
|
||||
build_name: manywheel-py3_12-cuda12_8
|
||||
build_environment: linux-binary-manywheel
|
||||
PYTORCH_EXTRA_INSTALL_REQUIREMENTS: nvidia-cuda-nvrtc-cu12==12.8.93; platform_system == 'Linux' and platform_machine == 'x86_64' | nvidia-cuda-runtime-cu12==12.8.90; platform_system == 'Linux' and platform_machine == 'x86_64' | nvidia-cuda-cupti-cu12==12.8.90; platform_system == 'Linux' and platform_machine == 'x86_64' | nvidia-cudnn-cu12==9.10.2.21; platform_system == 'Linux' and platform_machine == 'x86_64' | nvidia-cublas-cu12==12.8.4.1; platform_system == 'Linux' and platform_machine == 'x86_64' | nvidia-cufft-cu12==11.3.3.83; platform_system == 'Linux' and platform_machine == 'x86_64' | nvidia-curand-cu12==10.3.9.90; platform_system == 'Linux' and platform_machine == 'x86_64' | nvidia-cusolver-cu12==11.7.3.90; platform_system == 'Linux' and platform_machine == 'x86_64' | nvidia-cusparse-cu12==12.5.8.93; platform_system == 'Linux' and platform_machine == 'x86_64' | nvidia-cusparselt-cu12==0.7.1; platform_system == 'Linux' and platform_machine == 'x86_64' | nvidia-nccl-cu12==2.27.5; platform_system == 'Linux' and platform_machine == 'x86_64' | nvidia-nvshmem-cu12==3.3.9; platform_system == 'Linux' and platform_machine == 'x86_64' | nvidia-nvtx-cu12==12.8.90; platform_system == 'Linux' and platform_machine == 'x86_64' | nvidia-nvjitlink-cu12==12.8.93; platform_system == 'Linux' and platform_machine == 'x86_64' | nvidia-cufile-cu12==1.13.1.3; platform_system == 'Linux' and platform_machine == 'x86_64'
|
||||
secrets:
|
||||
github-token: ${{ secrets.GITHUB_TOKEN }}
|
||||
manywheel-py3_9-cuda12_8-test: # Testing
|
||||
manywheel-py3_12-cuda12_8-test: # Testing
|
||||
if: ${{ github.repository_owner == 'pytorch' }}
|
||||
needs:
|
||||
- manywheel-py3_9-cuda12_8-build
|
||||
- manywheel-py3_12-cuda12_8-build
|
||||
- get-label-type
|
||||
uses: ./.github/workflows/_binary-test-linux.yml
|
||||
with:
|
||||
@ -127,56 +79,8 @@ jobs:
|
||||
GPU_ARCH_TYPE: cuda
|
||||
DOCKER_IMAGE: manylinux2_28-builder
|
||||
DOCKER_IMAGE_TAG_PREFIX: cuda12.8
|
||||
use_split_build: False
|
||||
DESIRED_PYTHON: "3.9"
|
||||
build_name: manywheel-py3_9-cuda12_8
|
||||
build_environment: linux-binary-manywheel
|
||||
runner_prefix: "${{ needs.get-label-type.outputs.label-type }}"
|
||||
runs_on: linux.g4dn.4xlarge.nvidia.gpu # 12.8 and 12.9 build need sm_70+ runner
|
||||
secrets:
|
||||
github-token: ${{ secrets.GITHUB_TOKEN }}
|
||||
|
||||
manywheel-py3_9-cuda12_9-build:
|
||||
if: ${{ github.repository_owner == 'pytorch' }}
|
||||
uses: ./.github/workflows/_binary-build-linux.yml
|
||||
needs: get-label-type
|
||||
with:
|
||||
PYTORCH_ROOT: /pytorch
|
||||
PACKAGE_TYPE: manywheel
|
||||
# TODO: This is a legacy variable that we eventually want to get rid of in
|
||||
# favor of GPU_ARCH_VERSION
|
||||
DESIRED_CUDA: cu129
|
||||
GPU_ARCH_VERSION: 12.9
|
||||
GPU_ARCH_TYPE: cuda
|
||||
DOCKER_IMAGE: manylinux2_28-builder
|
||||
DOCKER_IMAGE_TAG_PREFIX: cuda12.9
|
||||
use_split_build: False
|
||||
DESIRED_PYTHON: "3.9"
|
||||
runner_prefix: "${{ needs.get-label-type.outputs.label-type }}"
|
||||
build_name: manywheel-py3_9-cuda12_9
|
||||
build_environment: linux-binary-manywheel
|
||||
PYTORCH_EXTRA_INSTALL_REQUIREMENTS: nvidia-cuda-nvrtc-cu12==12.9.86; platform_system == 'Linux' and platform_machine == 'x86_64' | nvidia-cuda-runtime-cu12==12.9.79; platform_system == 'Linux' and platform_machine == 'x86_64' | nvidia-cuda-cupti-cu12==12.9.79; platform_system == 'Linux' and platform_machine == 'x86_64' | nvidia-cudnn-cu12==9.10.2.21; platform_system == 'Linux' and platform_machine == 'x86_64' | nvidia-cublas-cu12==12.9.1.4; platform_system == 'Linux' and platform_machine == 'x86_64' | nvidia-cufft-cu12==11.4.1.4; platform_system == 'Linux' and platform_machine == 'x86_64' | nvidia-curand-cu12==10.3.10.19; platform_system == 'Linux' and platform_machine == 'x86_64' | nvidia-cusolver-cu12==11.7.5.82; platform_system == 'Linux' and platform_machine == 'x86_64' | nvidia-cusparse-cu12==12.5.10.65; platform_system == 'Linux' and platform_machine == 'x86_64' | nvidia-cusparselt-cu12==0.7.1; platform_system == 'Linux' and platform_machine == 'x86_64' | nvidia-nccl-cu12==2.27.5; platform_system == 'Linux' and platform_machine == 'x86_64' | nvidia-nvshmem-cu12==3.3.9; platform_system == 'Linux' and platform_machine == 'x86_64' | nvidia-nvtx-cu12==12.9.79; platform_system == 'Linux' and platform_machine == 'x86_64' | nvidia-nvjitlink-cu12==12.9.86; platform_system == 'Linux' and platform_machine == 'x86_64' | nvidia-cufile-cu12==1.14.1.1; platform_system == 'Linux' and platform_machine == 'x86_64'
|
||||
secrets:
|
||||
github-token: ${{ secrets.GITHUB_TOKEN }}
|
||||
manywheel-py3_9-cuda12_9-test: # Testing
|
||||
if: ${{ github.repository_owner == 'pytorch' }}
|
||||
needs:
|
||||
- manywheel-py3_9-cuda12_9-build
|
||||
- get-label-type
|
||||
uses: ./.github/workflows/_binary-test-linux.yml
|
||||
with:
|
||||
PYTORCH_ROOT: /pytorch
|
||||
PACKAGE_TYPE: manywheel
|
||||
# TODO: This is a legacy variable that we eventually want to get rid of in
|
||||
# favor of GPU_ARCH_VERSION
|
||||
DESIRED_CUDA: cu129
|
||||
GPU_ARCH_VERSION: 12.9
|
||||
GPU_ARCH_TYPE: cuda
|
||||
DOCKER_IMAGE: manylinux2_28-builder
|
||||
DOCKER_IMAGE_TAG_PREFIX: cuda12.9
|
||||
use_split_build: False
|
||||
DESIRED_PYTHON: "3.9"
|
||||
build_name: manywheel-py3_9-cuda12_9
|
||||
DESIRED_PYTHON: "3.12"
|
||||
build_name: manywheel-py3_12-cuda12_8
|
||||
build_environment: linux-binary-manywheel
|
||||
runner_prefix: "${{ needs.get-label-type.outputs.label-type }}"
|
||||
runs_on: linux.g4dn.4xlarge.nvidia.gpu # 12.8 and 12.9 build need sm_70+ runner
|
||||
|
||||
1313
.github/workflows/generated-linux-binary-manywheel-nightly.yml
generated
vendored
1313
.github/workflows/generated-linux-binary-manywheel-nightly.yml
generated
vendored
File diff suppressed because it is too large
Load Diff
2
.github/workflows/generated-linux-binary-manywheel-rocm-main.yml
generated
vendored
2
.github/workflows/generated-linux-binary-manywheel-rocm-main.yml
generated
vendored
@ -58,7 +58,6 @@ jobs:
|
||||
GPU_ARCH_TYPE: rocm
|
||||
DOCKER_IMAGE: manylinux2_28-builder
|
||||
DOCKER_IMAGE_TAG_PREFIX: rocm6.4
|
||||
use_split_build: False
|
||||
DESIRED_PYTHON: "3.9"
|
||||
runner_prefix: "${{ needs.get-label-type.outputs.label-type }}"
|
||||
build_name: manywheel-py3_9-rocm6_4
|
||||
@ -83,7 +82,6 @@ jobs:
|
||||
SKIP_ALL_TESTS: 1
|
||||
DOCKER_IMAGE: manylinux2_28-builder
|
||||
DOCKER_IMAGE_TAG_PREFIX: rocm6.4
|
||||
use_split_build: False
|
||||
DESIRED_PYTHON: "3.9"
|
||||
steps:
|
||||
- name: Setup ROCm
|
||||
|
||||
15
.github/workflows/generated-linux-s390x-binary-manywheel-nightly.yml
generated
vendored
15
.github/workflows/generated-linux-s390x-binary-manywheel-nightly.yml
generated
vendored
@ -60,7 +60,6 @@ jobs:
|
||||
GPU_ARCH_TYPE: cpu-s390x
|
||||
DOCKER_IMAGE: pytorch/manylinuxs390x-builder
|
||||
DOCKER_IMAGE_TAG_PREFIX: cpu-s390x
|
||||
use_split_build: False
|
||||
DESIRED_PYTHON: "3.9"
|
||||
runs_on: linux.s390x
|
||||
ALPINE_IMAGE: "docker.io/s390x/alpine"
|
||||
@ -84,7 +83,6 @@ jobs:
|
||||
GPU_ARCH_TYPE: cpu-s390x
|
||||
DOCKER_IMAGE: pytorch/manylinuxs390x-builder
|
||||
DOCKER_IMAGE_TAG_PREFIX: cpu-s390x
|
||||
use_split_build: False
|
||||
DESIRED_PYTHON: "3.9"
|
||||
build_name: manywheel-py3_9-cpu-s390x
|
||||
build_environment: linux-s390x-binary-manywheel
|
||||
@ -107,7 +105,6 @@ jobs:
|
||||
GPU_ARCH_TYPE: cpu-s390x
|
||||
DOCKER_IMAGE: pytorch/manylinuxs390x-builder
|
||||
DOCKER_IMAGE_TAG_PREFIX: cpu-s390x
|
||||
use_split_build: False
|
||||
DESIRED_PYTHON: "3.9"
|
||||
build_name: manywheel-py3_9-cpu-s390x
|
||||
secrets:
|
||||
@ -127,7 +124,6 @@ jobs:
|
||||
GPU_ARCH_TYPE: cpu-s390x
|
||||
DOCKER_IMAGE: pytorch/manylinuxs390x-builder
|
||||
DOCKER_IMAGE_TAG_PREFIX: cpu-s390x
|
||||
use_split_build: False
|
||||
DESIRED_PYTHON: "3.10"
|
||||
runs_on: linux.s390x
|
||||
ALPINE_IMAGE: "docker.io/s390x/alpine"
|
||||
@ -151,7 +147,6 @@ jobs:
|
||||
GPU_ARCH_TYPE: cpu-s390x
|
||||
DOCKER_IMAGE: pytorch/manylinuxs390x-builder
|
||||
DOCKER_IMAGE_TAG_PREFIX: cpu-s390x
|
||||
use_split_build: False
|
||||
DESIRED_PYTHON: "3.10"
|
||||
build_name: manywheel-py3_10-cpu-s390x
|
||||
build_environment: linux-s390x-binary-manywheel
|
||||
@ -174,7 +169,6 @@ jobs:
|
||||
GPU_ARCH_TYPE: cpu-s390x
|
||||
DOCKER_IMAGE: pytorch/manylinuxs390x-builder
|
||||
DOCKER_IMAGE_TAG_PREFIX: cpu-s390x
|
||||
use_split_build: False
|
||||
DESIRED_PYTHON: "3.10"
|
||||
build_name: manywheel-py3_10-cpu-s390x
|
||||
secrets:
|
||||
@ -194,7 +188,6 @@ jobs:
|
||||
GPU_ARCH_TYPE: cpu-s390x
|
||||
DOCKER_IMAGE: pytorch/manylinuxs390x-builder
|
||||
DOCKER_IMAGE_TAG_PREFIX: cpu-s390x
|
||||
use_split_build: False
|
||||
DESIRED_PYTHON: "3.11"
|
||||
runs_on: linux.s390x
|
||||
ALPINE_IMAGE: "docker.io/s390x/alpine"
|
||||
@ -218,7 +211,6 @@ jobs:
|
||||
GPU_ARCH_TYPE: cpu-s390x
|
||||
DOCKER_IMAGE: pytorch/manylinuxs390x-builder
|
||||
DOCKER_IMAGE_TAG_PREFIX: cpu-s390x
|
||||
use_split_build: False
|
||||
DESIRED_PYTHON: "3.11"
|
||||
build_name: manywheel-py3_11-cpu-s390x
|
||||
build_environment: linux-s390x-binary-manywheel
|
||||
@ -241,7 +233,6 @@ jobs:
|
||||
GPU_ARCH_TYPE: cpu-s390x
|
||||
DOCKER_IMAGE: pytorch/manylinuxs390x-builder
|
||||
DOCKER_IMAGE_TAG_PREFIX: cpu-s390x
|
||||
use_split_build: False
|
||||
DESIRED_PYTHON: "3.11"
|
||||
build_name: manywheel-py3_11-cpu-s390x
|
||||
secrets:
|
||||
@ -261,7 +252,6 @@ jobs:
|
||||
GPU_ARCH_TYPE: cpu-s390x
|
||||
DOCKER_IMAGE: pytorch/manylinuxs390x-builder
|
||||
DOCKER_IMAGE_TAG_PREFIX: cpu-s390x
|
||||
use_split_build: False
|
||||
DESIRED_PYTHON: "3.12"
|
||||
runs_on: linux.s390x
|
||||
ALPINE_IMAGE: "docker.io/s390x/alpine"
|
||||
@ -285,7 +275,6 @@ jobs:
|
||||
GPU_ARCH_TYPE: cpu-s390x
|
||||
DOCKER_IMAGE: pytorch/manylinuxs390x-builder
|
||||
DOCKER_IMAGE_TAG_PREFIX: cpu-s390x
|
||||
use_split_build: False
|
||||
DESIRED_PYTHON: "3.12"
|
||||
build_name: manywheel-py3_12-cpu-s390x
|
||||
build_environment: linux-s390x-binary-manywheel
|
||||
@ -308,7 +297,6 @@ jobs:
|
||||
GPU_ARCH_TYPE: cpu-s390x
|
||||
DOCKER_IMAGE: pytorch/manylinuxs390x-builder
|
||||
DOCKER_IMAGE_TAG_PREFIX: cpu-s390x
|
||||
use_split_build: False
|
||||
DESIRED_PYTHON: "3.12"
|
||||
build_name: manywheel-py3_12-cpu-s390x
|
||||
secrets:
|
||||
@ -328,7 +316,6 @@ jobs:
|
||||
GPU_ARCH_TYPE: cpu-s390x
|
||||
DOCKER_IMAGE: pytorch/manylinuxs390x-builder
|
||||
DOCKER_IMAGE_TAG_PREFIX: cpu-s390x
|
||||
use_split_build: False
|
||||
DESIRED_PYTHON: "3.13"
|
||||
runs_on: linux.s390x
|
||||
ALPINE_IMAGE: "docker.io/s390x/alpine"
|
||||
@ -352,7 +339,6 @@ jobs:
|
||||
GPU_ARCH_TYPE: cpu-s390x
|
||||
DOCKER_IMAGE: pytorch/manylinuxs390x-builder
|
||||
DOCKER_IMAGE_TAG_PREFIX: cpu-s390x
|
||||
use_split_build: False
|
||||
DESIRED_PYTHON: "3.13"
|
||||
build_name: manywheel-py3_13-cpu-s390x
|
||||
build_environment: linux-s390x-binary-manywheel
|
||||
@ -375,7 +361,6 @@ jobs:
|
||||
GPU_ARCH_TYPE: cpu-s390x
|
||||
DOCKER_IMAGE: pytorch/manylinuxs390x-builder
|
||||
DOCKER_IMAGE_TAG_PREFIX: cpu-s390x
|
||||
use_split_build: False
|
||||
DESIRED_PYTHON: "3.13"
|
||||
build_name: manywheel-py3_13-cpu-s390x
|
||||
secrets:
|
||||
|
||||
154
.github/workflows/inductor-perf-test-b200.yml
vendored
Normal file
154
.github/workflows/inductor-perf-test-b200.yml
vendored
Normal file
@ -0,0 +1,154 @@
|
||||
name: inductor-perf-b200
|
||||
|
||||
on:
|
||||
schedule:
|
||||
- cron: 0 7 * * 1-6
|
||||
- cron: 0 7 * * 0
|
||||
# NB: GitHub has an upper limit of 10 inputs here, so before we can sort it
|
||||
# out, let try to run torchao cudagraphs_low_precision as part of cudagraphs
|
||||
workflow_dispatch:
|
||||
inputs:
|
||||
training:
|
||||
description: Run training (on by default)?
|
||||
required: false
|
||||
type: boolean
|
||||
default: true
|
||||
inference:
|
||||
description: Run inference (on by default)?
|
||||
required: false
|
||||
type: boolean
|
||||
default: true
|
||||
default:
|
||||
description: Run inductor_default?
|
||||
required: false
|
||||
type: boolean
|
||||
default: false
|
||||
dynamic:
|
||||
description: Run inductor_dynamic_shapes?
|
||||
required: false
|
||||
type: boolean
|
||||
default: false
|
||||
cppwrapper:
|
||||
description: Run inductor_cpp_wrapper?
|
||||
required: false
|
||||
type: boolean
|
||||
default: false
|
||||
cudagraphs:
|
||||
description: Run inductor_cudagraphs?
|
||||
required: false
|
||||
type: boolean
|
||||
default: true
|
||||
freezing_cudagraphs:
|
||||
description: Run inductor_cudagraphs with freezing for inference?
|
||||
required: false
|
||||
type: boolean
|
||||
default: false
|
||||
aotinductor:
|
||||
description: Run aot_inductor for inference?
|
||||
required: false
|
||||
type: boolean
|
||||
default: false
|
||||
maxautotune:
|
||||
description: Run inductor_max_autotune?
|
||||
required: false
|
||||
type: boolean
|
||||
default: false
|
||||
benchmark_configs:
|
||||
description: The list of configs used the benchmark
|
||||
required: false
|
||||
type: string
|
||||
default: inductor_huggingface_perf_cuda_b200,inductor_timm_perf_cuda_b200,inductor_torchbench_perf_cuda_b200
|
||||
|
||||
concurrency:
|
||||
group: ${{ github.workflow }}-${{ github.event.pull_request.number || github.ref_name }}-${{ github.ref_type == 'branch' && github.sha }}-${{ github.event_name == 'workflow_dispatch' }}-${{ github.event_name == 'schedule' }}
|
||||
cancel-in-progress: true
|
||||
|
||||
permissions:
|
||||
id-token: write
|
||||
contents: read
|
||||
|
||||
jobs:
|
||||
get-label-type:
|
||||
name: get-label-type
|
||||
uses: pytorch/pytorch/.github/workflows/_runner-determinator.yml@main
|
||||
if: ${{ (github.event_name != 'schedule' || github.repository == 'pytorch/pytorch') && github.repository_owner == 'pytorch' }}
|
||||
with:
|
||||
triggering_actor: ${{ github.triggering_actor }}
|
||||
issue_owner: ${{ github.event.pull_request.user.login || github.event.issue.user.login }}
|
||||
curr_branch: ${{ github.head_ref || github.ref_name }}
|
||||
curr_ref_type: ${{ github.ref_type }}
|
||||
opt_out_experiments: lf
|
||||
|
||||
build:
|
||||
name: cuda12.8-py3.10-gcc9-sm100
|
||||
uses: ./.github/workflows/_linux-build.yml
|
||||
needs: get-label-type
|
||||
with:
|
||||
runner_prefix: "${{ needs.get-label-type.outputs.label-type }}"
|
||||
# Use a bigger runner here because CUDA_ARCH 9.0 is only built for H100
|
||||
# or newer GPUs, so it doesn't benefit much from existing compiler cache
|
||||
# from trunk. Also use a memory-intensive runner here because memory is
|
||||
# usually the bottleneck
|
||||
runner: linux.12xlarge.memory
|
||||
build-environment: linux-jammy-cuda12.8-py3.10-gcc9-sm100
|
||||
docker-image-name: ci-image:pytorch-linux-jammy-cuda12.8-cudnn9-py3-gcc9-inductor-benchmarks
|
||||
cuda-arch-list: '10.0'
|
||||
test-matrix: |
|
||||
{ include: [
|
||||
{ config: "inductor_huggingface_perf_cuda_b200", shard: 1, num_shards: 1, runner: "linux.dgx.b200" },
|
||||
{ config: "inductor_timm_perf_cuda_b200", shard: 1, num_shards: 1, runner: "linux.dgx.b200" },
|
||||
{ config: "inductor_torchbench_perf_cuda_b200", shard: 1, num_shards: 1, runner: "linux.dgx.b200" },
|
||||
]}
|
||||
selected-test-configs: ${{ inputs.benchmark_configs }}
|
||||
build-additional-packages: "vision audio fbgemm torchao"
|
||||
secrets: inherit
|
||||
|
||||
test-periodically:
|
||||
name: cuda12.8-py3.10-gcc9-sm100
|
||||
uses: ./.github/workflows/_linux-test.yml
|
||||
needs: build
|
||||
if: github.event.schedule == '0 7 * * 1-6'
|
||||
with:
|
||||
build-environment: linux-jammy-cuda12.8-py3.10-gcc9-sm100
|
||||
dashboard-tag: training-true-inference-true-default-true-dynamic-true-cudagraphs-true-cppwrapper-true-aotinductor-true-freezing_cudagraphs-true-cudagraphs_low_precision-true
|
||||
docker-image: ${{ needs.build.outputs.docker-image }}
|
||||
test-matrix: ${{ needs.build.outputs.test-matrix }}
|
||||
aws-role-to-assume: arn:aws:iam::308535385114:role/gha_workflow_s3_and_ecr_read_only
|
||||
timeout-minutes: 720
|
||||
disable-monitor: false
|
||||
monitor-log-interval: 15
|
||||
monitor-data-collect-interval: 4
|
||||
secrets: inherit
|
||||
|
||||
test-weekly:
|
||||
name: cuda12.8-py3.10-gcc9-sm100
|
||||
uses: ./.github/workflows/_linux-test.yml
|
||||
needs: build
|
||||
if: github.event.schedule == '0 7 * * 0'
|
||||
with:
|
||||
build-environment: linux-jammy-cuda12.8-py3.10-gcc9-sm100
|
||||
dashboard-tag: training-true-inference-true-default-true-dynamic-true-cudagraphs-true-cppwrapper-true-aotinductor-true-freezing_cudagraphs-true-maxautotune-true-freeze_autotune_cudagraphs-true-cudagraphs_low_precision-true
|
||||
docker-image: ${{ needs.build.outputs.docker-image }}
|
||||
test-matrix: ${{ needs.build.outputs.test-matrix }}
|
||||
timeout-minutes: 1440
|
||||
aws-role-to-assume: arn:aws:iam::308535385114:role/gha_workflow_s3_and_ecr_read_only
|
||||
disable-monitor: false
|
||||
monitor-log-interval: 15
|
||||
monitor-data-collect-interval: 4
|
||||
secrets: inherit
|
||||
|
||||
test:
|
||||
name: cuda12.8-py3.10-gcc9-sm100
|
||||
uses: ./.github/workflows/_linux-test.yml
|
||||
needs: build
|
||||
with:
|
||||
build-environment: linux-jammy-cuda12.8-py3.10-gcc9-sm100
|
||||
dashboard-tag: training-${{ inputs.training }}-inference-${{ inputs.inference }}-default-${{ inputs.default }}-dynamic-${{ inputs.dynamic }}-cudagraphs-${{ inputs.cudagraphs }}-cppwrapper-${{ inputs.cppwrapper }}-aotinductor-${{ inputs.aotinductor }}-maxautotune-${{ inputs.maxautotune }}-freezing_cudagraphs-${{ inputs.freezing_cudagraphs }}-cudagraphs_low_precision-${{ inputs.cudagraphs }}
|
||||
docker-image: ${{ needs.build.outputs.docker-image }}
|
||||
test-matrix: ${{ needs.build.outputs.test-matrix }}
|
||||
aws-role-to-assume: arn:aws:iam::308535385114:role/gha_workflow_s3_and_ecr_read_only
|
||||
timeout-minutes: 720
|
||||
disable-monitor: false
|
||||
monitor-log-interval: 15
|
||||
monitor-data-collect-interval: 4
|
||||
secrets: inherit
|
||||
@ -2,7 +2,7 @@ name: inductor-perf-nightly-h100
|
||||
|
||||
on:
|
||||
schedule:
|
||||
- cron: 15 0,4,8,12,16,20 * * 1-6
|
||||
- cron: 15 0,12 * * 1-6
|
||||
- cron: 0 7 * * 0
|
||||
# NB: GitHub has an upper limit of 10 inputs here, so before we can sort it
|
||||
# out, let try to run torchao cudagraphs_low_precision as part of cudagraphs
|
||||
@ -126,7 +126,7 @@ jobs:
|
||||
name: cuda12.8-py3.10-gcc9-sm90
|
||||
uses: ./.github/workflows/_linux-test.yml
|
||||
needs: build
|
||||
if: github.event.schedule == '15 0,4,8,12,16,20 * * 1-6'
|
||||
if: github.event.schedule == '15 0,12 * * 1-6'
|
||||
with:
|
||||
build-environment: linux-jammy-cuda12.8-py3.10-gcc9-sm90
|
||||
dashboard-tag: training-true-inference-true-default-true-dynamic-true-cudagraphs-true-cppwrapper-true-aotinductor-true-freezing_cudagraphs-true-cudagraphs_low_precision-true
|
||||
|
||||
@ -85,26 +85,26 @@ jobs:
|
||||
uses: ./.github/workflows/_linux-build.yml
|
||||
with:
|
||||
build-environment: linux-jammy-rocm-py3_10
|
||||
docker-image-name: ci-image:pytorch-linux-jammy-rocm-n-py3
|
||||
docker-image-name: ci-image:pytorch-linux-jammy-rocm-n-py3-benchmarks
|
||||
test-matrix: |
|
||||
{ include: [
|
||||
{ config: "inductor_huggingface_perf_rocm", shard: 1, num_shards: 4, runner: "linux.rocm.gpu.mi300.2" },
|
||||
{ config: "inductor_huggingface_perf_rocm", shard: 2, num_shards: 4, runner: "linux.rocm.gpu.mi300.2" },
|
||||
{ config: "inductor_huggingface_perf_rocm", shard: 3, num_shards: 4, runner: "linux.rocm.gpu.mi300.2" },
|
||||
{ config: "inductor_huggingface_perf_rocm", shard: 4, num_shards: 4, runner: "linux.rocm.gpu.mi300.2" },
|
||||
{ config: "inductor_timm_perf_rocm", shard: 1, num_shards: 5, runner: "linux.rocm.gpu.mi300.2" },
|
||||
{ config: "inductor_timm_perf_rocm", shard: 2, num_shards: 5, runner: "linux.rocm.gpu.mi300.2" },
|
||||
{ config: "inductor_timm_perf_rocm", shard: 3, num_shards: 5, runner: "linux.rocm.gpu.mi300.2" },
|
||||
{ config: "inductor_timm_perf_rocm", shard: 4, num_shards: 5, runner: "linux.rocm.gpu.mi300.2" },
|
||||
{ config: "inductor_timm_perf_rocm", shard: 5, num_shards: 5, runner: "linux.rocm.gpu.mi300.2" },
|
||||
{ config: "inductor_torchbench_perf_rocm", shard: 1, num_shards: 8, runner: "linux.rocm.gpu.mi300.2" },
|
||||
{ config: "inductor_torchbench_perf_rocm", shard: 2, num_shards: 8, runner: "linux.rocm.gpu.mi300.2" },
|
||||
{ config: "inductor_torchbench_perf_rocm", shard: 3, num_shards: 8, runner: "linux.rocm.gpu.mi300.2" },
|
||||
{ config: "inductor_torchbench_perf_rocm", shard: 4, num_shards: 8, runner: "linux.rocm.gpu.mi300.2" },
|
||||
{ config: "inductor_torchbench_perf_rocm", shard: 5, num_shards: 8, runner: "linux.rocm.gpu.mi300.2" },
|
||||
{ config: "inductor_torchbench_perf_rocm", shard: 6, num_shards: 8, runner: "linux.rocm.gpu.mi300.2" },
|
||||
{ config: "inductor_torchbench_perf_rocm", shard: 7, num_shards: 8, runner: "linux.rocm.gpu.mi300.2" },
|
||||
{ config: "inductor_torchbench_perf_rocm", shard: 8, num_shards: 8, runner: "linux.rocm.gpu.mi300.2" },
|
||||
{ config: "inductor_huggingface_perf_rocm", shard: 1, num_shards: 4, runner: "linux.rocm.gpu.gfx942.2" },
|
||||
{ config: "inductor_huggingface_perf_rocm", shard: 2, num_shards: 4, runner: "linux.rocm.gpu.gfx942.2" },
|
||||
{ config: "inductor_huggingface_perf_rocm", shard: 3, num_shards: 4, runner: "linux.rocm.gpu.gfx942.2" },
|
||||
{ config: "inductor_huggingface_perf_rocm", shard: 4, num_shards: 4, runner: "linux.rocm.gpu.gfx942.2" },
|
||||
{ config: "inductor_timm_perf_rocm", shard: 1, num_shards: 5, runner: "linux.rocm.gpu.gfx942.2" },
|
||||
{ config: "inductor_timm_perf_rocm", shard: 2, num_shards: 5, runner: "linux.rocm.gpu.gfx942.2" },
|
||||
{ config: "inductor_timm_perf_rocm", shard: 3, num_shards: 5, runner: "linux.rocm.gpu.gfx942.2" },
|
||||
{ config: "inductor_timm_perf_rocm", shard: 4, num_shards: 5, runner: "linux.rocm.gpu.gfx942.2" },
|
||||
{ config: "inductor_timm_perf_rocm", shard: 5, num_shards: 5, runner: "linux.rocm.gpu.gfx942.2" },
|
||||
{ config: "inductor_torchbench_perf_rocm", shard: 1, num_shards: 8, runner: "linux.rocm.gpu.gfx942.2" },
|
||||
{ config: "inductor_torchbench_perf_rocm", shard: 2, num_shards: 8, runner: "linux.rocm.gpu.gfx942.2" },
|
||||
{ config: "inductor_torchbench_perf_rocm", shard: 3, num_shards: 8, runner: "linux.rocm.gpu.gfx942.2" },
|
||||
{ config: "inductor_torchbench_perf_rocm", shard: 4, num_shards: 8, runner: "linux.rocm.gpu.gfx942.2" },
|
||||
{ config: "inductor_torchbench_perf_rocm", shard: 5, num_shards: 8, runner: "linux.rocm.gpu.gfx942.2" },
|
||||
{ config: "inductor_torchbench_perf_rocm", shard: 6, num_shards: 8, runner: "linux.rocm.gpu.gfx942.2" },
|
||||
{ config: "inductor_torchbench_perf_rocm", shard: 7, num_shards: 8, runner: "linux.rocm.gpu.gfx942.2" },
|
||||
{ config: "inductor_torchbench_perf_rocm", shard: 8, num_shards: 8, runner: "linux.rocm.gpu.gfx942.2" },
|
||||
]}
|
||||
secrets: inherit
|
||||
|
||||
|
||||
30
.github/workflows/inductor-periodic.yml
vendored
30
.github/workflows/inductor-periodic.yml
vendored
@ -81,21 +81,21 @@ jobs:
|
||||
sync-tag: rocm-build
|
||||
test-matrix: |
|
||||
{ include: [
|
||||
{ config: "dynamo_eager_torchbench", shard: 1, num_shards: 2, runner: "linux.rocm.gpu.mi300.2" },
|
||||
{ config: "dynamo_eager_torchbench", shard: 2, num_shards: 2, runner: "linux.rocm.gpu.mi300.2" },
|
||||
{ config: "dynamo_eager_huggingface", shard: 1, num_shards: 1, runner: "linux.rocm.gpu.mi300.2" },
|
||||
{ config: "dynamo_eager_timm", shard: 1, num_shards: 2, runner: "linux.rocm.gpu.mi300.2" },
|
||||
{ config: "dynamo_eager_timm", shard: 2, num_shards: 2, runner: "linux.rocm.gpu.mi300.2" },
|
||||
{ config: "aot_eager_torchbench", shard: 1, num_shards: 2, runner: "linux.rocm.gpu.mi300.2" },
|
||||
{ config: "aot_eager_torchbench", shard: 2, num_shards: 2, runner: "linux.rocm.gpu.mi300.2" },
|
||||
{ config: "aot_eager_huggingface", shard: 1, num_shards: 1, runner: "linux.rocm.gpu.mi300.2" },
|
||||
{ config: "aot_eager_timm", shard: 1, num_shards: 2, runner: "linux.rocm.gpu.mi300.2" },
|
||||
{ config: "aot_eager_timm", shard: 2, num_shards: 2, runner: "linux.rocm.gpu.mi300.2" },
|
||||
{ config: "dynamic_aot_eager_torchbench", shard: 1, num_shards: 2, runner: "linux.rocm.gpu.mi300.2" },
|
||||
{ config: "dynamic_aot_eager_torchbench", shard: 2, num_shards: 2, runner: "linux.rocm.gpu.mi300.2" },
|
||||
{ config: "dynamic_aot_eager_huggingface", shard: 1, num_shards: 1, runner: "linux.rocm.gpu.mi300.2" },
|
||||
{ config: "dynamic_aot_eager_timm", shard: 1, num_shards: 2, runner: "linux.rocm.gpu.mi300.2" },
|
||||
{ config: "dynamic_aot_eager_timm", shard: 2, num_shards: 2, runner: "linux.rocm.gpu.mi300.2" },
|
||||
{ config: "dynamo_eager_torchbench", shard: 1, num_shards: 2, runner: "linux.rocm.gpu.gfx942.2" },
|
||||
{ config: "dynamo_eager_torchbench", shard: 2, num_shards: 2, runner: "linux.rocm.gpu.gfx942.2" },
|
||||
{ config: "dynamo_eager_huggingface", shard: 1, num_shards: 1, runner: "linux.rocm.gpu.gfx942.2" },
|
||||
{ config: "dynamo_eager_timm", shard: 1, num_shards: 2, runner: "linux.rocm.gpu.gfx942.2" },
|
||||
{ config: "dynamo_eager_timm", shard: 2, num_shards: 2, runner: "linux.rocm.gpu.gfx942.2" },
|
||||
{ config: "aot_eager_torchbench", shard: 1, num_shards: 2, runner: "linux.rocm.gpu.gfx942.2" },
|
||||
{ config: "aot_eager_torchbench", shard: 2, num_shards: 2, runner: "linux.rocm.gpu.gfx942.2" },
|
||||
{ config: "aot_eager_huggingface", shard: 1, num_shards: 1, runner: "linux.rocm.gpu.gfx942.2" },
|
||||
{ config: "aot_eager_timm", shard: 1, num_shards: 2, runner: "linux.rocm.gpu.gfx942.2" },
|
||||
{ config: "aot_eager_timm", shard: 2, num_shards: 2, runner: "linux.rocm.gpu.gfx942.2" },
|
||||
{ config: "dynamic_aot_eager_torchbench", shard: 1, num_shards: 2, runner: "linux.rocm.gpu.gfx942.2" },
|
||||
{ config: "dynamic_aot_eager_torchbench", shard: 2, num_shards: 2, runner: "linux.rocm.gpu.gfx942.2" },
|
||||
{ config: "dynamic_aot_eager_huggingface", shard: 1, num_shards: 1, runner: "linux.rocm.gpu.gfx942.2" },
|
||||
{ config: "dynamic_aot_eager_timm", shard: 1, num_shards: 2, runner: "linux.rocm.gpu.gfx942.2" },
|
||||
{ config: "dynamic_aot_eager_timm", shard: 2, num_shards: 2, runner: "linux.rocm.gpu.gfx942.2" },
|
||||
]}
|
||||
secrets: inherit
|
||||
|
||||
|
||||
4
.github/workflows/inductor-rocm-mi300.yml
vendored
4
.github/workflows/inductor-rocm-mi300.yml
vendored
@ -47,8 +47,8 @@ jobs:
|
||||
docker-image-name: ci-image:pytorch-linux-jammy-rocm-n-py3
|
||||
test-matrix: |
|
||||
{ include: [
|
||||
{ config: "inductor", shard: 1, num_shards: 2, runner: "linux.rocm.gpu.mi300.2" },
|
||||
{ config: "inductor", shard: 2, num_shards: 2, runner: "linux.rocm.gpu.mi300.2" },
|
||||
{ config: "inductor", shard: 1, num_shards: 2, runner: "linux.rocm.gpu.gfx942.2" },
|
||||
{ config: "inductor", shard: 2, num_shards: 2, runner: "linux.rocm.gpu.gfx942.2" },
|
||||
]}
|
||||
secrets: inherit
|
||||
|
||||
|
||||
1
.github/workflows/mac-mps.yml
vendored
1
.github/workflows/mac-mps.yml
vendored
@ -28,7 +28,6 @@ jobs:
|
||||
# than our AWS macos-m1-14 runners
|
||||
test-matrix: |
|
||||
{ include: [
|
||||
{ config: "test_mps", shard: 1, num_shards: 1, runner: "macos-m1-13" },
|
||||
{ config: "test_mps", shard: 1, num_shards: 1, runner: "macos-m1-14" },
|
||||
{ config: "test_mps", shard: 1, num_shards: 1, runner: "macos-m2-15" },
|
||||
]}
|
||||
|
||||
9
.github/workflows/nightly.yml
vendored
9
.github/workflows/nightly.yml
vendored
@ -75,10 +75,11 @@ jobs:
|
||||
repo-owner: pytorch
|
||||
branch: main
|
||||
pin-folder: .github/ci_commit_pins
|
||||
- repo-name: executorch
|
||||
repo-owner: pytorch
|
||||
branch: main
|
||||
pin-folder: .ci/docker/ci_commit_pins
|
||||
# executorch jobs are disabled since it needs some manual work for the hash update
|
||||
# - repo-name: executorch
|
||||
# repo-owner: pytorch
|
||||
# branch: main
|
||||
# pin-folder: .ci/docker/ci_commit_pins
|
||||
- repo-name: triton
|
||||
repo-owner: triton-lang
|
||||
branch: main
|
||||
|
||||
6
.github/workflows/periodic-rocm-mi300.yml
vendored
6
.github/workflows/periodic-rocm-mi300.yml
vendored
@ -59,9 +59,9 @@ jobs:
|
||||
docker-image-name: ci-image:pytorch-linux-jammy-rocm-n-py3
|
||||
test-matrix: |
|
||||
{ include: [
|
||||
{ config: "distributed", shard: 1, num_shards: 3, runner: "linux.rocm.gpu.mi300.4", owners: ["module:rocm", "oncall:distributed"] },
|
||||
{ config: "distributed", shard: 2, num_shards: 3, runner: "linux.rocm.gpu.mi300.4", owners: ["module:rocm", "oncall:distributed"] },
|
||||
{ config: "distributed", shard: 3, num_shards: 3, runner: "linux.rocm.gpu.mi300.4", owners: ["module:rocm", "oncall:distributed"] },
|
||||
{ config: "distributed", shard: 1, num_shards: 3, runner: "linux.rocm.gpu.gfx942.4", owners: ["module:rocm", "oncall:distributed"] },
|
||||
{ config: "distributed", shard: 2, num_shards: 3, runner: "linux.rocm.gpu.gfx942.4", owners: ["module:rocm", "oncall:distributed"] },
|
||||
{ config: "distributed", shard: 3, num_shards: 3, runner: "linux.rocm.gpu.gfx942.4", owners: ["module:rocm", "oncall:distributed"] },
|
||||
]}
|
||||
secrets: inherit
|
||||
|
||||
|
||||
31
.github/workflows/periodic.yml
vendored
31
.github/workflows/periodic.yml
vendored
@ -51,37 +51,6 @@ jobs:
|
||||
curr_branch: ${{ github.head_ref || github.ref_name }}
|
||||
curr_ref_type: ${{ github.ref_type }}
|
||||
|
||||
linux-jammy-cuda12_4-py3_10-gcc11-sm89-build:
|
||||
name: linux-jammy-cuda12.4-py3.10-gcc11-sm89
|
||||
uses: ./.github/workflows/_linux-build.yml
|
||||
needs: get-label-type
|
||||
with:
|
||||
runner_prefix: "${{ needs.get-label-type.outputs.label-type }}"
|
||||
build-environment: linux-jammy-cuda12.4-py3.10-gcc11-sm89
|
||||
docker-image-name: ci-image:pytorch-linux-jammy-cuda12.4-cudnn9-py3-gcc11
|
||||
cuda-arch-list: 8.9
|
||||
test-matrix: |
|
||||
{ include: [
|
||||
{ config: "default", shard: 1, num_shards: 5, runner: "${{ needs.get-label-type.outputs.label-type }}linux.g6.4xlarge.experimental.nvidia.gpu" },
|
||||
{ config: "default", shard: 2, num_shards: 5, runner: "${{ needs.get-label-type.outputs.label-type }}linux.g6.4xlarge.experimental.nvidia.gpu" },
|
||||
{ config: "default", shard: 3, num_shards: 5, runner: "${{ needs.get-label-type.outputs.label-type }}linux.g6.4xlarge.experimental.nvidia.gpu" },
|
||||
{ config: "default", shard: 4, num_shards: 5, runner: "${{ needs.get-label-type.outputs.label-type }}linux.g6.4xlarge.experimental.nvidia.gpu" },
|
||||
{ config: "default", shard: 5, num_shards: 5, runner: "${{ needs.get-label-type.outputs.label-type }}linux.g6.4xlarge.experimental.nvidia.gpu" },
|
||||
]}
|
||||
secrets: inherit
|
||||
|
||||
linux-jammy-cuda12_4-py3_10-gcc11-sm89-test:
|
||||
name: linux-jammy-cuda12.4-py3.10-gcc11-sm89
|
||||
uses: ./.github/workflows/_linux-test.yml
|
||||
needs:
|
||||
- linux-jammy-cuda12_4-py3_10-gcc11-sm89-build
|
||||
- target-determination
|
||||
with:
|
||||
build-environment: linux-jammy-cuda12.4-py3.10-gcc11-sm89
|
||||
docker-image: ${{ needs.linux-jammy-cuda12_4-py3_10-gcc11-sm89-build.outputs.docker-image }}
|
||||
test-matrix: ${{ needs.linux-jammy-cuda12_4-py3_10-gcc11-sm89-build.outputs.test-matrix }}
|
||||
secrets: inherit
|
||||
|
||||
linux-jammy-cuda12_4-py3_10-gcc11-build:
|
||||
name: linux-jammy-cuda12.4-py3.10-gcc11
|
||||
uses: ./.github/workflows/_linux-build.yml
|
||||
|
||||
126
.github/workflows/pull.yml
vendored
126
.github/workflows/pull.yml
vendored
@ -254,36 +254,6 @@ jobs:
|
||||
timeout-minutes: 600
|
||||
secrets: inherit
|
||||
|
||||
linux-jammy-cuda12_8-py3_10-gcc11-build-distributed:
|
||||
name: linux-jammy-cuda12.8-py3.10-gcc11-build-distributed
|
||||
uses: ./.github/workflows/_linux-build.yml
|
||||
needs: get-label-type
|
||||
with:
|
||||
runner_prefix: "${{ needs.get-label-type.outputs.label-type }}"
|
||||
build-environment: linux-jammy-cuda12.8-py3.10-gcc11-distributed
|
||||
docker-image-name: ci-image:pytorch-linux-jammy-cuda12.8-cudnn9-py3-gcc11
|
||||
cuda-arch-list: '7.5'
|
||||
test-matrix: |
|
||||
{ include: [
|
||||
{ config: "distributed", shard: 1, num_shards: 3, runner: "${{ needs.get-label-type.outputs.label-type }}linux.g4dn.12xlarge.nvidia.gpu" },
|
||||
{ config: "distributed", shard: 2, num_shards: 3, runner: "${{ needs.get-label-type.outputs.label-type }}linux.g4dn.12xlarge.nvidia.gpu" },
|
||||
{ config: "distributed", shard: 3, num_shards: 3, runner: "${{ needs.get-label-type.outputs.label-type }}linux.g4dn.12xlarge.nvidia.gpu" },
|
||||
]}
|
||||
secrets: inherit
|
||||
|
||||
linux-jammy-cuda12_8-py3_10-gcc11-test-distributed:
|
||||
name: linux-jammy-cuda12.8-py3.10-gcc11-test
|
||||
uses: ./.github/workflows/_linux-test.yml
|
||||
needs:
|
||||
- linux-jammy-cuda12_8-py3_10-gcc11-build-distributed
|
||||
- target-determination
|
||||
with:
|
||||
timeout-minutes: 360
|
||||
build-environment: linux-jammy-cuda12.8-py3.10-gcc11-distributed
|
||||
docker-image: ${{ needs.linux-jammy-cuda12_8-py3_10-gcc11-build-distributed.outputs.docker-image }}
|
||||
test-matrix: ${{ needs.linux-jammy-cuda12_8-py3_10-gcc11-build-distributed.outputs.test-matrix }}
|
||||
secrets: inherit
|
||||
|
||||
linux-jammy-cuda12_8-py3_10-gcc11-build:
|
||||
name: linux-jammy-cuda12.8-py3.10-gcc11
|
||||
uses: ./.github/workflows/_linux-build.yml
|
||||
@ -292,13 +262,18 @@ jobs:
|
||||
runner_prefix: "${{ needs.get-label-type.outputs.label-type }}"
|
||||
build-environment: linux-jammy-cuda12.8-py3.10-gcc11
|
||||
docker-image-name: ci-image:pytorch-linux-jammy-cuda12.8-cudnn9-py3-gcc11
|
||||
cuda-arch-list: '7.5 8.9'
|
||||
test-matrix: |
|
||||
{ include: [
|
||||
{ config: "default", shard: 1, num_shards: 5, runner: "${{ needs.get-label-type.outputs.label-type }}linux.4xlarge.nvidia.gpu" },
|
||||
{ config: "default", shard: 2, num_shards: 5, runner: "${{ needs.get-label-type.outputs.label-type }}linux.4xlarge.nvidia.gpu" },
|
||||
{ config: "default", shard: 3, num_shards: 5, runner: "${{ needs.get-label-type.outputs.label-type }}linux.4xlarge.nvidia.gpu" },
|
||||
{ config: "default", shard: 4, num_shards: 5, runner: "${{ needs.get-label-type.outputs.label-type }}linux.4xlarge.nvidia.gpu" },
|
||||
{ config: "default", shard: 5, num_shards: 5, runner: "${{ needs.get-label-type.outputs.label-type }}linux.4xlarge.nvidia.gpu" },
|
||||
{ config: "default", shard: 1, num_shards: 5, runner: "${{ needs.get-label-type.outputs.label-type }}linux.g6.4xlarge.experimental.nvidia.gpu" },
|
||||
{ config: "default", shard: 2, num_shards: 5, runner: "${{ needs.get-label-type.outputs.label-type }}linux.g6.4xlarge.experimental.nvidia.gpu" },
|
||||
{ config: "default", shard: 3, num_shards: 5, runner: "${{ needs.get-label-type.outputs.label-type }}linux.g6.4xlarge.experimental.nvidia.gpu" },
|
||||
{ config: "default", shard: 4, num_shards: 5, runner: "${{ needs.get-label-type.outputs.label-type }}linux.g6.4xlarge.experimental.nvidia.gpu" },
|
||||
{ config: "default", shard: 5, num_shards: 5, runner: "${{ needs.get-label-type.outputs.label-type }}linux.g6.4xlarge.experimental.nvidia.gpu" },
|
||||
{ config: "distributed", shard: 1, num_shards: 3, runner: "${{ needs.get-label-type.outputs.label-type }}linux.g4dn.12xlarge.nvidia.gpu" },
|
||||
{ config: "distributed", shard: 2, num_shards: 3, runner: "${{ needs.get-label-type.outputs.label-type }}linux.g4dn.12xlarge.nvidia.gpu" },
|
||||
{ config: "distributed", shard: 3, num_shards: 3, runner: "${{ needs.get-label-type.outputs.label-type }}linux.g4dn.12xlarge.nvidia.gpu" },
|
||||
{ config: "pr_time_benchmarks", shard: 1, num_shards: 1, runner: "linux.g4dn.metal.nvidia.gpu" },
|
||||
]}
|
||||
secrets: inherit
|
||||
|
||||
@ -329,30 +304,6 @@ jobs:
|
||||
]}
|
||||
secrets: inherit
|
||||
|
||||
linux-jammy-py3_9-clang9-xla-build:
|
||||
name: linux-jammy-py3_9-clang9-xla
|
||||
uses: ./.github/workflows/_linux-build.yml
|
||||
needs: get-label-type
|
||||
with:
|
||||
runner_prefix: "${{ needs.get-label-type.outputs.label-type }}"
|
||||
build-environment: linux-jammy-py3.9-clang9-xla
|
||||
docker-image-name: 308535385114.dkr.ecr.us-east-1.amazonaws.com/pytorch/xla_base:v1.3-lite
|
||||
test-matrix: |
|
||||
{ include: [
|
||||
{ config: "xla", shard: 1, num_shards: 1, runner: "${{ needs.get-label-type.outputs.label-type }}linux.12xlarge" },
|
||||
]}
|
||||
secrets: inherit
|
||||
|
||||
linux-jammy-py3_9-clang9-xla-test:
|
||||
name: linux-jammy-py3_9-clang9-xla
|
||||
uses: ./.github/workflows/_linux-test.yml
|
||||
needs: linux-jammy-py3_9-clang9-xla-build
|
||||
with:
|
||||
build-environment: linux-jammy-py3.9-clang9-xla
|
||||
docker-image: ${{ needs.linux-jammy-py3_9-clang9-xla-build.outputs.docker-image }}
|
||||
test-matrix: ${{ needs.linux-jammy-py3_9-clang9-xla-build.outputs.test-matrix }}
|
||||
secrets: inherit
|
||||
|
||||
linux-jammy-cpu-py3_10-gcc11-bazel-test:
|
||||
name: linux-jammy-cpu-py3.10-gcc11-bazel-test
|
||||
uses: ./.github/workflows/_bazel-build-test.yml
|
||||
@ -402,38 +353,8 @@ jobs:
|
||||
]}
|
||||
secrets: inherit
|
||||
|
||||
linux-jammy-cuda12_8-py3_10-gcc11-sm89-build:
|
||||
name: linux-jammy-cuda12.8-py3.10-gcc11-sm89
|
||||
uses: ./.github/workflows/_linux-build.yml
|
||||
needs: get-label-type
|
||||
with:
|
||||
runner_prefix: "${{ needs.get-label-type.outputs.label-type }}"
|
||||
build-environment: linux-jammy-cuda12.8-py3.10-gcc11-sm89
|
||||
docker-image-name: ci-image:pytorch-linux-jammy-cuda12.8-cudnn9-py3-gcc11
|
||||
cuda-arch-list: 8.9
|
||||
test-matrix: |
|
||||
{ include: [
|
||||
{ config: "default", shard: 1, num_shards: 5, runner: "${{ needs.get-label-type.outputs.label-type }}linux.g6.4xlarge.experimental.nvidia.gpu" },
|
||||
{ config: "default", shard: 2, num_shards: 5, runner: "${{ needs.get-label-type.outputs.label-type }}linux.g6.4xlarge.experimental.nvidia.gpu" },
|
||||
{ config: "default", shard: 3, num_shards: 5, runner: "${{ needs.get-label-type.outputs.label-type }}linux.g6.4xlarge.experimental.nvidia.gpu" },
|
||||
{ config: "default", shard: 4, num_shards: 5, runner: "${{ needs.get-label-type.outputs.label-type }}linux.g6.4xlarge.experimental.nvidia.gpu" },
|
||||
{ config: "default", shard: 5, num_shards: 5, runner: "${{ needs.get-label-type.outputs.label-type }}linux.g6.4xlarge.experimental.nvidia.gpu" },
|
||||
]}
|
||||
secrets: inherit
|
||||
|
||||
linux-jammy-cuda12_8-py3_10-gcc11-sm89-test:
|
||||
name: linux-jammy-cuda12.8-py3.10-gcc11-sm89
|
||||
uses: ./.github/workflows/_linux-test.yml
|
||||
needs:
|
||||
- linux-jammy-cuda12_8-py3_10-gcc11-sm89-build
|
||||
- target-determination
|
||||
with:
|
||||
build-environment: linux-jammy-cuda12.8-py3.10-gcc11-sm89
|
||||
docker-image: ${{ needs.linux-jammy-cuda12_8-py3_10-gcc11-sm89-build.outputs.docker-image }}
|
||||
test-matrix: ${{ needs.linux-jammy-cuda12_8-py3_10-gcc11-sm89-build.outputs.test-matrix }}
|
||||
secrets: inherit
|
||||
|
||||
linux-jammy-py3-clang12-executorch-build:
|
||||
if: false # Docker build needs pin update
|
||||
name: linux-jammy-py3-clang12-executorch
|
||||
uses: ./.github/workflows/_linux-build.yml
|
||||
needs: get-label-type
|
||||
@ -458,31 +379,6 @@ jobs:
|
||||
test-matrix: ${{ needs.linux-jammy-py3-clang12-executorch-build.outputs.test-matrix }}
|
||||
secrets: inherit
|
||||
|
||||
linux-jammy-cuda12_8-py3_10-gcc9-inductor-build:
|
||||
name: cuda12.8-py3.10-gcc9-sm75
|
||||
uses: ./.github/workflows/_linux-build.yml
|
||||
needs: get-label-type
|
||||
with:
|
||||
runner_prefix: "${{ needs.get-label-type.outputs.label-type }}"
|
||||
build-environment: linux-jammy-cuda12.8-py3.10-gcc9-sm75
|
||||
docker-image-name: ci-image:pytorch-linux-jammy-cuda12.8-cudnn9-py3-gcc9-inductor-benchmarks
|
||||
cuda-arch-list: '7.5'
|
||||
test-matrix: |
|
||||
{ include: [
|
||||
{ config: "pr_time_benchmarks", shard: 1, num_shards: 1, runner: "linux.g4dn.metal.nvidia.gpu" },
|
||||
]}
|
||||
secrets: inherit
|
||||
|
||||
linux-jammy-cuda12_8-py3_10-gcc9-inductor-test:
|
||||
name: cuda12.8-py3.10-gcc9-sm75
|
||||
uses: ./.github/workflows/_linux-test.yml
|
||||
needs: linux-jammy-cuda12_8-py3_10-gcc9-inductor-build
|
||||
with:
|
||||
build-environment: linux-jammy-cuda12.8-py3.10-gcc9-sm75
|
||||
docker-image: ${{ needs.linux-jammy-cuda12_8-py3_10-gcc9-inductor-build.outputs.docker-image }}
|
||||
test-matrix: ${{ needs.linux-jammy-cuda12_8-py3_10-gcc9-inductor-build.outputs.test-matrix }}
|
||||
secrets: inherit
|
||||
|
||||
linux-jammy-xpu-2025_1-py3_9-build:
|
||||
name: linux-jammy-xpu-2025.1-py3.9
|
||||
uses: ./.github/workflows/_linux-build.yml
|
||||
|
||||
2
.github/workflows/revert.yml
vendored
2
.github/workflows/revert.yml
vendored
@ -26,7 +26,7 @@ jobs:
|
||||
architecture: x64
|
||||
check-latest: false
|
||||
cache: pip
|
||||
- run: pip install pyyaml==6.0
|
||||
- run: pip install pyyaml==6.0.2
|
||||
|
||||
- name: Setup committer id
|
||||
run: |
|
||||
|
||||
12
.github/workflows/rocm-mi300.yml
vendored
12
.github/workflows/rocm-mi300.yml
vendored
@ -48,12 +48,12 @@ jobs:
|
||||
sync-tag: rocm-build
|
||||
test-matrix: |
|
||||
{ include: [
|
||||
{ config: "default", shard: 1, num_shards: 6, runner: "linux.rocm.gpu.mi300.2" },
|
||||
{ config: "default", shard: 2, num_shards: 6, runner: "linux.rocm.gpu.mi300.2" },
|
||||
{ config: "default", shard: 3, num_shards: 6, runner: "linux.rocm.gpu.mi300.2" },
|
||||
{ config: "default", shard: 4, num_shards: 6, runner: "linux.rocm.gpu.mi300.2" },
|
||||
{ config: "default", shard: 5, num_shards: 6, runner: "linux.rocm.gpu.mi300.2" },
|
||||
{ config: "default", shard: 6, num_shards: 6, runner: "linux.rocm.gpu.mi300.2" },
|
||||
{ config: "default", shard: 1, num_shards: 6, runner: "linux.rocm.gpu.gfx942.2" },
|
||||
{ config: "default", shard: 2, num_shards: 6, runner: "linux.rocm.gpu.gfx942.2" },
|
||||
{ config: "default", shard: 3, num_shards: 6, runner: "linux.rocm.gpu.gfx942.2" },
|
||||
{ config: "default", shard: 4, num_shards: 6, runner: "linux.rocm.gpu.gfx942.2" },
|
||||
{ config: "default", shard: 5, num_shards: 6, runner: "linux.rocm.gpu.gfx942.2" },
|
||||
{ config: "default", shard: 6, num_shards: 6, runner: "linux.rocm.gpu.gfx942.2" },
|
||||
]}
|
||||
secrets: inherit
|
||||
|
||||
|
||||
2
.github/workflows/rocm-mi355.yml
vendored
2
.github/workflows/rocm-mi355.yml
vendored
@ -3,7 +3,7 @@ name: rocm-mi355
|
||||
on:
|
||||
workflow_dispatch:
|
||||
schedule:
|
||||
- cron: 30 9 * * * # about 2:30am PDT
|
||||
- cron: 30 11,1 * * * # about 4:30am PDT and 6:30pm PDT
|
||||
|
||||
concurrency:
|
||||
group: ${{ github.workflow }}-${{ github.event.pull_request.number || github.ref_name }}-${{ github.ref_type == 'branch' && github.sha }}-${{ github.event_name == 'workflow_dispatch' }}-${{ github.event_name == 'schedule' }}
|
||||
|
||||
4
.github/workflows/torchbench.yml
vendored
4
.github/workflows/torchbench.yml
vendored
@ -10,6 +10,10 @@ concurrency:
|
||||
group: ${{ github.workflow }}-${{ github.event.pull_request.number || github.ref_name }}-${{ github.ref_type == 'branch' && github.sha }}-${{ github.event_name == 'workflow_dispatch' }}-${{ github.event_name == 'schedule' }}
|
||||
cancel-in-progress: true
|
||||
|
||||
permissions:
|
||||
id-token: write
|
||||
contents: read
|
||||
|
||||
jobs:
|
||||
get-default-label-prefix:
|
||||
if: github.repository_owner == 'pytorch'
|
||||
|
||||
3
.github/workflows/trunk.yml
vendored
3
.github/workflows/trunk.yml
vendored
@ -94,7 +94,6 @@ jobs:
|
||||
{ config: "default", shard: 1, num_shards: 3, runner: "macos-m1-stable" },
|
||||
{ config: "default", shard: 2, num_shards: 3, runner: "macos-m1-stable" },
|
||||
{ config: "default", shard: 3, num_shards: 3, runner: "macos-m1-stable" },
|
||||
{ config: "mps", shard: 1, num_shards: 1, runner: "macos-m1-13" },
|
||||
{ config: "mps", shard: 1, num_shards: 1, runner: "macos-m1-14" },
|
||||
{ config: "mps", shard: 1, num_shards: 1, runner: "macos-m2-15" },
|
||||
]}
|
||||
@ -206,7 +205,7 @@ jobs:
|
||||
with:
|
||||
runner_prefix: "${{ needs.get-label-type.outputs.label-type }}"
|
||||
build-environment: linux-jammy-py3.9-gcc11
|
||||
docker-image-name: ci-image:pytorch-linux-jammy-py3.9-gcc11
|
||||
docker-image-name: ci-image:pytorch-linux-jammy-py3.9-gcc11-inductor-benchmarks
|
||||
test-matrix: |
|
||||
{ include: [
|
||||
{ config: "verify_cachebench", shard: 1, num_shards: 1, runner: "${{ needs.get-label-type.outputs.label-type }}linux.2xlarge" },
|
||||
|
||||
2
.github/workflows/trymerge.yml
vendored
2
.github/workflows/trymerge.yml
vendored
@ -28,7 +28,7 @@ jobs:
|
||||
check-latest: false
|
||||
cache: pip
|
||||
architecture: x64
|
||||
- run: pip install pyyaml==6.0
|
||||
- run: pip install pyyaml==6.0.2
|
||||
|
||||
- name: Setup committer id
|
||||
run: |
|
||||
|
||||
2
.github/workflows/tryrebase.yml
vendored
2
.github/workflows/tryrebase.yml
vendored
@ -25,7 +25,7 @@ jobs:
|
||||
architecture: x64
|
||||
check-latest: false
|
||||
cache: pip
|
||||
- run: pip install pyyaml==6.0
|
||||
- run: pip install pyyaml==6.0.2
|
||||
|
||||
- name: Setup committer id
|
||||
run: |
|
||||
|
||||
28
.github/workflows/unstable.yml
vendored
28
.github/workflows/unstable.yml
vendored
@ -12,7 +12,9 @@ concurrency:
|
||||
group: ${{ github.workflow }}-${{ github.event.pull_request.number || github.ref_name }}-${{ github.ref_type == 'branch' && github.sha }}-${{ github.event_name == 'workflow_dispatch' }}
|
||||
cancel-in-progress: true
|
||||
|
||||
permissions: read-all
|
||||
permissions:
|
||||
id-token: write
|
||||
contents: read
|
||||
|
||||
jobs:
|
||||
# There must be at least one job here to satisfy GitHub action workflow syntax
|
||||
@ -51,3 +53,27 @@ jobs:
|
||||
issue_owner: ${{ github.event.pull_request.user.login || github.event.issue.user.login }}
|
||||
curr_branch: ${{ github.head_ref || github.ref_name }}
|
||||
curr_ref_type: ${{ github.ref_type }}
|
||||
|
||||
linux-jammy-py3_9-clang9-xla-build:
|
||||
name: linux-jammy-py3_9-clang9-xla
|
||||
uses: ./.github/workflows/_linux-build.yml
|
||||
needs: get-label-type
|
||||
with:
|
||||
runner_prefix: "${{ needs.get-label-type.outputs.label-type }}"
|
||||
build-environment: linux-jammy-py3.9-clang9-xla
|
||||
docker-image-name: 308535385114.dkr.ecr.us-east-1.amazonaws.com/pytorch/xla_base:v1.3-lite
|
||||
test-matrix: |
|
||||
{ include: [
|
||||
{ config: "xla", shard: 1, num_shards: 1, runner: "${{ needs.get-label-type.outputs.label-type }}linux.12xlarge" },
|
||||
]}
|
||||
secrets: inherit
|
||||
|
||||
linux-jammy-py3_9-clang9-xla-test:
|
||||
name: linux-jammy-py3_9-clang9-xla
|
||||
uses: ./.github/workflows/_linux-test.yml
|
||||
needs: linux-jammy-py3_9-clang9-xla-build
|
||||
with:
|
||||
build-environment: linux-jammy-py3.9-clang9-xla
|
||||
docker-image: ${{ needs.linux-jammy-py3_9-clang9-xla-build.outputs.docker-image }}
|
||||
test-matrix: ${{ needs.linux-jammy-py3_9-clang9-xla-build.outputs.test-matrix }}
|
||||
secrets: inherit
|
||||
|
||||
2
.github/workflows/update-viablestrict.yml
vendored
2
.github/workflows/update-viablestrict.yml
vendored
@ -23,7 +23,7 @@ jobs:
|
||||
with:
|
||||
repository: pytorch/pytorch
|
||||
stable-branch: viable/strict
|
||||
requires: '[\"pull\", \"trunk\", \"lint\", \"linux-binary\"]'
|
||||
requires: '[\"pull\", \"trunk\", \"lint\", \"linux-binary\", \"linux-aarch64\"]'
|
||||
secret-bot-token: ${{ secrets.MERGEBOT_TOKEN }}
|
||||
clickhouse-url: ${{ secrets.CLICKHOUSE_URL }}
|
||||
clickhouse-username: ${{ secrets.CLICKHOUSE_VIABLESTRICT_USERNAME }}
|
||||
|
||||
@ -39,16 +39,16 @@ init_command = [
|
||||
'python3',
|
||||
'tools/linter/adapters/pip_init.py',
|
||||
'--dry-run={{DRYRUN}}',
|
||||
'flake8==6.1.0',
|
||||
'flake8-bugbear==23.3.23',
|
||||
'flake8-comprehensions==3.15.0',
|
||||
'flake8==7.3.0',
|
||||
'flake8-bugbear==24.12.12',
|
||||
'flake8-comprehensions==3.16.0',
|
||||
'flake8-executable==2.1.3',
|
||||
'flake8-logging-format==0.9.0',
|
||||
'flake8-pyi==23.3.1',
|
||||
'flake8-simplify==0.19.3',
|
||||
'flake8-logging-format==2024.24.12',
|
||||
'flake8-pyi==25.5.0',
|
||||
'flake8-simplify==0.22.0',
|
||||
'mccabe==0.7.0',
|
||||
'pycodestyle==2.11.1',
|
||||
'pyflakes==3.1.0',
|
||||
'pycodestyle==2.14.0',
|
||||
'pyflakes==3.4.0',
|
||||
'torchfix==0.4.0 ; python_version >= "3.9" and python_version < "3.13"',
|
||||
]
|
||||
|
||||
@ -158,16 +158,16 @@ init_command = [
|
||||
'mypy==1.16.0',
|
||||
'sympy==1.13.3',
|
||||
'types-requests==2.27.25',
|
||||
'types-pyyaml==6.0.1',
|
||||
'types-pyyaml==6.0.2',
|
||||
'types-tabulate==0.8.8',
|
||||
'types-protobuf==5.29.1.20250403',
|
||||
'types-setuptools==79.0.0.20250422',
|
||||
'types-jinja2==2.11.9',
|
||||
'types-colorama==0.4.6',
|
||||
'filelock==3.13.1',
|
||||
'filelock==3.18.0',
|
||||
'junitparser==2.1.1',
|
||||
'rich==10.9.0',
|
||||
'pyyaml==6.0.1',
|
||||
'rich==14.1.0',
|
||||
'pyyaml==6.0.2',
|
||||
'optree==0.13.0',
|
||||
'dataclasses-json==0.6.7',
|
||||
'pandas==2.2.3',
|
||||
@ -1111,7 +1111,7 @@ init_command = [
|
||||
'python3',
|
||||
'tools/linter/adapters/pip_init.py',
|
||||
'--dry-run={{DRYRUN}}',
|
||||
'PyYAML==6.0.1',
|
||||
'pyyaml==6.0.2',
|
||||
]
|
||||
|
||||
[[linter]]
|
||||
@ -1133,7 +1133,7 @@ init_command = [
|
||||
'python3',
|
||||
'tools/linter/adapters/pip_init.py',
|
||||
'--dry-run={{DRYRUN}}',
|
||||
'PyYAML==6.0.1',
|
||||
'pyyaml==6.0.2',
|
||||
]
|
||||
|
||||
[[linter]]
|
||||
@ -1452,8 +1452,6 @@ init_command = [
|
||||
'python3',
|
||||
'tools/linter/adapters/pip_init.py',
|
||||
'--dry-run={{DRYRUN}}',
|
||||
'--no-black-binary',
|
||||
'black==23.12.1',
|
||||
'usort==1.0.8.post1',
|
||||
'isort==6.0.1',
|
||||
'ruff==0.12.2', # sync with RUFF
|
||||
|
||||
16
AGENTS.md
16
AGENTS.md
@ -1 +1,17 @@
|
||||
- This is the only AGENTS.md, there are no recursive AGENTS.md
|
||||
- When you are working on a bug, first create a standalone file that
|
||||
reproduces the bug and verify it fails in the expected way. Use this to
|
||||
test if your changes work. Once the change is passing, find an appropriate
|
||||
test file to add the test to and make sure to follow local conventions on
|
||||
the test file.
|
||||
- If you are running the real test suite, DO NOT run the entire test suite.
|
||||
Instead run only a single test case, e.g., 'python test/test_torch.py TestTorch.test_dir'
|
||||
- Do NOT run setup.py, you do not have a working build environment
|
||||
- Do NOT run pre-commit, it is not setup
|
||||
- To run lint, run 'lintrunner -a' (which will autoapply changes)
|
||||
- Do NOT attempt to install dependencies, you do not have Internet access
|
||||
- When you are ready to make a PR, do exactly these steps:
|
||||
- git stash -u
|
||||
- git reset --hard $(cat /tmp/orig_work.txt) # NB: reset to the LOCAL branch, do NOT fetch
|
||||
- git stash pop
|
||||
- Resolve conflicts if necessary
|
||||
|
||||
@ -679,6 +679,7 @@ cc_library(
|
||||
[
|
||||
"torch/*.h",
|
||||
"torch/csrc/**/*.h",
|
||||
"torch/nativert/**/*.h",
|
||||
"torch/csrc/distributed/c10d/**/*.hpp",
|
||||
"torch/lib/libshm/*.h",
|
||||
],
|
||||
|
||||
@ -240,6 +240,8 @@ cmake_dependent_option(
|
||||
BUILD_LAZY_CUDA_LINALG "Build cuda linalg ops as separate library" ON
|
||||
"USE_CUDA AND LINUX AND BUILD_PYTHON" OFF)
|
||||
cmake_dependent_option(USE_ROCM "Use ROCm" ON "LINUX" OFF)
|
||||
cmake_dependent_option(USE_ROCM_CK_GEMM "Use ROCm Composable Kernel for GEMMs" ON "USE_ROCM;NOT WIN32" OFF)
|
||||
option(USE_ROCM_CK_SDPA "Use ROCm Composable Kernel for SDPA" OFF)
|
||||
option(CAFFE2_STATIC_LINK_CUDA "Statically link CUDA libraries" OFF)
|
||||
cmake_dependent_option(USE_CUDNN "Use cuDNN" ON "USE_CUDA" OFF)
|
||||
cmake_dependent_option(USE_STATIC_CUDNN "Use cuDNN static libraries" OFF
|
||||
@ -251,7 +253,6 @@ cmake_dependent_option(USE_CUFILE "Use cuFile" ON "USE_CUDA AND NOT WIN32" OFF)
|
||||
option(USE_FBGEMM "Use FBGEMM (quantized 8-bit server operators)" ON)
|
||||
option(USE_KINETO "Use Kineto profiling library" ON)
|
||||
option(USE_CUPTI_SO "Use CUPTI as a shared library" ON)
|
||||
option(USE_FAKELOWP "Use FakeLowp operators" OFF)
|
||||
option(USE_GFLAGS "Use GFLAGS" OFF)
|
||||
option(USE_GLOG "Use GLOG" OFF)
|
||||
option(USE_LITE_PROTO "Use lite protobuf instead of full." OFF)
|
||||
@ -260,8 +261,9 @@ option(USE_PYTORCH_METAL "Use Metal for PyTorch iOS build" OFF)
|
||||
option(USE_PYTORCH_METAL_EXPORT "Export Metal models on MacOSX desktop" OFF)
|
||||
option(USE_NATIVE_ARCH "Use -march=native" OFF)
|
||||
cmake_dependent_option(USE_MPS "Use MPS for macOS build" ON "MPS_FOUND" OFF)
|
||||
option(USE_DISTRIBUTED "Use distributed" ON)
|
||||
cmake_dependent_option(USE_NCCL "Use NCCL" ON
|
||||
"USE_CUDA OR USE_ROCM;UNIX;NOT APPLE" OFF)
|
||||
"USE_DISTRIBUTED;USE_CUDA OR USE_ROCM;UNIX;NOT APPLE" OFF)
|
||||
cmake_dependent_option(USE_XCCL "Use XCCL" ON
|
||||
"USE_XPU;UNIX;NOT APPLE" OFF)
|
||||
cmake_dependent_option(USE_RCCL "Use RCCL" ON USE_NCCL OFF)
|
||||
@ -322,7 +324,6 @@ set(MKLDNN_ENABLE_CONCURRENT_EXEC ${USE_MKLDNN})
|
||||
cmake_dependent_option(USE_MKLDNN_CBLAS "Use CBLAS in MKLDNN" OFF "USE_MKLDNN"
|
||||
OFF)
|
||||
option(USE_STATIC_MKL "Prefer to link with MKL statically (Unix only)" OFF)
|
||||
option(USE_DISTRIBUTED "Use distributed" ON)
|
||||
cmake_dependent_option(
|
||||
USE_MPI "Use MPI for Caffe2. Only available if USE_DISTRIBUTED is on." ON
|
||||
"USE_DISTRIBUTED" OFF)
|
||||
@ -564,7 +565,7 @@ if(MSVC)
|
||||
set(CMAKE_NINJA_CMCLDEPS_RC OFF)
|
||||
if(MSVC_Z7_OVERRIDE)
|
||||
# CMake set debug flags to use /Z7
|
||||
set(CMAKE_MSVC_DEBUG_INFORMATION_FORMAT Embedded)
|
||||
set(CMAKE_MSVC_DEBUG_INFORMATION_FORMAT "$<$<CONFIG:Debug,RelWithDebInfo>:Embedded>")
|
||||
endif()
|
||||
foreach(
|
||||
flag_var
|
||||
@ -834,10 +835,11 @@ include(ExternalProject)
|
||||
|
||||
# ---[ Dependencies ---[ FBGEMM doesn't work on x86 32bit and
|
||||
# CMAKE_SYSTEM_PROCESSOR thinks its 64bit
|
||||
if(USE_FBGEMM
|
||||
AND((CMAKE_SYSTEM_PROCESSOR STREQUAL "x86_64" AND CMAKE_SIZEOF_VOID_P EQUAL
|
||||
4)
|
||||
OR CMAKE_SYSTEM_PROCESSOR STREQUAL "x86"))
|
||||
if(USE_FBGEMM AND NOT CMAKE_SYSTEM_PROCESSOR STREQUAL "x86_64")
|
||||
message(WARNING
|
||||
"x64 operating system is required for FBGEMM. "
|
||||
"Not compiling with FBGEMM. "
|
||||
"Turn this warning off by USE_FBGEMM=OFF.")
|
||||
set(USE_FBGEMM OFF)
|
||||
endif()
|
||||
|
||||
@ -872,6 +874,14 @@ cmake_dependent_option(
|
||||
"USE_CUDA OR USE_ROCM;NOT MSVC"
|
||||
OFF)
|
||||
|
||||
cmake_dependent_option(
|
||||
USE_FBGEMM_GENAI
|
||||
"Whether to build FBGEMM GenAI quantized GEMM kernels.\
|
||||
Will be disabled if not supported by the platform"
|
||||
OFF
|
||||
"USE_CUDA OR USE_ROCM"
|
||||
OFF)
|
||||
|
||||
# CAVEAT: Again, Flash Attention2 will error while building for sm52 while Mem
|
||||
# Eff Attention won't
|
||||
cmake_dependent_option(
|
||||
@ -905,6 +915,10 @@ if(USE_FBGEMM)
|
||||
string(APPEND CMAKE_CXX_FLAGS " -DUSE_FBGEMM")
|
||||
endif()
|
||||
|
||||
if(USE_FBGEMM_GENAI)
|
||||
string(APPEND CMAKE_CXX_FLAGS " -DUSE_FBGEMM_GENAI")
|
||||
endif()
|
||||
|
||||
if(USE_PYTORCH_QNNPACK)
|
||||
string(APPEND CMAKE_CXX_FLAGS " -DUSE_PYTORCH_QNNPACK")
|
||||
endif()
|
||||
|
||||
19
CODEOWNERS
19
CODEOWNERS
@ -14,7 +14,6 @@
|
||||
/torch/csrc/autograd/ @albanD @soulitzer
|
||||
/torch/autograd/ @albanD @soulitzer
|
||||
/tools/autograd/ @albanD @soulitzer
|
||||
/torch/header_only_apis.txt @janeyx99
|
||||
/torch/nn/ @albanD @jbschlosser @mikaylagawarecki
|
||||
/torch/optim/ @albanD @janeyx99
|
||||
/test/test_public_bindings.py @albanD
|
||||
@ -51,12 +50,12 @@ nn/qat/ @jerryzh168
|
||||
/torch/csrc/distributed/c10d/Ops.* @kwen2501
|
||||
|
||||
# ONNX Export
|
||||
/torch/_dynamo/backends/onnxrt.py @wschin
|
||||
/torch/csrc/jit/passes/onnx.h @titaiwangms @shubhambhokare1
|
||||
/torch/csrc/jit/passes/onnx.cpp @titaiwangms @shubhambhokare1
|
||||
/torch/csrc/jit/passes/onnx/ @titaiwangms @shubhambhokare1
|
||||
/torch/onnx/ @titaiwangms @shubhambhokare1 @justinchuby @wschin
|
||||
/test/onnx/ @titaiwangms @shubhambhokare1 @justinchuby @wschin
|
||||
/torch/_dynamo/backends/onnxrt.py @titaiwangms @xadupre @justinchuby
|
||||
/torch/csrc/jit/passes/onnx.h @titaiwangms @xadupre
|
||||
/torch/csrc/jit/passes/onnx.cpp @titaiwangms @xadupre
|
||||
/torch/csrc/jit/passes/onnx/ @titaiwangms @xadupre
|
||||
/torch/onnx/ @titaiwangms @xadupre @justinchuby
|
||||
/test/onnx/ @titaiwangms @xadupre @justinchuby
|
||||
|
||||
# CI
|
||||
/.ci @pytorch/pytorch-dev-infra
|
||||
@ -165,6 +164,7 @@ caffe2/utils/hip @jeffdaily @jithunnair-amd
|
||||
# torch.export
|
||||
/torch/export/ @avikchaudhuri @tugsbayasgalan @zhxchen17 @ydwu4 @angelayi
|
||||
/torch/_export/ @avikchaudhuri @tugsbayasgalan @zhxchen17 @ydwu4 @angelayi
|
||||
/torch/_export/serde/schema.py @SherlockNoMad @zhxchen17
|
||||
|
||||
# Dynamic Shapes
|
||||
/torch/fx/experimental/symbolic_shapes.py @bobrenjc93 @laithsakka
|
||||
@ -196,3 +196,8 @@ torch/backends/cudnn/ @eqy @syed-ahmed
|
||||
/torch/utils/_cxx_pytree.py @XuehaiPan
|
||||
/torch/utils/pytree/ @XuehaiPan
|
||||
/torch/_dynamo/polyfills/pytree.py @XuehaiPan
|
||||
|
||||
# Relating to libtorch ABI
|
||||
/torch/csrc/stable/ @janeyx99 @mikaylagawarecki
|
||||
/torch/headeronly/ @janeyx99
|
||||
/torch/header_only_apis.txt @janeyx99
|
||||
|
||||
15
Dockerfile
15
Dockerfile
@ -47,18 +47,6 @@ WORKDIR /opt/pytorch
|
||||
COPY . .
|
||||
RUN git submodule update --init --recursive
|
||||
|
||||
FROM conda as build
|
||||
ARG CMAKE_VARS
|
||||
WORKDIR /opt/pytorch
|
||||
COPY --from=conda /opt/conda /opt/conda
|
||||
COPY --from=submodule-update /opt/pytorch /opt/pytorch
|
||||
RUN make triton
|
||||
RUN --mount=type=cache,target=/opt/ccache \
|
||||
export eval ${CMAKE_VARS} && \
|
||||
TORCH_CUDA_ARCH_LIST="7.0 7.2 7.5 8.0 8.6 8.7 8.9 9.0 9.0a" TORCH_NVCC_FLAGS="-Xfatbin -compress-all" \
|
||||
CMAKE_PREFIX_PATH="$(dirname $(which conda))/../" \
|
||||
python -m pip install --no-build-isolation -v .
|
||||
|
||||
FROM conda as conda-installs
|
||||
ARG PYTHON_VERSION=3.11
|
||||
ARG CUDA_PATH=cu121
|
||||
@ -109,4 +97,5 @@ WORKDIR /workspace
|
||||
|
||||
FROM official as dev
|
||||
# Should override the already installed version from the official-image stage
|
||||
COPY --from=build /opt/conda /opt/conda
|
||||
COPY --from=conda /opt/conda /opt/conda
|
||||
COPY --from=submodule-update /opt/pytorch /opt/pytorch
|
||||
|
||||
@ -243,7 +243,7 @@ git submodule update --init --recursive
|
||||
|
||||
```bash
|
||||
conda install cmake ninja
|
||||
# Run this command from the PyTorch directory after cloning the source code using the “Get the PyTorch Source“ section below
|
||||
# Run this command from the PyTorch directory after cloning the source code using the “Get the PyTorch Source“ section above
|
||||
pip install -r requirements.txt
|
||||
```
|
||||
|
||||
@ -276,7 +276,7 @@ conda install pkg-config libuv
|
||||
pip install mkl-static mkl-include
|
||||
# Add these packages if torch.distributed is needed.
|
||||
# Distributed package support on Windows is a prototype feature and is subject to changes.
|
||||
conda install -c conda-forge libuv=1.39
|
||||
conda install -c conda-forge libuv
|
||||
```
|
||||
|
||||
#### Install PyTorch
|
||||
@ -560,7 +560,7 @@ To learn more about making a contribution to Pytorch, please see our [Contributi
|
||||
|
||||
PyTorch is a community-driven project with several skillful engineers and researchers contributing to it.
|
||||
|
||||
PyTorch is currently maintained by [Soumith Chintala](http://soumith.ch), [Gregory Chanan](https://github.com/gchanan), [Dmytro Dzhulgakov](https://github.com/dzhulgakov), [Edward Yang](https://github.com/ezyang), and [Nikita Shulga](https://github.com/malfet) with major contributions coming from hundreds of talented individuals in various forms and means.
|
||||
PyTorch is currently maintained by [Soumith Chintala](http://soumith.ch), [Gregory Chanan](https://github.com/gchanan), [Dmytro Dzhulgakov](https://github.com/dzhulgakov), [Edward Yang](https://github.com/ezyang), [Alban Desmaison](https://github.com/albanD), [Piotr Bialecki](https://github.com/ptrblck) and [Nikita Shulga](https://github.com/malfet) with major contributions coming from hundreds of talented individuals in various forms and means.
|
||||
A non-exhaustive but growing list needs to mention: [Trevor Killeen](https://github.com/killeent), [Sasank Chilamkurthy](https://github.com/chsasank), [Sergey Zagoruyko](https://github.com/szagoruyko), [Adam Lerer](https://github.com/adamlerer), [Francisco Massa](https://github.com/fmassa), [Alykhan Tejani](https://github.com/alykhantejani), [Luca Antiga](https://github.com/lantiga), [Alban Desmaison](https://github.com/albanD), [Andreas Koepf](https://github.com/andreaskoepf), [James Bradbury](https://github.com/jekbradbury), [Zeming Lin](https://github.com/ebetica), [Yuandong Tian](https://github.com/yuandong-tian), [Guillaume Lample](https://github.com/glample), [Marat Dukhan](https://github.com/Maratyszcza), [Natalia Gimelshein](https://github.com/ngimel), [Christian Sarofeen](https://github.com/csarofeen), [Martin Raison](https://github.com/martinraison), [Edward Yang](https://github.com/ezyang), [Zachary Devito](https://github.com/zdevito). <!-- codespell:ignore -->
|
||||
|
||||
Note: This project is unrelated to [hughperkins/pytorch](https://github.com/hughperkins/pytorch) with the same name. Hugh is a valuable contributor to the Torch community and has helped with many things Torch and PyTorch.
|
||||
|
||||
@ -119,6 +119,8 @@ file(GLOB_RECURSE native_mps_cpp "native/mps/*.cpp")
|
||||
file(GLOB_RECURSE native_mps_mm "native/mps/*.mm")
|
||||
file(GLOB_RECURSE native_mps_metal "native/mps/*.metal")
|
||||
file(GLOB_RECURSE native_mps_h "native/mps/*.h")
|
||||
file(GLOB_RECURSE native_sparse_mps_mm "native/sparse/mps/*.mm")
|
||||
file(GLOB_RECURSE native_mps_sparse_metal "native/sparse/mps/*.metal")
|
||||
|
||||
file(GLOB native_sparse_cpp "native/sparse/*.cpp")
|
||||
file(GLOB native_quantized_cpp
|
||||
@ -178,26 +180,27 @@ file(GLOB native_flash_attn_api_cpp "native/transformers/cuda/flash_attn/flash_a
|
||||
file(GLOB flash_attention_hip_hip "native/transformers/hip/flash_attn/*.hip")
|
||||
# if USE_FLASH_ATTENTION is set, ensure CK instances get generated
|
||||
if(USE_FLASH_ATTENTION)
|
||||
if(DEFINED ENV{USE_CK_FLASH_ATTENTION})
|
||||
set(USE_CK_FLASH_ATTENTION $ENV{USE_CK_FLASH_ATTENTION})
|
||||
if(USE_CK_FLASH_ATTENTION STREQUAL "1")
|
||||
if(DEFINED ENV{PYTORCH_ROCM_ARCH})
|
||||
list(LENGTH PYTORCH_ROCM_ARCH NUM_ARCHS)
|
||||
if(NUM_ARCHS GREATER 1)
|
||||
message(WARNING "Building CK for multiple archs can increase build time considerably!
|
||||
Consider setting PYTORCH_ROCM_ARCH env var value as the gfx arch you need to build for")
|
||||
endif()
|
||||
endif()
|
||||
message(STATUS "USE_CK_FLASH_ATTENTION is set; building PyTorch with CK Flash Attention enabled")
|
||||
message(STATUS "Generating CK kernel instances...")
|
||||
add_subdirectory(native/transformers/hip/flash_attn/ck)
|
||||
file(GLOB flash_attention_hip_ck_hip "native/transformers/hip/flash_attn/ck/*.hip")
|
||||
list(APPEND native_transformers_hip_hip ${flash_attention_hip_ck_hip})
|
||||
# FAv3 Generation
|
||||
add_subdirectory(native/transformers/hip/flash_attn/ck/fav_v3)
|
||||
file(GLOB flash_attention_v3_hip "native/transformers/hip/flash_attn/ck/fav_v3/*.hip")
|
||||
list(APPEND native_transformers_hip_hip ${flash_attention_v3_hip})
|
||||
if("$ENV{USE_CK_FLASH_ATTENTION}" STREQUAL "1")
|
||||
message(STATUS "USE_CK_FLASH_ATTENTION is being deprecated. Please use USE_ROCM_CK_SDPA instead")
|
||||
caffe2_update_option(USE_ROCM_CK_SDPA ON)
|
||||
endif()
|
||||
if(USE_ROCM_CK_SDPA)
|
||||
if(DEFINED ENV{PYTORCH_ROCM_ARCH})
|
||||
list(LENGTH PYTORCH_ROCM_ARCH NUM_ARCHS)
|
||||
if(NUM_ARCHS GREATER 1)
|
||||
message(WARNING "Building CK for multiple archs can increase build time considerably!
|
||||
Consider setting PYTORCH_ROCM_ARCH env var value as the gfx arch you need to build for")
|
||||
endif()
|
||||
endif()
|
||||
message(STATUS "USE_ROCM_CK_SDPA is set; building PyTorch with CK SDPA enabled")
|
||||
message(STATUS "Generating CK kernel instances...")
|
||||
add_subdirectory(native/transformers/hip/flash_attn/ck)
|
||||
file(GLOB flash_attention_hip_ck_hip "native/transformers/hip/flash_attn/ck/*.hip")
|
||||
list(APPEND native_transformers_hip_hip ${flash_attention_hip_ck_hip})
|
||||
# FAv3 Generation
|
||||
add_subdirectory(native/transformers/hip/flash_attn/ck/fav_v3)
|
||||
file(GLOB flash_attention_v3_hip "native/transformers/hip/flash_attn/ck/fav_v3/*.hip")
|
||||
list(APPEND native_transformers_hip_hip ${flash_attention_v3_hip})
|
||||
endif()
|
||||
file(GLOB flash_attention_hip_aot_hip "native/transformers/hip/flash_attn/aot/*.hip")
|
||||
file(GLOB flash_attention_src_hip_hip "native/transformers/hip/flash_attn/src/*.hip")
|
||||
@ -247,6 +250,50 @@ if(USE_MEM_EFF_ATTENTION)
|
||||
list(APPEND ATen_ATTENTION_KERNEL_SRCS ${mem_eff_attention_cuda_kernels_cu})
|
||||
endif()
|
||||
|
||||
IF(USE_FBGEMM_GENAI AND USE_ROCM AND NOT "gfx942" IN_LIST PYTORCH_ROCM_ARCH)
|
||||
message(WARNING "Unsupported ROCM arch for FBGEMM GenAI, will set USE_FBGEMM_GENAI to OFF")
|
||||
set(USE_FBGEMM_GENAI off)
|
||||
endif()
|
||||
|
||||
# FBGEMM GenAI
|
||||
IF(USE_FBGEMM_GENAI)
|
||||
set(FBGEMM_THIRD_PARTY ${PROJECT_SOURCE_DIR}/third_party/fbgemm/external/)
|
||||
set(FBGEMM_GENAI_DIR ${PROJECT_SOURCE_DIR}/third_party/fbgemm/fbgemm_gpu/experimental/gen_ai/src/quantize)
|
||||
|
||||
if(USE_ROCM)
|
||||
# Only include the kernels we want to build to avoid increasing binary size.
|
||||
file(GLOB_RECURSE fbgemm_genai_native_rocm_hip
|
||||
"${FBGEMM_GENAI_DIR}/ck_extensions/fp8_rowwise_grouped/kernels/fp8_rowwise_grouped*.hip"
|
||||
"${FBGEMM_GENAI_DIR}/ck_extensions/fp8_rowwise_grouped/fp8_rowwise_grouped_gemm.hip")
|
||||
set_source_files_properties(${fbgemm_genai_native_rocm_hip} PROPERTIES HIP_SOURCE_PROPERTY_FORMAT 1)
|
||||
|
||||
# Add additional HIPCC compiler flags for performance
|
||||
set(FBGEMM_GENAI_EXTRA_HIPCC_FLAGS
|
||||
-mllvm
|
||||
-amdgpu-coerce-illegal-types=1
|
||||
-mllvm
|
||||
-enable-post-misched=0
|
||||
-mllvm
|
||||
-greedy-reverse-local-assignment=1
|
||||
-fhip-new-launch-api)
|
||||
|
||||
hip_add_library(
|
||||
fbgemm_genai STATIC
|
||||
${fbgemm_genai_native_rocm_hip}
|
||||
HIPCC_OPTIONS ${HIP_HCC_FLAGS} ${FBGEMM_GENAI_EXTRA_HIPCC_FLAGS})
|
||||
set_target_properties(fbgemm_genai PROPERTIES POSITION_INDEPENDENT_CODE ON)
|
||||
target_compile_definitions(fbgemm_genai PRIVATE FBGEMM_GENAI_NO_EXTENDED_SHAPES)
|
||||
|
||||
target_include_directories(fbgemm_genai PUBLIC
|
||||
# FBGEMM version of Composable Kernel is used due to some customizations
|
||||
${FBGEMM_THIRD_PARTY}/composable_kernel/include
|
||||
${FBGEMM_THIRD_PARTY}/composable_kernel/library/include
|
||||
${FBGEMM_GENAI_DIR}/include/
|
||||
${FBGEMM_GENAI_DIR}/common/include/
|
||||
)
|
||||
endif()
|
||||
endif()
|
||||
|
||||
# XNNPACK
|
||||
file(GLOB native_xnnpack "native/xnnpack/*.cpp")
|
||||
|
||||
@ -372,39 +419,42 @@ if(USE_CUDA)
|
||||
endif()
|
||||
|
||||
if(USE_ROCM)
|
||||
# NOTE: The PyTorch build does not actually add_subdirectory
|
||||
# third_party/composable_kernel or use it as a CMake library. What is used
|
||||
# is header only, so this should be ok, except that the CMake build generates
|
||||
# a ck/config.h. We just do that part here. Without this, the ck.h from the
|
||||
# ROCM SDK may get accidentally used instead.
|
||||
function(_pytorch_rocm_generate_ck_conf)
|
||||
set(CK_ENABLE_INT8 "ON")
|
||||
set(CK_ENABLE_FP16 "ON")
|
||||
set(CK_ENABLE_FP32 "ON")
|
||||
set(CK_ENABLE_FP64 "ON")
|
||||
set(CK_ENABLE_BF16 "ON")
|
||||
set(CK_ENABLE_FP8 "ON")
|
||||
set(CK_ENABLE_BF8 "ON")
|
||||
set(CK_USE_XDL "ON")
|
||||
set(CK_USE_WMMA "ON")
|
||||
configure_file(
|
||||
"${Torch_SOURCE_DIR}/third_party/composable_kernel/include/ck/config.h.in"
|
||||
"${CMAKE_CURRENT_BINARY_DIR}/composable_kernel/ck/config.h"
|
||||
)
|
||||
endfunction()
|
||||
list(APPEND ATen_HIP_INCLUDE ${CMAKE_CURRENT_SOURCE_DIR}/hip)
|
||||
list(APPEND ATen_HIP_INCLUDE ${CMAKE_CURRENT_SOURCE_DIR}/../../../third_party/composable_kernel/include)
|
||||
list(APPEND ATen_HIP_INCLUDE ${CMAKE_CURRENT_SOURCE_DIR}/../../../third_party/composable_kernel/library/include)
|
||||
list(APPEND ATen_HIP_INCLUDE ${CMAKE_CURRENT_BINARY_DIR}/composable_kernel)
|
||||
list(APPEND ATen_HIP_INCLUDE ${CMAKE_CURRENT_SOURCE_DIR}/../../../third_party/aiter/csrc/include)
|
||||
_pytorch_rocm_generate_ck_conf()
|
||||
if((USE_FLASH_ATTENTION AND USE_ROCM_CK_SDPA) OR USE_ROCM_CK_GEMM)
|
||||
# NOTE: The PyTorch build does not actually add_subdirectory
|
||||
# third_party/composable_kernel or use it as a CMake library. What is used
|
||||
# is header only, so this should be ok, except that the CMake build generates
|
||||
# a ck/config.h. We just do that part here. Without this, the ck.h from the
|
||||
# ROCM SDK may get accidentally used instead.
|
||||
function(_pytorch_rocm_generate_ck_conf)
|
||||
set(CK_ENABLE_INT8 "ON")
|
||||
set(CK_ENABLE_FP16 "ON")
|
||||
set(CK_ENABLE_FP32 "ON")
|
||||
set(CK_ENABLE_FP64 "ON")
|
||||
set(CK_ENABLE_BF16 "ON")
|
||||
set(CK_ENABLE_FP8 "ON")
|
||||
set(CK_ENABLE_BF8 "ON")
|
||||
set(CK_USE_XDL "ON")
|
||||
set(CK_USE_WMMA "ON")
|
||||
configure_file(
|
||||
"${Torch_SOURCE_DIR}/third_party/composable_kernel/include/ck/config.h.in"
|
||||
"${CMAKE_CURRENT_BINARY_DIR}/composable_kernel/ck/config.h"
|
||||
)
|
||||
endfunction()
|
||||
list(APPEND ATen_HIP_INCLUDE ${CMAKE_CURRENT_SOURCE_DIR}/hip)
|
||||
list(APPEND ATen_HIP_INCLUDE ${CMAKE_CURRENT_SOURCE_DIR}/../../../third_party/composable_kernel/include)
|
||||
list(APPEND ATen_HIP_INCLUDE ${CMAKE_CURRENT_SOURCE_DIR}/../../../third_party/composable_kernel/library/include)
|
||||
list(APPEND ATen_HIP_INCLUDE ${CMAKE_CURRENT_SOURCE_DIR}/../../../third_party/composable_kernel/example/ck_tile/01_fmha)
|
||||
list(APPEND ATen_HIP_INCLUDE ${CMAKE_CURRENT_BINARY_DIR}/composable_kernel)
|
||||
list(APPEND ATen_HIP_INCLUDE ${CMAKE_CURRENT_SOURCE_DIR}/../../../third_party/aiter/csrc/include)
|
||||
_pytorch_rocm_generate_ck_conf()
|
||||
endif()
|
||||
|
||||
# Next two lines are needed because TunableOp uses third-party/fmt
|
||||
list(APPEND ATen_HIP_INCLUDE $<TARGET_PROPERTY:fmt::fmt-header-only,INTERFACE_INCLUDE_DIRECTORIES>)
|
||||
list(APPEND ATen_HIP_DEPENDENCY_LIBS fmt::fmt-header-only)
|
||||
if(USE_FLASH_ATTENTION)
|
||||
list(APPEND ATen_HIP_INCLUDE ${CMAKE_CURRENT_SOURCE_DIR}/native/transformers/hip/flash_attn/ck)
|
||||
endif()
|
||||
if(USE_FLASH_ATTENTION AND USE_ROCM_CK_SDPA)
|
||||
list(APPEND ATen_HIP_INCLUDE ${CMAKE_CURRENT_SOURCE_DIR}/native/transformers/hip/flash_attn/ck)
|
||||
endif()
|
||||
list(APPEND ATen_HIP_SRCS
|
||||
${ATen_HIP_SRCS}
|
||||
${hip_hip}
|
||||
@ -414,12 +464,17 @@ endif()
|
||||
${native_quantized_hip_hip}
|
||||
${native_transformers_hip_hip} ${native_transformers_src_hip_hip}
|
||||
)
|
||||
if(WIN32) # Windows doesn't support Composable Kernels
|
||||
if(NOT USE_ROCM_CK_GEMM)
|
||||
file(GLOB native_hip_bgemm "native/hip/bgemm_kernels/*.hip")
|
||||
file(GLOB native_hip_ck "native/hip/ck*.hip")
|
||||
exclude(ATen_HIP_SRCS "${ATen_HIP_SRCS}"
|
||||
${native_hip_bgemm} ${native_hip_ck})
|
||||
endif()
|
||||
if(WIN32) # Windows doesn't support Composable Kernels and Triton
|
||||
exclude(ATen_HIP_SRCS "${ATen_HIP_SRCS}"
|
||||
${native_transformers_hip_hip} ${native_transformers_hip_cpp})
|
||||
endif()
|
||||
|
||||
# TODO: Codegen separate files for HIP and use those (s/cuda_generated_sources/hip_generated_sources)
|
||||
list(APPEND all_hip_cpp
|
||||
${native_nested_hip_cpp}
|
||||
@ -586,17 +641,10 @@ if(USE_CUDA AND NOT USE_ROCM)
|
||||
CUDA::cufft_static_nocallback
|
||||
)
|
||||
if(NOT BUILD_LAZY_CUDA_LINALG)
|
||||
if(CUDA_VERSION_MAJOR LESS_EQUAL 11)
|
||||
list(APPEND ATen_CUDA_DEPENDENCY_LIBS
|
||||
CUDA::cusolver_static
|
||||
${CUDAToolkit_LIBRARY_DIR}/liblapack_static.a # needed for libcusolver_static
|
||||
)
|
||||
elseif(CUDA_VERSION_MAJOR GREATER_EQUAL 12)
|
||||
list(APPEND ATen_CUDA_DEPENDENCY_LIBS
|
||||
CUDA::cusolver_static
|
||||
${CUDAToolkit_LIBRARY_DIR}/libcusolver_lapack_static.a # needed for libcusolver_static
|
||||
)
|
||||
endif()
|
||||
list(APPEND ATen_CUDA_DEPENDENCY_LIBS
|
||||
CUDA::cusolver_static
|
||||
${CUDAToolkit_LIBRARY_DIR}/libcusolver_lapack_static.a # needed for libcusolver_static
|
||||
)
|
||||
endif()
|
||||
else()
|
||||
list(APPEND ATen_CUDA_DEPENDENCY_LIBS
|
||||
@ -661,29 +709,25 @@ endif()
|
||||
if(USE_MPS)
|
||||
include(../../../cmake/Metal.cmake)
|
||||
|
||||
set(ATen_MPS_SRCS ${ATen_MPS_SRCS} ${mps_cpp} ${mps_mm} ${mps_h} ${native_mps_cpp} ${native_mps_mm} ${native_mps_h})
|
||||
set(ATen_MPS_SRCS ${ATen_MPS_SRCS} ${mps_cpp} ${mps_mm} ${mps_h} ${native_mps_cpp} ${native_mps_mm} ${native_mps_h} ${native_sparse_mps_mm})
|
||||
|
||||
if(CAN_COMPILE_METAL)
|
||||
foreach(SHADER ${native_mps_metal})
|
||||
foreach(SHADER ${native_mps_metal} ${native_mps_sparse_metal})
|
||||
cmake_path(GET SHADER STEM TGT_STEM)
|
||||
string(CONCAT TGT_BASIC ${TGT_STEM} "_30.air")
|
||||
string(CONCAT TGT_BFLOAT ${TGT_STEM} "_31.air")
|
||||
string(CONCAT TGT_BASIC ${TGT_STEM} "_31.air")
|
||||
list(APPEND AIR_BASIC ${TGT_BASIC})
|
||||
list(APPEND AIR_BFLOAT ${TGT_BFLOAT})
|
||||
metal_to_air(${SHADER} ${TGT_BASIC} "-std=metal3.0")
|
||||
metal_to_air(${SHADER} ${TGT_BFLOAT} "-std=metal3.1")
|
||||
metal_to_air(${SHADER} ${TGT_BASIC} "-std=metal3.1")
|
||||
endforeach()
|
||||
air_to_metallib(kernels_basic.metallib ${AIR_BASIC})
|
||||
air_to_metallib(kernels_bfloat.metallib ${AIR_BFLOAT})
|
||||
add_custom_command(
|
||||
COMMAND echo "// $$(date)" > metallib_dummy.cpp
|
||||
DEPENDS kernels_basic.metallib kernels_bfloat.metallib
|
||||
DEPENDS kernels_basic.metallib
|
||||
OUTPUT metallib_dummy.cpp
|
||||
COMMENT "Updating metallibs timestamp")
|
||||
add_custom_target(metallibs DEPENDS kernels_basic.metallib kernels_bfloat.metallib metallib_dummy.cpp)
|
||||
add_custom_target(metallibs DEPENDS kernels_basic.metallib metallib_dummy.cpp)
|
||||
else()
|
||||
file(MAKE_DIRECTORY "${CMAKE_CURRENT_BINARY_DIR}/native/mps")
|
||||
foreach(SHADER ${native_mps_metal})
|
||||
foreach(SHADER ${native_mps_metal} ${native_mps_sparse_metal})
|
||||
cmake_path(GET SHADER STEM TGT_STEM)
|
||||
string(CONCAT SHADER_HDR_NAME "${CMAKE_CURRENT_BINARY_DIR}" /native/mps/ ${TGT_STEM} "_metallib.h")
|
||||
metal_to_metallib_h(${SHADER} ${SHADER_HDR_NAME})
|
||||
|
||||
@ -334,6 +334,14 @@ void Context::setBenchmarkLimitCuDNN(int b) {
|
||||
benchmark_limit_cudnn = b;
|
||||
}
|
||||
|
||||
bool Context::immediateMiopen() const {
|
||||
return immediate_miopen;
|
||||
}
|
||||
|
||||
void Context::setImmediateMiopen(bool b) {
|
||||
immediate_miopen = b;
|
||||
}
|
||||
|
||||
bool Context::allowTF32CuBLAS() const {
|
||||
#ifdef USE_ROCM
|
||||
const auto allow_tf32 = c10::utils::check_env(hipblaslt_allow_tf32);
|
||||
@ -472,6 +480,9 @@ at::BlasBackend Context::blasPreferredBackend() {
|
||||
// call site for blasPreferredBackend(), we set it to an actual value.
|
||||
if (blas_preferred_backend == at::BlasBackend::Default) {
|
||||
blas_preferred_backend = at::BlasBackend::Cublas;
|
||||
// This logic sits in the getter because it needs to validate
|
||||
// values set via env vars such as TORCH_BLAS_PREFER_CUBLASLT
|
||||
// which initialize the backend without calling the setter
|
||||
#ifdef USE_ROCM
|
||||
// AMD Instinct targets prefer hipblaslt
|
||||
static const bool hipblaslt_preferred = []() {
|
||||
@ -501,10 +512,14 @@ at::BlasBackend Context::blasPreferredBackend() {
|
||||
// hipblaslt support for all archs is not as complete as hipblas
|
||||
if (blas_preferred_backend == at::BlasBackend::Cublaslt) {
|
||||
static const bool hipblaslt_unsupported = []() {
|
||||
if(!hasCuBLASLt())
|
||||
{
|
||||
return true;
|
||||
}
|
||||
static const std::vector<std::string> archs = {
|
||||
"gfx90a", "gfx942",
|
||||
#if ROCM_VERSION >= 60300
|
||||
"gfx1100", "gfx1101", "gfx1200", "gfx1201",
|
||||
"gfx1100", "gfx1101", "gfx1200", "gfx1201", "gfx908",
|
||||
#endif
|
||||
#if ROCM_VERSION >= 60500
|
||||
"gfx950"
|
||||
@ -526,6 +541,24 @@ at::BlasBackend Context::blasPreferredBackend() {
|
||||
return blas_preferred_backend;
|
||||
}
|
||||
|
||||
bool Context::ckSupported() {
|
||||
#ifdef USE_ROCM
|
||||
static const std::vector<std::string> supported_archs = {
|
||||
"gfx90a", "gfx942", "gfx950"
|
||||
};
|
||||
for (auto index : c10::irange(detail::getCUDAHooks().deviceCount())) {
|
||||
if(!detail::getCUDAHooks().isGPUArch(supported_archs, index)) {
|
||||
TORCH_WARN_ONCE(
|
||||
"Attempting to use CK on an unsupported architecture! Cannot set backend to CK");
|
||||
return false;
|
||||
}
|
||||
}
|
||||
return true;
|
||||
#else
|
||||
return false;
|
||||
#endif
|
||||
}
|
||||
|
||||
void Context::setBlasPreferredBackend(at::BlasBackend b) {
|
||||
#ifdef _MSC_VER
|
||||
TORCH_WARN_ONCE(
|
||||
@ -535,8 +568,14 @@ void Context::setBlasPreferredBackend(at::BlasBackend b) {
|
||||
#else
|
||||
TORCH_CHECK((b != at::BlasBackend::Cublaslt) || hasCuBLASLt(),
|
||||
"Cannot set preferred backend to cuBLASLt if PyTorch has not been compiled with cuBLASLt.");
|
||||
TORCH_CHECK((b != at::BlasBackend::Ck) || hasROCM(),
|
||||
"Cannot set preferred backend to Ck if PyTorch has not been compiled for ROCm.");
|
||||
#ifdef USE_ROCM
|
||||
static const bool ckSupportedFlag = ckSupported();
|
||||
static const bool hasCKGEMMFlag = hasCKGEMM();
|
||||
TORCH_CHECK((b != at::BlasBackend::Ck) || (ckSupportedFlag && hasCKGEMMFlag),
|
||||
"Cannot set preferred blas backend to CK since following conditions are not true: ",
|
||||
"architecture supported for CK: ", ckSupportedFlag,
|
||||
", PyTorch built with CK GEMM support: ", hasCKGEMMFlag);
|
||||
#endif
|
||||
if (b != at::BlasBackend::Default && b != at::BlasBackend::Cublas) {
|
||||
TORCH_WARN_ONCE(
|
||||
"torch.backends.cuda.preferred_blas_library is an experimental feature. "
|
||||
@ -548,35 +587,40 @@ void Context::setBlasPreferredBackend(at::BlasBackend b) {
|
||||
#endif
|
||||
}
|
||||
|
||||
at::ROCmFABackend Context::getROCmFAPreferredBackend() const {
|
||||
at::ROCmFABackend Context::getROCmFAPreferredBackend() {
|
||||
#ifdef USE_ROCM
|
||||
// Set potential "Default" value so we don't have to interpret at call sites.
|
||||
// We use aotriton backend as the default, for now.
|
||||
if(rocm_fa_preferred_backend == at::ROCmFABackend::Default) {
|
||||
rocm_fa_preferred_backend = at::ROCmFABackend::AOTriton;
|
||||
} else if (rocm_fa_preferred_backend == at::ROCmFABackend::Ck) {
|
||||
// This logic sits in the getter because it needs to validate
|
||||
// values set via env vars such as TORCH_ROCM_FA_PREFER_CK
|
||||
// which initialize the backend without calling the setter
|
||||
// Perform validity checking
|
||||
static const bool hasCKSDPAFlag = hasCKSDPA();
|
||||
static const bool ckSupportedFlag = ckSupported();
|
||||
if(!(hasCKSDPAFlag && ckSupportedFlag)){
|
||||
TORCH_WARN_ONCE(
|
||||
"Cannot set preferred SDPA backend to CK since following conditions are not true: ",
|
||||
"architecture supported for CK: ", ckSupportedFlag,
|
||||
", PyTorch built with CK SDPA support: ", hasCKSDPAFlag);
|
||||
rocm_fa_preferred_backend = at::ROCmFABackend::AOTriton;
|
||||
}
|
||||
}
|
||||
#endif
|
||||
|
||||
return rocm_fa_preferred_backend;
|
||||
}
|
||||
|
||||
void Context::setROCmFAPreferredBackend(at::ROCmFABackend b) {
|
||||
|
||||
// TODO: add plumbing for hasCK for validity checking
|
||||
TORCH_CHECK((b != at::ROCmFABackend::Ck) || hasROCM(),
|
||||
"Cannot set preferred flash attention backend to Ck if PyTorch has not been compiled for ROCm.");
|
||||
#ifdef USE_ROCM
|
||||
if(b == at::ROCmFABackend::Ck) {
|
||||
static const bool ck_unsupported = []() {
|
||||
static const std::vector<std::string> archs = {
|
||||
"gfx90a", "gfx942"
|
||||
};
|
||||
for (auto index: c10::irange(detail::getCUDAHooks().deviceCount())) {
|
||||
if (!detail::getCUDAHooks().isGPUArch(archs, index)) {
|
||||
TORCH_WARN_ONCE(
|
||||
"Attempting to use CK on an unsupported architecture! Cannot set backend to CK");
|
||||
return true;
|
||||
}
|
||||
}
|
||||
return false;
|
||||
}();
|
||||
if(!ck_unsupported) rocm_fa_preferred_backend = b;
|
||||
}
|
||||
else {
|
||||
rocm_fa_preferred_backend = b;
|
||||
}
|
||||
static const bool hasCKSDPAFlag = hasCKSDPA();
|
||||
static const bool ckSupportedFlag = ckSupported();
|
||||
TORCH_CHECK((b != at::ROCmFABackend::Ck) || (hasCKSDPAFlag && ckSupportedFlag),
|
||||
"Cannot set preferred SDPA backend to CK since following conditions are not true: ",
|
||||
"architecture supported for CK: ", ckSupportedFlag,
|
||||
", PyTorch built with CK SDPA support: ", hasCKSDPAFlag);
|
||||
#endif
|
||||
rocm_fa_preferred_backend = b;
|
||||
}
|
||||
|
||||
@ -132,6 +132,7 @@ class TORCH_API Context {
|
||||
static bool hasKleidiAI();
|
||||
static bool hasLAPACK();
|
||||
static bool hasMKLDNN();
|
||||
static bool ckSupported();
|
||||
static bool hasMAGMA() {
|
||||
return detail::getCUDAHooks().hasMAGMA();
|
||||
}
|
||||
@ -162,6 +163,12 @@ class TORCH_API Context {
|
||||
static bool hasROCM() {
|
||||
return detail::getCUDAHooks().hasROCM();
|
||||
}
|
||||
static bool hasCKSDPA() {
|
||||
return detail::getCUDAHooks().hasCKSDPA();
|
||||
}
|
||||
static bool hasCKGEMM() {
|
||||
return detail::getCUDAHooks().hasCKGEMM();
|
||||
}
|
||||
static bool hasHIP() {
|
||||
return detail::getHIPHooks().hasHIP();
|
||||
}
|
||||
@ -205,6 +212,8 @@ class TORCH_API Context {
|
||||
void setBenchmarkCuDNN(bool);
|
||||
int benchmarkLimitCuDNN() const;
|
||||
void setBenchmarkLimitCuDNN(int);
|
||||
bool immediateMiopen() const;
|
||||
void setImmediateMiopen(bool);
|
||||
bool deterministicCuDNN() const;
|
||||
void setDeterministicCuDNN(bool);
|
||||
bool deterministicMkldnn() const;
|
||||
@ -250,7 +259,7 @@ class TORCH_API Context {
|
||||
at::BlasBackend blasPreferredBackend();
|
||||
void setBlasPreferredBackend(at::BlasBackend);
|
||||
|
||||
at::ROCmFABackend getROCmFAPreferredBackend() const;
|
||||
at::ROCmFABackend getROCmFAPreferredBackend();
|
||||
void setROCmFAPreferredBackend(at::ROCmFABackend);
|
||||
|
||||
// Note [Enabling Deterministic Operations]
|
||||
@ -440,6 +449,7 @@ class TORCH_API Context {
|
||||
bool enabled_overrideable = true;
|
||||
bool allow_fp16_bf16_reduction_mathSDP = false;
|
||||
bool benchmark_cudnn = false;
|
||||
bool immediate_miopen = false;
|
||||
Float32MatmulPrecision float32_matmul_precision =
|
||||
c10::utils::check_env("TORCH_ALLOW_TF32_CUBLAS_OVERRIDE") == true
|
||||
? at::Float32MatmulPrecision::HIGH
|
||||
|
||||
@ -132,6 +132,9 @@ DLDevice torchDeviceToDLDevice(at::Device device) {
|
||||
case DeviceType::PrivateUse1:
|
||||
ctx.device_type = DLDeviceType::kDLExtDev;
|
||||
break;
|
||||
case DeviceType::MPS:
|
||||
ctx.device_type = DLDeviceType::kDLMetal;
|
||||
break;
|
||||
default:
|
||||
TORCH_CHECK_BUFFER(false, "Cannot pack tensors on " + device.str());
|
||||
}
|
||||
@ -164,6 +167,8 @@ static Device getATenDevice(DLDeviceType type, c10::DeviceIndex index, void* dat
|
||||
return at::Device(DeviceType::MAIA, index);
|
||||
case DLDeviceType::kDLExtDev:
|
||||
return at::Device(DeviceType::PrivateUse1, index);
|
||||
case DLDeviceType::kDLMetal:
|
||||
return at::Device(DeviceType::MPS, index);
|
||||
default:
|
||||
TORCH_CHECK_BUFFER(
|
||||
false, "Unsupported device_type: ", std::to_string(type));
|
||||
|
||||
@ -1,5 +1,6 @@
|
||||
#pragma once
|
||||
|
||||
#include <c10/core/CachingDeviceAllocator.h>
|
||||
#include <c10/core/DeviceType.h>
|
||||
#include <c10/macros/Macros.h>
|
||||
|
||||
@ -72,6 +73,27 @@ TORCH_API c10::DeviceIndex exchangeDevice(c10::DeviceIndex device_index);
|
||||
// original device index that was active before the change.
|
||||
TORCH_API c10::DeviceIndex maybeExchangeDevice(c10::DeviceIndex device_index);
|
||||
|
||||
TORCH_API inline void emptyCache() {
|
||||
const auto device_type = getAccelerator(true).value();
|
||||
at::getDeviceAllocator(device_type)->emptyCache();
|
||||
}
|
||||
|
||||
TORCH_API inline at::CachingDeviceAllocator::DeviceStats getDeviceStats(
|
||||
c10::DeviceIndex device_index) {
|
||||
const auto device_type = getAccelerator(true).value();
|
||||
return at::getDeviceAllocator(device_type)->getDeviceStats(device_index);
|
||||
}
|
||||
|
||||
TORCH_API inline void resetAccumulatedStats(c10::DeviceIndex device_index) {
|
||||
const auto device_type = getAccelerator(true).value();
|
||||
at::getDeviceAllocator(device_type)->resetAccumulatedStats(device_index);
|
||||
}
|
||||
|
||||
TORCH_API inline void resetPeakStats(c10::DeviceIndex device_index) {
|
||||
const auto device_type = getAccelerator(true).value();
|
||||
at::getDeviceAllocator(device_type)->resetPeakStats(device_index);
|
||||
}
|
||||
|
||||
} // namespace at::accelerator
|
||||
|
||||
namespace at {
|
||||
|
||||
@ -31,7 +31,9 @@ c10::Allocator* GetCPUAllocatorMaybePinned(bool pin_memory) {
|
||||
return at::globalContext().getPinnedMemoryAllocator(opt_device_type);
|
||||
} else {
|
||||
TORCH_CHECK(
|
||||
false, "Need to provide pin_memory allocator to use pin memory.")
|
||||
false,
|
||||
"pin_memory=True requires a CUDA or other accelerator backend; "
|
||||
"no pinned memory allocator is available on this system.")
|
||||
}
|
||||
}
|
||||
|
||||
|
||||
@ -9,7 +9,36 @@
|
||||
|
||||
namespace at {
|
||||
|
||||
// TODO: add a note explaining the design decisions
|
||||
/*
|
||||
* Design:
|
||||
* 1. ZeroTensors are regular tensors with TensorOptions, a storage
|
||||
* pointing to nullptr and a ZeroTensor dispatch key set.
|
||||
*
|
||||
* 2. ZeroTensors are immutable. This is done to prevent data race in the case of multithreading
|
||||
* (when two threads try to read the same zero tensor and materialize it in-place).
|
||||
*
|
||||
* 3. ZeroTensor has a boxed fallback that will be dispatched to any ops that don't
|
||||
* have special ZeroTensor handling. This fallback materializes each ZeroTensor to
|
||||
* `at::zeros({}, tensor.options()).expand(tensor.sizes())`.
|
||||
|
||||
* 4. ZeroTensors are handled above autograd. This is necessary because fallback
|
||||
* operations are not differentiable.
|
||||
* - Example: Consider add in the case it was using the fallback: zerotensor_a + b.
|
||||
* zerotensor_a would be materialized to c=torch.zeros_like(zerotensor_a) after
|
||||
* passing through the fallback. If this happens above the autograd, then the
|
||||
* gradients would be populated on c instead of zerotensor_a.
|
||||
*
|
||||
* 5. The grad field is always populated with an honest to goodness tensor. This
|
||||
* materialization of ZeroTensors will happen in:
|
||||
* - AccumulateGrad for Backward Mode AD.
|
||||
* - will never be required for ForwardMode AD.
|
||||
* - This is because if all the tangents were undefined (efficient ZeroTensors),
|
||||
* no computation will be performed (this is ensured via an existing pre-check).
|
||||
*
|
||||
* Today ZeroTensors are primarily used to represent undefined gradients in forward AD,
|
||||
* it does not perfectly handle NaNs and Infs as we don't check the actual values
|
||||
* and assume that they are non-zero, non-inf, non-NaN etc.
|
||||
*/
|
||||
// ZeroTensors are designed to be immutable. Thus, we error out when an in-place operation is performed on ZeroTensors
|
||||
static void zeroTensorFallback(const c10::OperatorHandle& op, DispatchKeySet dispatch_keys, torch::jit::Stack* stack) {
|
||||
const auto& arguments = op.schema().arguments();
|
||||
|
||||
@ -239,6 +239,7 @@ TORCH_LIBRARY_IMPL(aten, AutocastMPS, m) {
|
||||
KERNEL_MPS(scaled_dot_product_attention, lower_precision_fp)
|
||||
|
||||
// fp32
|
||||
KERNEL_MPS(conv_transpose3d, input, fp32)
|
||||
KERNEL_MPS(acos, fp32)
|
||||
KERNEL_MPS(asin, fp32)
|
||||
KERNEL_MPS(cosh, fp32)
|
||||
|
||||
Some files were not shown because too many files have changed in this diff Show More
Reference in New Issue
Block a user