Mirror of https://github.com/pytorch/pytorch.git
Synced 2025-10-23 23:04:52 +08:00

Compare commits (1 commit): delete-qua… ... lessw2020/…
Commit: 0691f3f824
@@ -79,7 +79,6 @@ def package_cuda_wheel(wheel_path, desired_cuda) -> None:
    os.system(f"unzip {wheel_path} -d {folder}/tmp")
    libs_to_copy = [
        "/usr/local/cuda/extras/CUPTI/lib64/libcupti.so.12",
        "/usr/local/cuda/extras/CUPTI/lib64/libnvperf_host.so",
        "/usr/local/cuda/lib64/libcudnn.so.9",
        "/usr/local/cuda/lib64/libcublas.so.12",
        "/usr/local/cuda/lib64/libcublasLt.so.12",

@@ -89,7 +88,6 @@ def package_cuda_wheel(wheel_path, desired_cuda) -> None:
        "/usr/local/cuda/lib64/libcusparseLt.so.0",
        "/usr/local/cuda/lib64/libcusolver.so.11",
        "/usr/local/cuda/lib64/libcurand.so.10",
        "/usr/local/cuda/lib64/libnccl.so.2",
        "/usr/local/cuda/lib64/libnvJitLink.so.12",
        "/usr/local/cuda/lib64/libnvrtc.so.12",
        "/usr/local/cuda/lib64/libcudnn_adv.so.9",
@@ -275,6 +275,17 @@ case "$tag" in
    VISION=yes
    TRITON=yes
    ;;
  pytorch-linux-jammy-py3-clang12-asan)
    ANACONDA_PYTHON_VERSION=3.9
    CLANG_VERSION=12
    VISION=yes
    TRITON=yes
    ;;
  pytorch-linux-jammy-py3-clang15-asan)
    ANACONDA_PYTHON_VERSION=3.10
    CLANG_VERSION=15
    VISION=yes
    ;;
  pytorch-linux-jammy-py3-clang18-asan)
    ANACONDA_PYTHON_VERSION=3.10
    CLANG_VERSION=18
@@ -1 +1 @@
ae848267bebc65c6181e8cc5e64a6357d2679260
c8757738a7418249896224430ce84888e8ecdd79
@@ -3,10 +3,11 @@
set -uex -o pipefail

PYTHON_DOWNLOAD_URL=https://www.python.org/ftp/python
PYTHON_DOWNLOAD_GITHUB_BRANCH=https://github.com/python/cpython/archive/refs/heads # @lint-ignore
GET_PIP_URL=https://bootstrap.pypa.io/get-pip.py

# Python versions to be installed in /opt/$VERSION_NO
CPYTHON_VERSIONS=${CPYTHON_VERSIONS:-"3.9.0 3.10.1 3.11.0 3.12.0 3.13.0 3.13.0t 3.14.0 3.14.0t"}
CPYTHON_VERSIONS=${CPYTHON_VERSIONS:-"3.9.0 3.10.1 3.11.0 3.12.0 3.13.0 3.13.0t"}

function check_var {
    if [ -z "$1" ]; then

@@ -23,8 +24,9 @@ function do_cpython_build {
    tar -xzf Python-$py_ver.tgz

    local additional_flags=""
    if [[ "$py_ver" == *"t" ]]; then
    if [ "$py_ver" == "3.13.0t" ]; then
        additional_flags=" --disable-gil"
        mv cpython-3.13/ cpython-3.13t/
    fi

    pushd $py_folder

@@ -74,20 +76,24 @@ function do_cpython_build {
function build_cpython {
    local py_ver=$1
    check_var $py_ver
    local py_suffix=$py_ver
    local py_folder=$py_ver
    check_var $PYTHON_DOWNLOAD_URL
    local py_ver_folder=$py_ver

    # Special handling for nogil
    if [[ "${py_ver}" == *"t" ]]; then
        py_suffix=${py_ver::-1}
        py_folder=$py_suffix
    if [ "$py_ver" = "3.13.0t" ]; then
        PY_VER_SHORT="3.13"
        PYT_VER_SHORT="3.13t"
        check_var $PYTHON_DOWNLOAD_GITHUB_BRANCH
        wget $PYTHON_DOWNLOAD_GITHUB_BRANCH/$PY_VER_SHORT.tar.gz -O Python-$py_ver.tgz
        do_cpython_build $py_ver cpython-$PYT_VER_SHORT
    elif [ "$py_ver" = "3.13.0" ]; then
        PY_VER_SHORT="3.13"
        check_var $PYTHON_DOWNLOAD_GITHUB_BRANCH
        wget $PYTHON_DOWNLOAD_GITHUB_BRANCH/$PY_VER_SHORT.tar.gz -O Python-$py_ver.tgz
        do_cpython_build $py_ver cpython-$PY_VER_SHORT
    else
        wget -q $PYTHON_DOWNLOAD_URL/$py_ver_folder/Python-$py_ver.tgz
        do_cpython_build $py_ver Python-$py_ver
    fi
    # Only b3 is available now
    if [ "$py_suffix" == "3.14.0" ]; then
        py_suffix="3.14.0b3"
    fi
    wget -q $PYTHON_DOWNLOAD_URL/$py_folder/Python-$py_suffix.tgz -O Python-$py_ver.tgz
    do_cpython_build $py_ver Python-$py_suffix

    rm -f Python-$py_ver.tgz
}
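A note on the nogil branch in build_cpython above: the generic *"t" check derives the plain version from the free-threaded one with bash substring expansion. A minimal sketch of that expansion (the version string is just illustrative):

# ${var::-1} drops the final character, so a free-threaded version id
# like "3.14.0t" becomes the plain "3.14.0".
py_ver="3.14.0t"
py_suffix=${py_ver::-1}
echo "$py_suffix"   # -> 3.14.0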
@@ -10,8 +10,6 @@ else
    arch_path='sbsa'
fi

NVSHMEM_VERSION=3.3.9

function install_cuda {
    version=$1
    runfile=$2

@@ -42,52 +40,13 @@ function install_cudnn {
    rm -rf tmp_cudnn
}

function install_nvshmem {
    cuda_major_version=$1  # e.g. "12"
    nvshmem_version=$2     # e.g. "3.3.9"

    case "${arch_path}" in
        sbsa)
            dl_arch="aarch64"
            ;;
        x86_64)
            dl_arch="x64"
            ;;
        *)
            dl_arch="${arch}"
            ;;
    esac

    tmpdir="tmp_nvshmem"
    mkdir -p "${tmpdir}" && cd "${tmpdir}"

    # nvSHMEM license: https://docs.nvidia.com/nvshmem/api/sla.html
    filename="libnvshmem_cuda${cuda_major_version}-linux-${arch_path}-${nvshmem_version}"
    url="https://developer.download.nvidia.com/compute/redist/nvshmem/${nvshmem_version}/builds/cuda${cuda_major_version}/txz/agnostic/${dl_arch}/${filename}.tar.gz"

    # download, unpack, install
    wget -q "${url}"
    tar xf "${filename}.tar.gz"
    cp -a "libnvshmem/include/"* /usr/local/include/
    cp -a "libnvshmem/lib/"* /usr/local/lib/

    # cleanup
    cd ..
    rm -rf "${tmpdir}"

    echo "nvSHMEM ${nvshmem_version} for CUDA ${cuda_major_version} (${arch_path}) installed."
}
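The helper above takes a CUDA major version and the pinned NVSHMEM version; the install_12x functions that follow call it as, for example:

# Sketch of the call sites used below (NVSHMEM_VERSION=3.3.9 is pinned at the top of the script).
install_nvshmem 12 "${NVSHMEM_VERSION}"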
function install_126 {
    CUDNN_VERSION=9.10.2.21
    echo "Installing CUDA 12.6.3 and cuDNN ${CUDNN_VERSION} and NVSHMEM and NCCL and cuSparseLt-0.7.1"
    echo "Installing CUDA 12.6.3 and cuDNN ${CUDNN_VERSION} and NCCL and cuSparseLt-0.7.1"
    install_cuda 12.6.3 cuda_12.6.3_560.35.05_linux

    install_cudnn 12 $CUDNN_VERSION

    install_nvshmem 12 $NVSHMEM_VERSION

    CUDA_VERSION=12.6 bash install_nccl.sh

    CUDA_VERSION=12.6 bash install_cusparselt.sh

@@ -97,15 +56,13 @@ function install_126 {

function install_129 {
    CUDNN_VERSION=9.10.2.21
    echo "Installing CUDA 12.9.1 and cuDNN ${CUDNN_VERSION} and NVSHMEM and NCCL and cuSparseLt-0.7.1"
    echo "Installing CUDA 12.9.1 and cuDNN ${CUDNN_VERSION} and NCCL and cuSparseLt-0.7.1"
    # install CUDA 12.9.1 in the same container
    install_cuda 12.9.1 cuda_12.9.1_575.57.08_linux

    # cuDNN license: https://developer.nvidia.com/cudnn/license_agreement
    install_cudnn 12 $CUDNN_VERSION

    install_nvshmem 12 $NVSHMEM_VERSION

    CUDA_VERSION=12.9 bash install_nccl.sh

    CUDA_VERSION=12.9 bash install_cusparselt.sh

@@ -149,15 +106,13 @@ function prune_126 {

function install_128 {
    CUDNN_VERSION=9.8.0.87
    echo "Installing CUDA 12.8.1 and cuDNN ${CUDNN_VERSION} and NVSHMEM and NCCL and cuSparseLt-0.7.1"
    echo "Installing CUDA 12.8.1 and cuDNN ${CUDNN_VERSION} and NCCL and cuSparseLt-0.7.1"
    # install CUDA 12.8.1 in the same container
    install_cuda 12.8.1 cuda_12.8.1_570.124.06_linux

    # cuDNN license: https://developer.nvidia.com/cudnn/license_agreement
    install_cudnn 12 $CUDNN_VERSION

    install_nvshmem 12 $NVSHMEM_VERSION

    CUDA_VERSION=12.8 bash install_nccl.sh

    CUDA_VERSION=12.8 bash install_cusparselt.sh
@@ -20,7 +20,7 @@ pip_install \

pip_install coloredlogs packaging
pip_install onnxruntime==1.18.1
pip_install onnxscript==0.3.1
pip_install onnxscript==0.3.0

# Cache the transformers model to be used later by ONNX tests. We need to run the transformers
# package to download the model. By default, the model is cached at ~/.cache/huggingface/hub/
@@ -5,12 +5,7 @@ set -eou pipefail

function do_install() {
    rocm_version=$1
    if [[ ${rocm_version} =~ ^[0-9]+\.[0-9]+\.[0-9]+$ ]]; then
        # chop off any patch version
        rocm_version="${rocm_version%.*}"
    fi

    rocm_version_nodot=${rocm_version//./}
    rocm_version_nodot=${1//./}

    # Version 2.7.2 + ROCm related updates
    MAGMA_VERSION=a1625ff4d9bc362906bd01f805dbbe12612953f6
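For readers unfamiliar with the bash parameter expansions in this hunk, a quick sketch (the value is illustrative):

rocm_version="6.4.1"
echo "${rocm_version%.*}"    # "6.4"  - %.* removes the shortest trailing ".suffix", i.e. the patch version
echo "${rocm_version//./}"   # "641"  - //./ deletes every dot; ${1//./} does the same to the first argument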
@@ -98,10 +98,6 @@ fi
if [ -n "${NUMPY_VERSION}" ]; then
    pip_install "numpy==${NUMPY_VERSION}"
fi

# IMPORTANT: helion needs to be installed without dependencies.
# It depends on torch and triton. We don't want to install
# triton and torch from production on Docker CI images
if [[ "$ANACONDA_PYTHON_VERSION" != 3.9* ]]; then
    pip_install helion --no-deps
    pip_install helion
fi
@@ -39,10 +39,6 @@ case ${DOCKER_TAG_PREFIX} in
    DOCKER_GPU_BUILD_ARG=""
    ;;
  rocm*)
    # we want the patch version of 6.4 instead
    if [[ $(ver $GPU_ARCH_VERSION) -eq $(ver 6.4) ]]; then
        GPU_ARCH_VERSION="${GPU_ARCH_VERSION}.1"
    fi
    BASE_TARGET=rocm
    GPU_IMAGE=rocm/dev-ubuntu-22.04:${GPU_ARCH_VERSION}-complete
    PYTORCH_ROCM_ARCH="gfx900;gfx906;gfx908;gfx90a;gfx942;gfx1030;gfx1100;gfx1101;gfx1102;gfx1200;gfx1201"
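The ver helper used in the -eq comparison above is defined elsewhere in these scripts and is not part of this diff. One common way such helpers make dotted versions comparable with -eq is zero-padding each component into a single integer; the following is an illustrative sketch only, not the repository's actual definition:

ver() {
    # "6.4" -> "  6004000000", so [[ $(ver a) -eq $(ver b) ]] compares numerically
    printf "%3d%03d%03d%03d" $(echo "$1" | tr '.' ' ')
}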
@@ -131,8 +131,6 @@ RUN pip3 install flatbuffers && \
    git clone https://github.com/microsoft/onnxruntime && \
    cd onnxruntime && git checkout v1.21.0 && \
    git submodule update --init --recursive && \
    wget https://github.com/microsoft/onnxruntime/commit/f57db79743c4d1a3553aa05cf95bcd10966030e6.patch && \
    patch -p1 < f57db79743c4d1a3553aa05cf95bcd10966030e6.patch && \
    ./build.sh --config Release --parallel 0 --enable_pybind \
        --build_wheel --enable_training --enable_training_apis \
        --enable_training_ops --skip_tests --allow_running_as_root \
@@ -75,10 +75,6 @@ case ${image} in
    DOCKERFILE_SUFFIX="_cuda_aarch64"
    ;;
  manylinux2_28-builder:rocm*)
    # we want the patch version of 6.4 instead
    if [[ $(ver $GPU_ARCH_VERSION) -eq $(ver 6.4) ]]; then
        GPU_ARCH_VERSION="${GPU_ARCH_VERSION}.1"
    fi
    TARGET=rocm_final
    MANY_LINUX_VERSION="2_28"
    DEVTOOLSET_VERSION="11"
@@ -339,7 +339,7 @@ onnx==1.18.0
#Pinned versions:
#test that import:

onnxscript==0.3.1
onnxscript==0.2.6
#Description: Required by mypy and test_public_bindings.py when checking torch.onnx._internal
#Pinned versions:
#test that import:

@@ -383,6 +383,6 @@ cmake==4.0.0
tlparse==0.3.30
#Description: required for log parsing

cuda-bindings>=12.0,<13.0 ; platform_machine != "s390x"
cuda-bindings>=12.0,<13.0
#Description: required for testing CUDAGraph::raw_cuda_graph(). See https://nvidia.github.io/cuda-python/cuda-bindings/latest/support.html for how this version was chosen. Note "Any fix in the latest bindings would be backported to the prior major version" means that only the newest version of cuda-bindings will get fixes. Depending on the latest version of 12.x is okay because all 12.y versions will be supported via "CUDA minor version compatibility". Pytorch builds against 13.z versions of cuda toolkit work with 12.x versions of cuda-bindings as well because newer drivers work with old toolkits.
#test that import: test_cuda.py
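The only difference between the two cuda-bindings lines is the PEP 508 environment marker, which makes pip skip the requirement entirely on s390x hosts. A sketch of the same marker used on the command line:

# Resolves to "nothing to install" on an s390x machine;
# everywhere else it behaves like the plain version range.
pip install 'cuda-bindings>=12.0,<13.0 ; platform_machine != "s390x"'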
@@ -19,10 +19,9 @@ sphinx_sitemap==2.6.0
#Description: This is used to generate sitemap for PyTorch docs
#Pinned versions: 2.6.0

matplotlib==3.5.3 ; python_version < "3.13"
matplotlib==3.6.3 ; python_version >= "3.13"
matplotlib==3.5.3
#Description: This is used to generate PyTorch docs
#Pinned versions: 3.6.3 if python > 3.12. Otherwise 3.5.3.
#Pinned versions: 3.5.3

tensorboard==2.13.0 ; python_version < "3.13"
tensorboard==2.18.0 ; python_version >= "3.13"
@@ -1 +1 @@
3.4.0
3.3.1
@@ -51,22 +51,20 @@ else
fi

cuda_version_nodot=$(echo $CUDA_VERSION | tr -d '.')
EXTRA_CAFFE2_CMAKE_FLAGS+=("-DATEN_NO_TEST=ON")

TORCH_CUDA_ARCH_LIST="5.0;6.0;7.0;7.5;8.0;8.6"
case ${CUDA_VERSION} in
    #removing sm_50-sm_70 as these architectures are deprecated in CUDA 12.8/9 and will be removed in future releases
    12.8)
        TORCH_CUDA_ARCH_LIST="7.5;8.0;8.6;9.0;10.0;12.0"
        ;;
    12.9)
        TORCH_CUDA_ARCH_LIST="7.5;8.0;8.6;9.0;10.0;12.0+PTX"
    12.8|12.9)
        TORCH_CUDA_ARCH_LIST="7.5;8.0;8.6;9.0;10.0;12.0+PTX" #removing sm_50-sm_70 as these architectures are deprecated in CUDA 12.8/9 and will be removed in future releases
        EXTRA_CAFFE2_CMAKE_FLAGS+=("-DATEN_NO_TEST=ON")
        # WAR to resolve the ld error in libtorch build with CUDA 12.9
        if [[ "$PACKAGE_TYPE" == "libtorch" ]]; then
        if [[ "$DESIRED_CUDA" == "cu129" && "$PACKAGE_TYPE" == "libtorch" ]]; then
            TORCH_CUDA_ARCH_LIST="7.5;8.0;9.0;10.0;12.0+PTX"
        fi
        ;;
    12.6)
        TORCH_CUDA_ARCH_LIST="5.0;6.0;7.0;7.5;8.0;8.6;9.0"
        TORCH_CUDA_ARCH_LIST="${TORCH_CUDA_ARCH_LIST};9.0"
        EXTRA_CAFFE2_CMAKE_FLAGS+=("-DATEN_NO_TEST=ON")
        ;;
    *)
        echo "unknown cuda version $CUDA_VERSION"

@@ -133,8 +131,6 @@ if [[ $CUDA_VERSION == 12* ]]; then
        "/usr/local/cuda/lib64/libnvrtc-builtins.so"
        "/usr/local/cuda/lib64/libcufile.so.0"
        "/usr/local/cuda/lib64/libcufile_rdma.so.1"
        "/usr/local/cuda/extras/CUPTI/lib64/libcupti.so.12"
        "/usr/local/cuda/extras/CUPTI/lib64/libnvperf_host.so"
    )
    DEPS_SONAME+=(
        "libcudnn_adv.so.9"

@@ -153,14 +149,7 @@ if [[ $CUDA_VERSION == 12* ]]; then
        "libnvrtc-builtins.so"
        "libcufile.so.0"
        "libcufile_rdma.so.1"
        "libcupti.so.12"
        "libnvperf_host.so"
    )
    # Add libnvToolsExt only if CUDA version is not 12.9
    if [[ $CUDA_VERSION != 12.9* ]]; then
        DEPS_LIST+=("/usr/local/cuda/lib64/libnvToolsExt.so.1")
        DEPS_SONAME+=("libnvToolsExt.so.1")
    fi
else
    echo "Using nvidia libs from pypi."
    CUDA_RPATHS=(
@@ -198,8 +198,10 @@ fi

# We only build FlashAttention files for CUDA 8.0+, and they require large amounts of
# memory to build and will OOM
if [[ "$BUILD_ENVIRONMENT" == *cuda* ]] && [[ 1 -eq $(echo "${TORCH_CUDA_ARCH_LIST} >= 8.0" | bc) ]]; then
    export BUILD_CUSTOM_STEP="ninja -C build flash_attention -j 2"
if [[ "$BUILD_ENVIRONMENT" == *cuda* ]] && [[ 1 -eq $(echo "${TORCH_CUDA_ARCH_LIST} >= 8.0" | bc) ]] && [ -z "$MAX_JOBS_OVERRIDE" ]; then
    echo "WARNING: FlashAttention files require large amounts of memory to build and will OOM"
    echo "Setting MAX_JOBS=(nproc-2)/3 to reduce memory usage"
    export MAX_JOBS="$(( $(nproc --ignore=2) / 3 ))"
fi
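The MAX_JOBS expression above is plain shell arithmetic; on a hypothetical 16-CPU runner it works out as follows:

# nproc --ignore=2 reports the CPU count minus two reserved cores:
# 16 CPUs -> 14, and integer division by 3 gives 4 parallel jobs.
MAX_JOBS="$(( $(nproc --ignore=2) / 3 ))"
echo "$MAX_JOBS"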
if [[ "${BUILD_ENVIRONMENT}" == *clang* ]]; then
|
||||
@ -393,8 +395,10 @@ else
|
||||
# This is an attempt to mitigate flaky libtorch build OOM error. By default, the build parallelization
|
||||
# is set to be the number of CPU minus 2. So, let's try a more conservative value here. A 4xlarge has
|
||||
# 16 CPUs
|
||||
MAX_JOBS=$(nproc --ignore=4)
|
||||
export MAX_JOBS
|
||||
if [ -z "$MAX_JOBS_OVERRIDE" ]; then
|
||||
MAX_JOBS=$(nproc --ignore=4)
|
||||
export MAX_JOBS
|
||||
fi
|
||||
|
||||
# NB: Install outside of source directory (at the same level as the root
|
||||
# pytorch folder) so that it doesn't get cleaned away prior to docker push.
|
||||
|
||||
@@ -13,13 +13,6 @@ if [[ "$BUILD_ENVIRONMENT" != *win-* ]]; then
fi

if which sccache > /dev/null; then
    # Clear SCCACHE_BUCKET and SCCACHE_REGION if they are empty, otherwise
    # sccache will complain about invalid bucket configuration
    if [[ -z "${SCCACHE_BUCKET:-}" ]]; then
        unset SCCACHE_BUCKET
        unset SCCACHE_REGION
    fi

    # Save sccache logs to file
    sccache --stop-server > /dev/null 2>&1 || true
    rm -f ~/sccache_error.log || true
@@ -5,6 +5,11 @@ set -x

# shellcheck source=./macos-common.sh
source "$(dirname "${BASH_SOURCE[0]}")/macos-common.sh"

if [[ -n "$CONDA_ENV" ]]; then
    # Use binaries under conda environment
    export PATH="$CONDA_ENV/bin":$PATH
fi

# Test that OpenMP is enabled
pushd test
if [[ ! $(python -c "import torch; print(int(torch.backends.openmp.is_available()))") == "1" ]]; then
@@ -11,8 +11,6 @@ export TERM=vt100

# shellcheck source=./common.sh
source "$(dirname "${BASH_SOURCE[0]}")/common.sh"
# shellcheck source=./common-build.sh
source "$(dirname "${BASH_SOURCE[0]}")/common-build.sh"

# Do not change workspace permissions for ROCm and s390x CI jobs
# as it can leave workspace with bad permissions for cancelled jobs

@@ -165,6 +163,8 @@ elif [[ "$BUILD_ENVIRONMENT" == *xpu* ]]; then
    export PYTORCH_TESTING_DEVICE_ONLY_FOR="xpu"
    # setting PYTHON_TEST_EXTRA_OPTION
    export PYTHON_TEST_EXTRA_OPTION="--xpu"
    # Disable sccache for xpu test due to flaky issue https://github.com/pytorch/pytorch/issues/143585
    sudo rm -rf /opt/cache
fi

if [[ "$TEST_CONFIG" == *crossref* ]]; then

@@ -330,15 +330,6 @@ test_h100_distributed() {
    assert_git_not_dirty
}

test_h100_symm_mem() {
    # symmetric memory test
    time python test/run_test.py --include distributed/test_symmetric_memory.py $PYTHON_TEST_EXTRA_OPTION --upload-artifacts-while-running
    time python test/run_test.py --include distributed/test_nvshmem.py $PYTHON_TEST_EXTRA_OPTION --upload-artifacts-while-running
    time python test/run_test.py --include distributed/test_nvshmem_triton.py $PYTHON_TEST_EXTRA_OPTION --upload-artifacts-while-running
    time python test/run_test.py --include distributed/test_nccl.py $PYTHON_TEST_EXTRA_OPTION --upload-artifacts-while-running
    assert_git_not_dirty
}

test_lazy_tensor_meta_reference_disabled() {
    export TORCH_DISABLE_FUNCTIONALIZATION_META_REFERENCE=1
    echo "Testing lazy tensor operations without meta reference"

@@ -353,7 +344,6 @@ test_dynamo_wrapped_shard() {
        exit 1
    fi
    python tools/dynamo/verify_dynamo.py
    python tools/dynamo/gb_id_mapping.py verify
    # PLEASE DO NOT ADD ADDITIONAL EXCLUDES HERE.
    # Instead, use @skipIfTorchDynamo on your tests.
    time python test/run_test.py --dynamo \

@@ -368,17 +358,6 @@ test_dynamo_wrapped_shard() {
    assert_git_not_dirty
}

test_einops() {
    pip install einops==0.6.1
    time python test/run_test.py --einops --verbose --upload-artifacts-while-running
    pip install einops==0.7.0
    time python test/run_test.py --einops --verbose --upload-artifacts-while-running
    pip install einops==0.8.1
    time python test/run_test.py --einops --verbose --upload-artifacts-while-running
    assert_git_not_dirty
}

test_inductor_distributed() {
    # Smuggle a few multi-gpu tests here so that we don't have to request another large node
    echo "Testing multi_gpu tests in test_torchinductor"

@@ -436,21 +415,14 @@ test_inductor_aoti() {
        python3 tools/amd_build/build_amd.py
    fi
    if [[ "$BUILD_ENVIRONMENT" == *sm86* ]]; then
        BUILD_COMMAND=(TORCH_CUDA_ARCH_LIST=8.6 USE_FLASH_ATTENTION=OFF python setup.py develop)
        BUILD_AOT_INDUCTOR_TEST=1 TORCH_CUDA_ARCH_LIST=8.6 USE_FLASH_ATTENTION=OFF python setup.py develop
        # TODO: Replace me completely, as one should not use conda libstdc++, nor need special path to TORCH_LIB
        TEST_ENVS=(CPP_TESTS_DIR="${BUILD_BIN_DIR}" LD_LIBRARY_PATH="/opt/conda/envs/py_3.10/lib:${TORCH_LIB_DIR}:${LD_LIBRARY_PATH}")
        LD_LIBRARY_PATH=/opt/conda/envs/py_3.10/lib/:${TORCH_LIB_DIR}:$LD_LIBRARY_PATH
        CPP_TESTS_DIR="${BUILD_BIN_DIR}" python test/run_test.py --cpp --verbose -i cpp/test_aoti_abi_check cpp/test_aoti_inference -dist=loadfile
    else
        BUILD_COMMAND=(python setup.py develop)
        TEST_ENVS=(CPP_TESTS_DIR="${BUILD_BIN_DIR}" LD_LIBRARY_PATH="${TORCH_LIB_DIR}")
        BUILD_AOT_INDUCTOR_TEST=1 python setup.py develop
        CPP_TESTS_DIR="${BUILD_BIN_DIR}" LD_LIBRARY_PATH="${TORCH_LIB_DIR}" python test/run_test.py --cpp --verbose -i cpp/test_aoti_abi_check cpp/test_aoti_inference -dist=loadfile
    fi

    # aoti cmake custom command requires `torch` to be installed
    # initialize the cmake build cache and install torch
    /usr/bin/env "${BUILD_COMMAND[@]}"
    # rebuild with the build cache with `BUILD_AOT_INDUCTOR_TEST` enabled
    /usr/bin/env CMAKE_FRESH=1 BUILD_AOT_INDUCTOR_TEST=1 "${BUILD_COMMAND[@]}"

    /usr/bin/env "${TEST_ENVS[@]}" python test/run_test.py --cpp --verbose -i cpp/test_aoti_abi_check cpp/test_aoti_inference -dist=loadfile
}

test_inductor_cpp_wrapper_shard() {

@@ -463,26 +435,47 @@ test_inductor_cpp_wrapper_shard() {
    TEST_REPORTS_DIR=$(pwd)/test/test-reports
    mkdir -p "$TEST_REPORTS_DIR"

    if [[ "$1" -eq "2" ]]; then
        # For now, manually put the opinfo tests in shard 2, and all other tests in
        # shard 1. Run all CPU tests, as well as specific GPU tests triggering past
        # bugs, for now.
        python test/run_test.py \
            --include inductor/test_torchinductor_opinfo \
            -k 'linalg or to_sparse or TestInductorOpInfoCPU' \
            --verbose
        exit
    fi

    # Run certain inductor unit tests with cpp wrapper. In the end state, we
    # should be able to run all the inductor unit tests with cpp_wrapper.
    #
    # TODO: I'm pretty sure that "TestInductorOpInfoCPU" is not a valid filter,
    # but change that in another PR to more accurately monitor the increased CI
    # usage.
    python test/run_test.py \
        --include inductor/test_torchinductor_opinfo \
        -k 'linalg or to_sparse or TestInductorOpInfoCPU' \
        --shard "$1" "$NUM_TEST_SHARDS" \
        --verbose
    python test/run_test.py \
        --include inductor/test_torchinductor inductor/test_max_autotune inductor/test_cpu_repro \
        --shard "$1" "$NUM_TEST_SHARDS" \
        --verbose
    python test/run_test.py --inductor \
        --include test_torch \
        -k 'take' \
        --shard "$1" "$NUM_TEST_SHARDS" \
        --verbose
    python test/run_test.py --inductor --include test_torch -k 'take' --verbose

    # Run inductor benchmark tests with cpp wrapper.
    # Skip benchmark tests if it's in rerun-disabled-mode.
    if [[ "${PYTORCH_TEST_RERUN_DISABLED_TESTS}" == "1" ]]; then
        echo "skip dynamo benchmark tests for rerun-disabled-test"
    else
        echo "run dynamo benchmark tests with cpp wrapper"
        python benchmarks/dynamo/timm_models.py --device cuda --accuracy --amp \
            --training --inductor --disable-cudagraphs --only vit_base_patch16_224 \
            --output "$TEST_REPORTS_DIR/inductor_cpp_wrapper_training.csv"
        python benchmarks/dynamo/check_accuracy.py \
            --actual "$TEST_REPORTS_DIR/inductor_cpp_wrapper_training.csv" \
            --expected "benchmarks/dynamo/ci_expected_accuracy/${MAYBE_ROCM}inductor_timm_training.csv"

        python benchmarks/dynamo/torchbench.py --device cuda --accuracy \
            --bfloat16 --inference --inductor --only hf_T5 --output "$TEST_REPORTS_DIR/inductor_cpp_wrapper_inference.csv"
        python benchmarks/dynamo/torchbench.py --device cuda --accuracy \
            --bfloat16 --inference --inductor --only llama --output "$TEST_REPORTS_DIR/inductor_cpp_wrapper_inference.csv"
        python benchmarks/dynamo/torchbench.py --device cuda --accuracy \
            --bfloat16 --inference --inductor --only moco --output "$TEST_REPORTS_DIR/inductor_cpp_wrapper_inference.csv"
        python benchmarks/dynamo/check_accuracy.py \
            --actual "$TEST_REPORTS_DIR/inductor_cpp_wrapper_inference.csv" \
            --expected "benchmarks/dynamo/ci_expected_accuracy/${MAYBE_ROCM}inductor_torchbench_inference.csv"
    fi
}

# "Global" flags for inductor benchmarking controlled by TEST_CONFIG

@@ -603,8 +596,8 @@ test_perf_for_dashboard() {

    local device=cuda
    if [[ "${TEST_CONFIG}" == *cpu* ]]; then
        if [[ "${TEST_CONFIG}" == *cpu_x86_zen* ]]; then
            device=cpu_x86_zen
        if [[ "${TEST_CONFIG}" == *zen_cpu_x86* ]]; then
            device=zen_cpu_x86
        elif [[ "${TEST_CONFIG}" == *cpu_x86* ]]; then
            device=cpu_x86
        elif [[ "${TEST_CONFIG}" == *cpu_aarch64* ]]; then

@@ -621,11 +614,7 @@ test_perf_for_dashboard() {

    for mode in "${modes[@]}"; do
        if [[ "$mode" == "inference" ]]; then
            if [[ "$device" == "cpu_x86" ]]; then
                dtype=amp
            else
                dtype=bfloat16
            fi
            dtype=bfloat16
        elif [[ "$mode" == "training" ]]; then
            dtype=amp
        fi

@@ -637,10 +626,6 @@ test_perf_for_dashboard() {
            target_flag+=( --no-translation-validation)
        fi

        if [[ "$DASHBOARD_TAG" == *freezing-true* ]]; then
            target_flag+=( --freezing)
        fi

        if [[ "$DASHBOARD_TAG" == *default-true* ]]; then
            $TASKSET python "benchmarks/dynamo/$suite.py" \
                "${target_flag[@]}" --"$mode" --"$dtype" --backend "$backend" --disable-cudagraphs "$@" \

@@ -1694,11 +1679,11 @@ elif [[ "${TEST_CONFIG}" == *torchbench* ]]; then
        PYTHONPATH=$(pwd)/torchbench test_dynamo_benchmark torchbench "$id"
    fi
elif [[ "${TEST_CONFIG}" == *inductor_cpp_wrapper* ]]; then
    install_torchaudio cuda
    install_torchvision
    checkout_install_torchbench hf_T5 llama moco
    PYTHONPATH=$(pwd)/torchbench test_inductor_cpp_wrapper_shard "$SHARD_NUMBER"
    if [[ "$SHARD_NUMBER" -eq "1" ]]; then
        test_inductor_aoti
    fi
    test_inductor_aoti
elif [[ "${TEST_CONFIG}" == *inductor* ]]; then
    install_torchvision
    test_inductor_shard "${SHARD_NUMBER}"

@@ -1707,8 +1692,6 @@ elif [[ "${TEST_CONFIG}" == *inductor* ]]; then
        test_inductor_distributed
    fi
fi
elif [[ "${TEST_CONFIG}" == *einops* ]]; then
    test_einops
elif [[ "${TEST_CONFIG}" == *dynamo_wrapped* ]]; then
    install_torchvision
    test_dynamo_wrapped_shard "${SHARD_NUMBER}"

@@ -1758,8 +1741,6 @@ elif [[ "${TEST_CONFIG}" == smoke ]]; then
    test_python_smoke
elif [[ "${TEST_CONFIG}" == h100_distributed ]]; then
    test_h100_distributed
elif [[ "${TEST_CONFIG}" == "h100-symm-mem" ]]; then
    test_h100_symm_mem
else
    install_torchvision
    install_monkeytype
@@ -16,7 +16,7 @@ target_link_libraries(simple-torch-test CUDA::cudart CUDA::cufft CUDA::cusparse
find_library(CUDNN_LIBRARY NAMES cudnn)
target_link_libraries(simple-torch-test ${CUDNN_LIBRARY} )
if(MSVC)
    file(GLOB TORCH_DLLS "$ENV{CUDA_PATH}/bin/cudnn64_8.dll" "$ENV{NVTOOLSEXT_PATH}/bin/x64/*.dll")
    file(GLOB TORCH_DLLS "$ENV{CUDA_PATH}/bin/cudnn64_8.dll")
    message("dlls to copy " ${TORCH_DLLS})
    add_custom_command(TARGET simple-torch-test
        POST_BUILD
@@ -52,9 +52,6 @@ python -m pip install parameterized==0.8.1
# Install pulp for testing ilps under torch\distributed\_tools
python -m pip install pulp==2.9.0

# Install expecttest to merge https://github.com/pytorch/pytorch/pull/155308
python -m pip install expecttest==0.3.0

run_tests() {
    # Run nvidia-smi if available
    for path in '/c/Program Files/NVIDIA Corporation/NVSMI/nvidia-smi.exe' /c/Windows/System32/nvidia-smi.exe; do
@@ -18,15 +18,6 @@ REM Check for optional components
set USE_CUDA=
set CMAKE_GENERATOR=Visual Studio 15 2017 Win64

IF "%NVTOOLSEXT_PATH%"=="" (
    IF EXIST "C:\Program Files\NVIDIA Corporation\NvToolsExt\lib\x64\nvToolsExt64_1.lib" (
        set NVTOOLSEXT_PATH=C:\Program Files\NVIDIA Corporation\NvToolsExt
    ) ELSE (
        echo NVTX ^(Visual Studio Extension ^for CUDA^) ^not installed, failing
        exit /b 1
    )
)

IF "%CUDA_PATH_V126%"=="" (
    IF EXIST "C:\Program Files\NVIDIA GPU Computing Toolkit\CUDA\v12.6\bin\nvcc.exe" (
        set "CUDA_PATH_V126=C:\Program Files\NVIDIA GPU Computing Toolkit\CUDA\v12.6"

@@ -18,15 +18,6 @@ REM Check for optional components
set USE_CUDA=
set CMAKE_GENERATOR=Visual Studio 15 2017 Win64

IF "%NVTOOLSEXT_PATH%"=="" (
    IF EXIST "C:\Program Files\NVIDIA Corporation\NvToolsExt\lib\x64\nvToolsExt64_1.lib" (
        set NVTOOLSEXT_PATH=C:\Program Files\NVIDIA Corporation\NvToolsExt
    ) ELSE (
        echo NVTX ^(Visual Studio Extension ^for CUDA^) ^not installed, failing
        exit /b 1
    )
)

IF "%CUDA_PATH_V128%"=="" (
    IF EXIST "C:\Program Files\NVIDIA GPU Computing Toolkit\CUDA\v12.8\bin\nvcc.exe" (
        set "CUDA_PATH_V128=C:\Program Files\NVIDIA GPU Computing Toolkit\CUDA\v12.8"

@@ -18,15 +18,6 @@ REM Check for optional components
set USE_CUDA=
set CMAKE_GENERATOR=Visual Studio 15 2017 Win64

IF "%NVTOOLSEXT_PATH%"=="" (
    IF EXIST "C:\Program Files\NVIDIA Corporation\NvToolsExt\lib\x64\nvToolsExt64_1.lib" (
        set NVTOOLSEXT_PATH=C:\Program Files\NVIDIA Corporation\NvToolsExt
    ) ELSE (
        echo NVTX ^(Visual Studio Extension ^for CUDA^) ^not installed, failing
        exit /b 1
    )
)

IF "%CUDA_PATH_V129%"=="" (
    IF EXIST "C:\Program Files\NVIDIA GPU Computing Toolkit\CUDA\v12.9\bin\nvcc.exe" (
        set "CUDA_PATH_V129=C:\Program Files\NVIDIA GPU Computing Toolkit\CUDA\v12.9"
@@ -8,9 +8,7 @@ copy "%CUDA_PATH%\bin\cusolver*64_*.dll*" pytorch\torch\lib
copy "%CUDA_PATH%\bin\cudnn*64_*.dll*" pytorch\torch\lib
copy "%CUDA_PATH%\bin\nvrtc*64_*.dll*" pytorch\torch\lib
copy "%CUDA_PATH%\extras\CUPTI\lib64\cupti64_*.dll*" pytorch\torch\lib
copy "%CUDA_PATH%\extras\CUPTI\lib64\nvperf_host*.dll*" pytorch\torch\lib

copy "C:\Program Files\NVIDIA Corporation\NvToolsExt\bin\x64\nvToolsExt64_1.dll*" pytorch\torch\lib
copy "%PYTHON_LIB_PATH%\libiomp*5md.dll" pytorch\torch\lib

:: Should be set in build_pytorch.bat
@@ -119,11 +119,6 @@ goto cuda_common
:: If you cannot find the CUDA version you want to build for here then please
:: add it @ https://github.com/pytorch/test-infra/tree/main/aws/ami/windows
if not exist "C:\Program Files\NVIDIA GPU Computing Toolkit\CUDA\v%CUDA_VERSION_STR%\bin\nvcc.exe" (
    if not exist "%SRC_DIR%\temp_build\NvToolsExt.7z" (
        curl -k -L https://ossci-windows.s3.us-east-1.amazonaws.com/builder/NvToolsExt.7z --output "%SRC_DIR%\temp_build\NvToolsExt.7z"
        if errorlevel 1 exit /b 1
    )

    if not exist "%SRC_DIR%\temp_build\gpu_driver_dlls.zip" (
        curl -k -L "https://ossci-windows.s3.us-east-1.amazonaws.com/builder/additional_dlls.zip" --output "%SRC_DIR%\temp_build\gpu_driver_dlls.zip"
        if errorlevel 1 exit /b 1

@@ -150,15 +145,6 @@ if not exist "C:\Program Files\NVIDIA GPU Computing Toolkit\CUDA\v%CUDA_VERSION_
    xcopy /Y "%SRC_DIR%\temp_build\cuda\CUDAVisualStudioIntegration\extras\visual_studio_integration\MSBuildExtensions\*.*" "C:\Program Files (x86)\Microsoft Visual Studio\2022\BuildTools\MSBuild\Microsoft\VC\v170\BuildCustomizations"
)

echo Installing NvToolsExt...
7z x %SRC_DIR%\temp_build\NvToolsExt.7z -o"%SRC_DIR%\temp_build\NvToolsExt"
mkdir "%ProgramFiles%\NVIDIA Corporation\NvToolsExt\bin\x64"
mkdir "%ProgramFiles%\NVIDIA Corporation\NvToolsExt\include"
mkdir "%ProgramFiles%\NVIDIA Corporation\NvToolsExt\lib\x64"
xcopy /Y "%SRC_DIR%\temp_build\NvToolsExt\bin\x64\*.*" "%ProgramFiles%\NVIDIA Corporation\NvToolsExt\bin\x64"
xcopy /Y "%SRC_DIR%\temp_build\NvToolsExt\include\*.*" "%ProgramFiles%\NVIDIA Corporation\NvToolsExt\include"
xcopy /Y "%SRC_DIR%\temp_build\NvToolsExt\lib\x64\*.*" "%ProgramFiles%\NVIDIA Corporation\NvToolsExt\lib\x64"

echo Installing cuDNN...
7z x %CUDNN_SETUP_FILE% -o"%SRC_DIR%\temp_build\cudnn"
xcopy /Y "%SRC_DIR%\temp_build\cudnn\%CUDNN_FOLDER%\bin\*.*" "%ProgramFiles%\NVIDIA GPU Computing Toolkit\CUDA\v%CUDA_VERSION_STR%\bin"

@@ -189,4 +175,3 @@ echo Setting up environment...
set "PATH=%ProgramFiles%\NVIDIA GPU Computing Toolkit\CUDA\v%CUDA_VERSION_STR%\bin;%ProgramFiles%\NVIDIA GPU Computing Toolkit\CUDA\v%CUDA_VERSION_STR%\libnvvp;%PATH%"
set "CUDA_PATH=%ProgramFiles%\NVIDIA GPU Computing Toolkit\CUDA\v%CUDA_VERSION_STR%"
set "CUDA_PATH_V%CUDA_VER_MAJOR%_%CUDA_VER_MINOR%=%ProgramFiles%\NVIDIA GPU Computing Toolkit\CUDA\v%CUDA_VERSION_STR%"
set "NVTOOLSEXT_PATH=%ProgramFiles%\NVIDIA Corporation\NvToolsExt"
@@ -18,5 +18,3 @@ start /wait "" python-amd64.exe /quiet InstallAllUsers=1 PrependPath=0 Include_t
if errorlevel 1 exit /b 1

set "PATH=%CD%\Python\Scripts;%CD%\Python;%PATH%"
%PYTHON_EXEC% -m pip install --upgrade pip setuptools packaging wheel
if errorlevel 1 exit /b 1
@@ -75,8 +75,8 @@ TRITON_VERSION=$(cat $PYTORCH_ROOT/.ci/docker/triton_version.txt)
# Here PYTORCH_EXTRA_INSTALL_REQUIREMENTS is already set for the all the wheel builds hence append TRITON_CONSTRAINT
TRITON_CONSTRAINT="platform_system == 'Linux' and platform_machine == 'x86_64'"

# CUDA 12.9 builds have triton for Linux and Linux aarch64 binaries.
if [[ "$DESIRED_CUDA" == "cu129" ]]; then
# CUDA 12.8 builds have triton for Linux and Linux aarch64 binaries.
if [[ "$DESIRED_CUDA" == cu128 ]]; then
    TRITON_CONSTRAINT="platform_system == 'Linux'"
fi
@@ -125,7 +125,7 @@ runs:
        TAG: ${{ steps.parse-ref.outputs.tag }}
        EVENT_NAME: ${{ github.event_name }}
        SCHEDULE: ${{ github.event.schedule }}
        HEAD_BRANCH: ${{ steps.parse-ref.outputs.branch }}
        HEAD_BRANCH: ${{ github.event.workflow_run.head_branch }}
      id: filter
      run: |
        echo "Workflow: ${GITHUB_WORKFLOW}"
@@ -304,7 +304,8 @@ def unzip_artifact_and_replace_files() -> None:


def set_output() -> None:
    print("Setting output reuse=true")
    # Disable for now so we can monitor first
    # pass
    if os.getenv("GITHUB_OUTPUT"):
        with open(str(os.getenv("GITHUB_OUTPUT")), "a") as env:
            print("reuse=true", file=env)
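For reference, the Python helper above does from a script what GitHub Actions documents as the shell one-liner for publishing step outputs (a sketch, not part of this diff):

# Appending "name=value" to the file named by GITHUB_OUTPUT is how a step
# exposes an output for later steps to consume.
echo "reuse=true" >> "$GITHUB_OUTPUT"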
.github/ci_commit_pins/audio.txt (vendored, 2 changes)
@@ -1 +1 @@
70caf76066ef2c1054d6128b11769dc816a779e7
4e94321c54617dd738a05bfedfc28bc0fa635b5c
.github/ci_commit_pins/xla.txt (vendored, 2 changes)
@@ -1 +1 @@
1c00dea2c9adb2137903c86b4191e8c247f8fda9
55a75404c9b75cd5fd62ab5d4deafc8c506b3af2
.github/label_to_label.yml (vendored, 9 changes)
@@ -48,12 +48,3 @@
    - "module: dynamic shapes"
  then:
    - "oncall: pt2"
- any:
    - "release notes: distributed (c10d)"
    - "release notes: distributed (symm_mem)"
    - "release notes: distributed (pipeline)"
    - "release notes: distributed (fsdp)"
    - "release notes: distributed (dtensor)"
    - "oncall: distributed"
  then:
    - "ciflow/h100-distributed"
.github/merge_rules.yaml (vendored, 2 changes)
@@ -384,7 +384,6 @@
  - leslie-fang-intel
  - jgong5
  - EikanWang
  - CaoE
  mandatory_checks_name:
  - EasyCLA
  - Lint

@@ -436,7 +435,6 @@
  approved_by:
  - leslie-fang-intel
  - jgong5
  - CaoE
  mandatory_checks_name:
  - EasyCLA
  - Lint
.github/pytorch-probot.yml (vendored, 2 changes)
@@ -4,7 +4,6 @@ ciflow_push_tags:
- ciflow/binaries
- ciflow/binaries_libtorch
- ciflow/binaries_wheel
- ciflow/triton_binaries
- ciflow/inductor
- ciflow/inductor-periodic
- ciflow/inductor-rocm

@@ -31,7 +30,6 @@ ciflow_push_tags:
- ciflow/pull
- ciflow/h100
- ciflow/h100-distributed
- ciflow/h100-symm-mem
retryable_workflows:
- pull
- trunk
.github/scripts/delete_old_branches.py (vendored, 30 changes)
@@ -275,7 +275,7 @@ def delete_branches() -> None:
        delete_branch(git_repo, branch)


def delete_old_tags() -> None:
def delete_old_ciflow_tags() -> None:
    # Deletes ciflow tags if they are associated with a closed PR or a specific
    # commit. Lightweight tags don't have information about the date they were
    # created, so we can't check how old they are. The script just assumes that

@@ -288,29 +288,23 @@ def delete_old_tags() -> None:
        delete_branch(git_repo, f"refs/tags/{tag}")

    tags = git_repo._run_git("tag").splitlines()
    open_pr_numbers = [x["number"] for x in get_open_prs()]

    CIFLOW_TAG_REGEX = re.compile(r"^ciflow\/.*\/(\d{5,6}|[0-9a-f]{40})$")
    AUTO_REVERT_TAG_REGEX = re.compile(r"^trunk\/[0-9a-f]{40}$")
    for tag in tags:
        try:
            if ESTIMATED_TOKENS[0] > 400:
                print("Estimated tokens exceeded, exiting")
                break

            if not CIFLOW_TAG_REGEX.match(tag) and not AUTO_REVERT_TAG_REGEX.match(tag):
            if not tag.startswith("ciflow/"):
                continue

            # This checks the date of the commit associated with the tag instead
            # of the tag itself since lightweight tags don't have this
            # information. I think it should be ok since this only runs once a
            # day
            tag_info = git_repo._run_git("show", "-s", "--format=%ct", tag)
            tag_timestamp = int(tag_info.strip())
            # Maybe some timezone issues, but a few hours shouldn't matter
            tag_age_days = (datetime.now().timestamp() - tag_timestamp) / SEC_IN_DAY

            if tag_age_days > 7:
                print(f"[{tag}] Tag is older than 7 days, deleting")
            re_match_pr = re.match(r"^ciflow\/.*\/(\d{5,6})$", tag)
            re_match_sha = re.match(r"^ciflow\/.*\/([0-9a-f]{40})$", tag)
            if re_match_pr:
                pr_number = int(re_match_pr.group(1))
                if pr_number in open_pr_numbers:
                    continue
                delete_tag(tag)
            elif re_match_sha:
                delete_tag(tag)
        except Exception as e:
            print(f"Failed to check tag {tag}: {e}")

@@ -318,4 +312,4 @@ def delete_old_tags() -> None:

if __name__ == "__main__":
    delete_branches()
    delete_old_tags()
    delete_old_ciflow_tags()
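A quick shell illustration of the two tag patterns introduced above (\d is written as [0-9] for grep -E; the tags are made-up examples):

# ciflow/<workflow>/<pr-number or 40-char sha> and trunk/<40-char sha>
echo "ciflow/trunk/12345" | grep -E '^ciflow/.*/([0-9]{5,6}|[0-9a-f]{40})$'    # matches
echo "trunk/0123456789abcdef0123456789abcdef01234567" | grep -E '^trunk/[0-9a-f]{40}$'    # matches
echo "ciflow/doesntseemtomatch" | grep -E '^ciflow/.*/([0-9]{5,6}|[0-9a-f]{40})$'    # prints nothing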
.github/scripts/filter_test_configs.py (vendored, 16 changes)
@@ -18,7 +18,6 @@ import yaml


REENABLE_TEST_REGEX = "(?i)(Close(d|s)?|Resolve(d|s)?|Fix(ed|es)?) (#|https://github.com/pytorch/pytorch/issues/)([0-9]+)"
MAIN_BRANCH = "main"

PREFIX = "test-config/"

@@ -98,7 +97,7 @@ def parse_args() -> Any:
    parser.add_argument(
        "--branch",
        type=str,
        default=MAIN_BRANCH,
        default="main",
        help="the branch name",
    )
    return parser.parse_args()

@@ -457,7 +456,6 @@ def download_json(url: str, headers: dict[str, str], num_retries: int = 3) -> An


def set_output(name: str, val: Any) -> None:
    print(f"Setting output {name}={val}")
    if os.getenv("GITHUB_OUTPUT"):
        with open(str(os.getenv("GITHUB_OUTPUT")), "a") as env:
            print(f"{name}={val}", file=env)

@@ -497,20 +495,13 @@ def check_for_setting(labels: set[str], body: str, setting: str) -> bool:


def perform_misc_tasks(
    labels: set[str],
    test_matrix: dict[str, list[Any]],
    job_name: str,
    pr_body: str,
    branch: Optional[str] = None,
    labels: set[str], test_matrix: dict[str, list[Any]], job_name: str, pr_body: str
) -> None:
    """
    In addition to apply the filter logic, the script also does the following
    misc tasks to set keep-going and is-unstable variables
    """
    set_output(
        "keep-going",
        branch == MAIN_BRANCH or check_for_setting(labels, pr_body, "keep-going"),
    )
    set_output("keep-going", check_for_setting(labels, pr_body, "keep-going"))
    set_output(
        "ci-verbose-test-logs",
        check_for_setting(labels, pr_body, "ci-verbose-test-logs"),

@@ -633,7 +624,6 @@ def main() -> None:
        test_matrix=filtered_test_matrix,
        job_name=args.job_name,
        pr_body=pr_body if pr_body else "",
        branch=args.branch,
    )

    # Set the filtered test matrix as the output
|
||||
|
||||
# NOTE: Please also update the CUDA sources in `PIP_SOURCES` in tools/nightly.py when changing this
|
||||
CUDA_ARCHES = ["12.6", "12.8", "12.9"]
|
||||
CUDA_STABLE = "12.8"
|
||||
CUDA_STABLE = "12.6"
|
||||
CUDA_ARCHES_FULL_VERSION = {
|
||||
"12.6": "12.6.3",
|
||||
"12.8": "12.8.1",
|
||||
@ -54,7 +54,7 @@ PYTORCH_EXTRA_INSTALL_REQUIREMENTS = {
|
||||
"nvidia-cusparse-cu12==12.5.4.2; platform_system == 'Linux' and platform_machine == 'x86_64' | "
|
||||
"nvidia-cusparselt-cu12==0.7.1; platform_system == 'Linux' and platform_machine == 'x86_64' | "
|
||||
"nvidia-nccl-cu12==2.27.3; platform_system == 'Linux' and platform_machine == 'x86_64' | "
|
||||
"nvidia-nvshmem-cu12==3.3.9; platform_system == 'Linux' and platform_machine == 'x86_64' | "
|
||||
"nvidia-nvshmem-cu12==3.2.5; platform_system == 'Linux' and platform_machine == 'x86_64' | "
|
||||
"nvidia-nvtx-cu12==12.6.77; platform_system == 'Linux' and platform_machine == 'x86_64' | "
|
||||
"nvidia-nvjitlink-cu12==12.6.85; platform_system == 'Linux' and platform_machine == 'x86_64' | "
|
||||
"nvidia-cufile-cu12==1.11.1.6; platform_system == 'Linux' and platform_machine == 'x86_64'"
|
||||
@ -71,7 +71,7 @@ PYTORCH_EXTRA_INSTALL_REQUIREMENTS = {
|
||||
"nvidia-cusparse-cu12==12.5.8.93; platform_system == 'Linux' and platform_machine == 'x86_64' | "
|
||||
"nvidia-cusparselt-cu12==0.7.1; platform_system == 'Linux' and platform_machine == 'x86_64' | "
|
||||
"nvidia-nccl-cu12==2.27.3; platform_system == 'Linux' and platform_machine == 'x86_64' | "
|
||||
"nvidia-nvshmem-cu12==3.3.9; platform_system == 'Linux' and platform_machine == 'x86_64' | "
|
||||
"nvidia-nvshmem-cu12==3.2.5; platform_system == 'Linux' and platform_machine == 'x86_64' | "
|
||||
"nvidia-nvtx-cu12==12.8.90; platform_system == 'Linux' and platform_machine == 'x86_64' | "
|
||||
"nvidia-nvjitlink-cu12==12.8.93; platform_system == 'Linux' and platform_machine == 'x86_64' | "
|
||||
"nvidia-cufile-cu12==1.13.1.3; platform_system == 'Linux' and platform_machine == 'x86_64'"
|
||||
@ -88,7 +88,6 @@ PYTORCH_EXTRA_INSTALL_REQUIREMENTS = {
|
||||
"nvidia-cusparse-cu12==12.5.10.65; platform_system == 'Linux' and platform_machine == 'x86_64' | "
|
||||
"nvidia-cusparselt-cu12==0.7.1; platform_system == 'Linux' and platform_machine == 'x86_64' | "
|
||||
"nvidia-nccl-cu12==2.27.3; platform_system == 'Linux' and platform_machine == 'x86_64' | "
|
||||
"nvidia-nvshmem-cu12==3.3.9; platform_system == 'Linux' and platform_machine == 'x86_64' | "
|
||||
"nvidia-nvtx-cu12==12.9.79; platform_system == 'Linux' and platform_machine == 'x86_64' | "
|
||||
"nvidia-nvjitlink-cu12==12.9.86; platform_system == 'Linux' and platform_machine == 'x86_64' | "
|
||||
"nvidia-cufile-cu12==1.14.1.1; platform_system == 'Linux' and platform_machine == 'x86_64'"
|
||||
|
||||
.github/scripts/get_workflow_job_id.py (vendored, 2 changes)
@@ -136,10 +136,10 @@ def find_job_id_name(args: Any) -> tuple[str, str]:


def set_output(name: str, val: Any) -> None:
    print(f"Setting output {name}={val}")
    if os.getenv("GITHUB_OUTPUT"):
        with open(str(os.getenv("GITHUB_OUTPUT")), "a") as env:
            print(f"{name}={val}", file=env)
        print(f"setting {name}={val}")
    else:
        print(f"::set-output name={name}::{val}")
.github/scripts/parse_ref.py (vendored, 1 change)
@@ -5,7 +5,6 @@ import re


def set_output(name: str, val: str) -> None:
    print(f"Setting output {name}={val}")
    if os.getenv("GITHUB_OUTPUT"):
        with open(str(os.getenv("GITHUB_OUTPUT")), "a") as env:
            print(f"{name}={val}", file=env)
.github/scripts/tag_docker_images_for_release.py (vendored, new file, 64 lines)
@@ -0,0 +1,64 @@
import argparse
import subprocess

import generate_binary_build_matrix


def tag_image(
    image: str,
    default_tag: str,
    release_version: str,
    dry_run: str,
    tagged_images: dict[str, bool],
) -> None:
    if image in tagged_images:
        return
    release_image = image.replace(f"-{default_tag}", f"-{release_version}")
    print(f"Tagging {image} to {release_image} , dry_run: {dry_run}")

    if dry_run == "disabled":
        subprocess.check_call(["docker", "pull", image])
        subprocess.check_call(["docker", "tag", image, release_image])
        subprocess.check_call(["docker", "push", release_image])
    tagged_images[image] = True


def main() -> None:
    parser = argparse.ArgumentParser()
    parser.add_argument(
        "--version",
        help="Version to tag",
        type=str,
        default="2.2",
    )
    parser.add_argument(
        "--dry-run",
        help="No Runtime Error check",
        type=str,
        choices=["enabled", "disabled"],
        default="enabled",
    )

    options = parser.parse_args()
    tagged_images: dict[str, bool] = {}
    platform_images = [
        generate_binary_build_matrix.WHEEL_CONTAINER_IMAGES,
        generate_binary_build_matrix.LIBTORCH_CONTAINER_IMAGES,
    ]
    default_tag = generate_binary_build_matrix.DEFAULT_TAG

    for platform_image in platform_images:  # type: ignore[attr-defined]
        for arch in platform_image.keys():  # type: ignore[attr-defined]
            if arch == "cpu-s390x":
                continue
            tag_image(
                platform_image[arch],  # type: ignore[index]
                default_tag,
                options.version,
                options.dry_run,
                tagged_images,
            )


if __name__ == "__main__":
    main()
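Usage of the new script follows directly from its argparse setup above (the version value is illustrative):

# The default --dry-run enabled only prints what would be tagged;
# "disabled" actually pulls, tags, and pushes the release images.
python .github/scripts/tag_docker_images_for_release.py --version 2.8 --dry-run disabled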
.github/scripts/test_delete_old_branches.py (vendored, 56 lines)
@@ -1,56 +0,0 @@
import os
import unittest
from datetime import datetime
from unittest.mock import MagicMock, patch


os.environ["GITHUB_TOKEN"] = "test_token"

from delete_old_branches import delete_old_tags


@patch("delete_old_branches.delete_branch")
@patch("gitutils.GitRepo._run_git")
class TestDeleteTag(unittest.TestCase):
    def test_delete_tag(
        self, mock_run_git: "MagicMock", mock_delete_tag: "MagicMock"
    ) -> None:
        for tag in [
            "ciflow/branch/12345",
            "ciflow/commitsha/1234567890abcdef1234567890abcdef12345678",
            "trunk/1234567890abcdef1234567890abcdef12345678",
        ]:
            mock_run_git.side_effect = [
                tag,
                str(int(datetime.now().timestamp() - 8 * 24 * 60 * 60)),  # 8 days ago
            ]
            delete_old_tags()
            mock_delete_tag.assert_called_once()
            mock_delete_tag.reset_mock()

            # Don't delete if the tag is not old enough
            mock_run_git.side_effect = [
                tag,
                str(int(datetime.now().timestamp() - 6 * 24 * 60 * 60)),  # 6 days ago
            ]
            delete_old_tags()
            mock_delete_tag.assert_not_called()

    def test_do_not_delete_tag(
        self, mock_run_git: "MagicMock", mock_delete_tag: "MagicMock"
    ) -> None:
        for tag in [
            "ciflow/doesntseemtomatch",
            "trunk/doesntseemtomatch",
            "doesntseemtomatch",
        ]:
            mock_run_git.side_effect = [
                tag,
                str(int(datetime.now().timestamp() - 8 * 24 * 60 * 60)),  # 8 days ago
            ]
            delete_old_tags()
            mock_delete_tag.assert_not_called()


if __name__ == "__main__":
    unittest.main()
.github/scripts/windows/build_magma.bat (vendored, 1 change)
@@ -17,7 +17,6 @@ if errorlevel 1 exit /b 1

set "PATH=C:\Tools;C:\Program Files\NVIDIA GPU Computing Toolkit\CUDA\v%CUVER%\bin;C:\Program Files\NVIDIA GPU Computing Toolkit\CUDA\v%CUVER%\libnvvp;%PATH%"
set CUDA_PATH=C:\Program Files\NVIDIA GPU Computing Toolkit\CUDA\v%CUVER%
set NVTOOLSEXT_PATH=C:\Program Files\NVIDIA Corporation\NvToolsExt

mkdir magma_cuda%CUVER_NODOT%
cd magma_cuda%CUVER_NODOT%
.github/workflows/_linux-build.yml (vendored, 15 changes)
@@ -69,6 +69,11 @@ on:
      required: false
      type: string
      default: ""
    max-jobs:
      description: |
        Overwrite the number of jobs to use for the build
      required: false
      type: string
    disable-monitor:
      description: |
        Disable utilization monitoring for build job

@@ -261,6 +266,7 @@ jobs:
          OUR_GITHUB_JOB_ID: ${{ steps.get-job-id.outputs.job-id }}
          HUGGING_FACE_HUB_TOKEN: ${{ secrets.HUGGING_FACE_HUB_TOKEN }}
          SCRIBE_GRAPHQL_ACCESS_TOKEN: ${{ secrets.SCRIBE_GRAPHQL_ACCESS_TOKEN }}
          MAX_JOBS_OVERRIDE: ${{ inputs.max-jobs }}
        run: |
          START_TIME=$(date +%s)
          if [[ ${BUILD_ENVIRONMENT} == *"s390x"* ]]; then

@@ -280,6 +286,12 @@ jobs:
            DOCKER_SHELL_CMD=
          fi

          if [[ ${MAX_JOBS_OVERRIDE} == "" ]]; then
            MAX_JOBS="$(nproc --ignore=2)"
          else
            MAX_JOBS="${MAX_JOBS_OVERRIDE}"
          fi

          # Leaving 1GB for the runner and other things
          TOTAL_AVAILABLE_MEMORY_IN_GB=$(awk '/MemTotal/ { printf "%.3f \n", $2/1024/1024 - 1 }' /proc/meminfo)
          # https://docs.docker.com/engine/containers/resource_constraints/#--memory-swap-details, the 3GB swap

@@ -291,7 +303,8 @@ jobs:
          # shellcheck disable=SC2086
          container_name=$(docker run \
            -e BUILD_ENVIRONMENT \
            -e MAX_JOBS="$(nproc --ignore=2)" \
            -e MAX_JOBS=${MAX_JOBS} \
            -e MAX_JOBS_OVERRIDE \
            -e AWS_DEFAULT_REGION \
            -e PR_NUMBER \
            -e SHA1 \
53
.github/workflows/_linux-test.yml
vendored
53
.github/workflows/_linux-test.yml
vendored
@ -90,13 +90,10 @@ jobs:
|
||||
environment: ${{ github.ref == 'refs/heads/main' && 'scribe-protected' || startsWith(github.ref, 'refs/heads/release/') && 'scribe-protected' || contains(github.event.pull_request.labels.*.name, 'ci-scribe') && 'scribe-pr' || '' }}
|
||||
runs-on: ${{ matrix.runner }}
|
||||
timeout-minutes: ${{ matrix.mem_leak_check == 'mem_leak_check' && 600 || inputs.timeout-minutes }}
|
||||
permissions:
|
||||
id-token: write
|
||||
contents: read
|
||||
steps:
|
||||
- name: Setup SSH (Click me for login details)
|
||||
uses: pytorch/test-infra/.github/actions/setup-ssh@main
|
||||
if: ${{ matrix.runner != 'B200' && inputs.build-environment != 'linux-s390x-binary-manywheel' }}
|
||||
if: ${{ !contains(matrix.runner, 'gcp.a100') && inputs.build-environment != 'linux-s390x-binary-manywheel' }}
|
||||
with:
|
||||
github-secret: ${{ secrets.GITHUB_TOKEN }}
|
||||
instructions: |
|
||||
@ -108,31 +105,18 @@ jobs:
|
||||
with:
|
||||
no-sudo: true
|
||||
|
||||
- name: Setup Python
|
||||
if: matrix.runner == 'B200'
|
||||
uses: actions/setup-python@a26af69be951a213d495a4c3e4e4022e16d87065 # v5.6.0
|
||||
with:
|
||||
python-version: '3.12'
|
||||
cache: pip
|
||||
|
||||
- name: Setup Linux
|
||||
uses: ./.github/actions/setup-linux
|
||||
if: inputs.build-environment != 'linux-s390x-binary-manywheel' && matrix.runner != 'B200'
|
||||
if: inputs.build-environment != 'linux-s390x-binary-manywheel'
|
||||
|
||||
- name: configure aws credentials
|
||||
if: ${{ inputs.aws-role-to-assume != '' && inputs.build-environment != 'linux-s390x-binary-manywheel' }}
|
||||
if : ${{ inputs.aws-role-to-assume != '' && inputs.build-environment != 'linux-s390x-binary-manywheel' }}
|
||||
uses: aws-actions/configure-aws-credentials@ececac1a45f3b08a01d2dd070d28d111c5fe6722 # v4.1.0
|
||||
with:
|
||||
role-to-assume: ${{ inputs.aws-role-to-assume }}
|
||||
role-session-name: gha-linux-test
|
||||
aws-region: us-east-1
|
||||
|
||||
- name: Login to Amazon ECR
|
||||
if: ${{ inputs.aws-role-to-assume != '' && matrix.runner == 'B200' }}
|
||||
id: login-ecr
|
||||
continue-on-error: true
|
||||
uses: aws-actions/amazon-ecr-login@062b18b96a7aff071d4dc91bc00c4c1a7945b076 # v2.0.1
|
||||
|
||||
- name: Calculate docker image
|
||||
id: calculate-docker-image
|
||||
uses: pytorch/test-infra/.github/actions/calculate-docker-image@main
|
||||
@ -164,17 +148,17 @@ jobs:
|
||||
- name: Install nvidia driver, nvidia-docker runtime, set GPU_FLAG
|
||||
id: install-nvidia-driver
|
||||
uses: pytorch/test-infra/.github/actions/setup-nvidia@main
|
||||
if: ${{ contains(inputs.build-environment, 'cuda') && !contains(matrix.config, 'nogpu') && steps.check_container_runner.outputs.IN_CONTAINER_RUNNER == 'false' && matrix.runner != 'B200' }}
|
||||
if: ${{ contains(inputs.build-environment, 'cuda') && !contains(matrix.config, 'nogpu') && steps.check_container_runner.outputs.IN_CONTAINER_RUNNER == 'false' }}
|
||||
|
||||
- name: Setup GPU_FLAG for docker run
|
||||
id: setup-gpu-flag
|
||||
run: echo "GPU_FLAG=--gpus all -e NVIDIA_DRIVER_CAPABILITIES=all" >> "${GITHUB_ENV}"
|
||||
if: ${{ contains(inputs.build-environment, 'cuda') && !contains(matrix.config, 'nogpu') && (steps.check_container_runner.outputs.IN_CONTAINER_RUNNER == 'true' || matrix.runner == 'B200') }}
|
||||
if: ${{ contains(inputs.build-environment, 'cuda') && !contains(matrix.config, 'nogpu') && steps.check_container_runner.outputs.IN_CONTAINER_RUNNER == 'true' }}
|
||||
|
||||
- name: Setup SCCACHE_SERVER_PORT environment for docker run when on container
|
||||
id: setup-sscache-port-flag
|
||||
run: echo "SCCACHE_SERVER_PORT_DOCKER_FLAG=-e SCCACHE_SERVER_PORT=$((RUNNER_UID + 4226))" >> "${GITHUB_ENV}"
|
||||
if: ${{ steps.check_container_runner.outputs.IN_CONTAINER_RUNNER == 'true' && matrix.runner != 'B200' }}
|
||||
if: ${{ steps.check_container_runner.outputs.IN_CONTAINER_RUNNER == 'true' }}
|
||||
|
||||
- name: Lock NVIDIA A100 40GB Frequency
|
||||
run: |
|
||||
@ -241,12 +225,6 @@ jobs:
|
||||
run: |
|
||||
echo "timeout=$((JOB_TIMEOUT-30))" >> "${GITHUB_OUTPUT}"
|
||||
|
||||
- name: Preserve github env variables for use in docker
|
||||
shell: bash
|
||||
run: |
|
||||
env | grep '^GITHUB' >> "/tmp/github_env_${GITHUB_RUN_ID}"
|
||||
env | grep '^CI' >> "/tmp/github_env_${GITHUB_RUN_ID}"
|
||||
|
||||
- name: Test
|
||||
id: test
|
||||
timeout-minutes: ${{ fromJson(steps.test-timeout.outputs.timeout) }}
|
||||
@ -275,8 +253,8 @@ jobs:
|
||||
NO_TD: ${{ steps.keep-going.outputs.ci-no-td }}
|
||||
TD_DISTRIBUTED: ${{ steps.keep-going.outputs.ci-td-distributed }}
|
||||
# Do not set SCCACHE_S3_KEY_PREFIX to share the cache between all build jobs
|
||||
SCCACHE_BUCKET: ${{ matrix.runner != 'B200' && 'ossci-compiler-cache-circleci-v2' || '' }}
|
||||
SCCACHE_REGION: ${{ matrix.runner != 'B200' && 'us-east-1' || '' }}
|
||||
SCCACHE_BUCKET: ossci-compiler-cache-circleci-v2
|
||||
SCCACHE_REGION: us-east-1
|
||||
SHM_SIZE: ${{ contains(inputs.build-environment, 'cuda') && '2g' || '1g' }}
|
||||
DOCKER_IMAGE: ${{ inputs.docker-image }}
|
||||
XLA_CUDA: ${{ contains(inputs.build-environment, 'xla') && '0' || '' }}
|
||||
@ -286,6 +264,7 @@ jobs:
DASHBOARD_TAG: ${{ inputs.dashboard-tag }}
HUGGING_FACE_HUB_TOKEN: ${{ secrets.HUGGING_FACE_HUB_TOKEN }}
SCRIBE_GRAPHQL_ACCESS_TOKEN: ${{ secrets.SCRIBE_GRAPHQL_ACCESS_TOKEN }}
IS_A100_RUNNER: ${{ contains(matrix.runner, 'a100') && '1' || '0' }}
ARTIFACTS_FILE_SUFFIX: ${{ github.job }}-${{ matrix.config }}-${{ matrix.shard }}-${{ matrix.num_shards }}-${{ matrix.runner }}_${{ steps.get-job-id.outputs.job-id }}
run: |
set -x
@ -311,6 +290,10 @@ jobs:
# if for some reason cleanup action doesn't stop container
# when job is cancelled
DOCKER_SHELL_CMD="sleep 12h"

# since some steps are skipped on s390x, if they are necessary, run them here
env | grep '^GITHUB' >> "/tmp/github_env_${GITHUB_RUN_ID}"
env | grep '^CI' >> "/tmp/github_env_${GITHUB_RUN_ID}"
else
SHM_OPTS="--shm-size=${SHM_SIZE}"
JENKINS_USER="--user jenkins"
@ -362,6 +345,7 @@ jobs:
-e HUGGING_FACE_HUB_TOKEN \
-e SCRIBE_GRAPHQL_ACCESS_TOKEN \
-e DASHBOARD_TAG \
-e IS_A100_RUNNER \
-e ARTIFACTS_FILE_SUFFIX \
--memory="${TOTAL_AVAILABLE_MEMORY_IN_GB%.*}g" \
--memory-swap="${TOTAL_MEMORY_WITH_SWAP}g" \
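
`${TOTAL_AVAILABLE_MEMORY_IN_GB%.*}` strips the shortest suffix matching `.*`, truncating a fractional gigabyte reading to the whole number that docker's --memory flag expects. A quick sketch with a made-up value:

    TOTAL_AVAILABLE_MEMORY_IN_GB=186.5                     # hypothetical probe result
    echo "--memory=${TOTAL_AVAILABLE_MEMORY_IN_GB%.*}g"    # prints --memory=186g
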
@ -400,15 +384,6 @@ jobs:
test_config: ${{ matrix.config }}
job_identifier: ${{ github.workflow }}_${{ inputs.build-environment }}

- name: Authenticate with AWS
if: ${{ matrix.runner == 'B200' }}
uses: aws-actions/configure-aws-credentials@ececac1a45f3b08a01d2dd070d28d111c5fe6722 # v4.1.0
with:
role-to-assume: arn:aws:iam::308535385114:role/gha_workflow_upload-benchmark-results
# The max duration enforced by the server side
role-duration-seconds: 18000
aws-region: us-east-1

- name: Upload the benchmark results
uses: pytorch/test-infra/.github/actions/upload-benchmark-results@main
if: inputs.build-environment != 'linux-s390x-binary-manywheel'

2 .github/workflows/_mac-build.yml vendored
@ -123,7 +123,7 @@ jobs:
else
# The runner has access to the S3 bucket via IAM profile without the need
# for any credential
echo "SCCACHE_BUCKET=ossci-compiler-cache-circleci-v2" >> "${GITHUB_ENV}"
echo "SCCACHE_S3_KEY_PREFIX=${GITHUB_WORKFLOW}" >> "${GITHUB_ENV}"
fi

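Appending KEY=VALUE lines to the file behind $GITHUB_ENV is the supported way to export variables to all later steps of a job, which is what both branches of the if/else above rely on:

    echo "SCCACHE_BUCKET=ossci-compiler-cache-circleci-v2" >> "${GITHUB_ENV}"
    echo "SCCACHE_S3_KEY_PREFIX=${GITHUB_WORKFLOW}" >> "${GITHUB_ENV}"
    # subsequent steps now see $SCCACHE_BUCKET and $SCCACHE_S3_KEY_PREFIX
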
48 .github/workflows/_mac-test.yml vendored
@ -60,6 +60,8 @@ jobs:
test:
# Don't run on forked repos or empty test matrix
if: github.repository_owner == 'pytorch' && toJSON(fromJSON(inputs.test-matrix).include) != '[]'
# For setup-miniconda, see https://github.com/conda-incubator/setup-miniconda/issues/179
# Also ensure that we always run with the right architecture
defaults:
run:
shell: bash -e -l {0}
@ -88,10 +90,6 @@ jobs:
pkill "${PROCESS}" || true
done

- name: Clean up leftover miniconda installation
continue-on-error: true
run: brew uninstall miniconda || true

- name: Clean up leftover local python3 site-packages on MacOS pet runner
continue-on-error: true
run: |
@ -126,8 +124,8 @@ jobs:
MONITOR_LOG_INTERVAL: ${{ inputs.monitor-log-interval }}
MONITOR_DATA_COLLECT_INTERVAL: ${{ inputs.monitor-data-collect-interval }}
run: |
python3 -m pip install psutil==5.9.1 dataclasses_json==0.6.7
python3 -m tools.stats.monitor --log-interval "$MONITOR_LOG_INTERVAL" --data-collect-interval "$MONITOR_DATA_COLLECT_INTERVAL" > usage_log.txt 2>&1 &
${CONDA_RUN} python3 -m pip install psutil==5.9.1 dataclasses_json==0.6.7
${CONDA_RUN} python3 -m tools.stats.monitor --log-interval "$MONITOR_LOG_INTERVAL" --data-collect-interval "$MONITOR_DATA_COLLECT_INTERVAL" > usage_log.txt 2>&1 &
echo "monitor-script-pid=${!}" >> "${GITHUB_OUTPUT}"

- name: Download build artifacts
@ -142,10 +140,11 @@ jobs:
with:
use-gha: true

- name: Setup Python
uses: pytorch/test-infra/.github/actions/setup-python@main
- name: Setup miniconda
uses: pytorch/test-infra/.github/actions/setup-miniconda@main
with:
python-version: ${{ inputs.python-version }}
environment-file: .github/requirements/conda-env-macOS-ARM64
pip-requirements-file: .github/requirements/pip-requirements-macOS.txt
default-packages: ""

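From here on the test commands are prefixed with `${CONDA_RUN}` so they execute inside the miniconda environment rather than whichever python3 is first on PATH. A sketch of what the prefix conventionally expands to (the exact definition comes from the setup-miniconda action and is an assumption here):

    CONDA_RUN="conda run -p ${CONDA_ENV} --no-capture-output"   # assumed definition
    ${CONDA_RUN} python3 --version                              # runs the env's interpreter
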
@ -198,32 +197,37 @@ jobs:
# shellcheck disable=SC1090
set -ex

# TODO: Remove me later, and properly activate venv
PATH="$(dirname "$(which python)"):$PATH"
export PATH
arch

if [[ -n "$CONDA_ENV" ]]; then
# Use binaries under conda environment
export PATH="$CONDA_ENV/bin":$PATH
fi

# Print out some information about the test environment
for tool in python3 python; do
which $tool
$tool --version
done
which conda
conda --version
${CONDA_RUN} which python3
${CONDA_RUN} python3 --version
${CONDA_RUN} which python
${CONDA_RUN} python --version

python3 -mpip install --no-index --no-deps dist/*.whl
${CONDA_RUN} python3 -mpip install --no-index --no-deps dist/*.whl

set +e
pushd "${RUNNER_TEMP}"
# Install pip dependencies if they are not found. This is to mitigate a peculiar
# flaky missing dependencies on MacOS
python3 -c "import torch"
${CONDA_RUN} python3 -c "import torch"
RC=$?
popd

if [ "${RC}" -ne 0 ]; then
python3 -mpip install --ignore-installed -r "${PIP_REQUIREMENTS_FILE}"
${CONDA_RUN} python3 -mpip install --ignore-installed -r "${PIP_REQUIREMENTS_FILE}"
fi
set -e

.ci/pytorch/macos-test.sh
${CONDA_RUN} .ci/pytorch/macos-test.sh

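The `set +e` / `RC=$?` / `set -e` bracket lets the import probe fail without aborting the job; only on failure are the pip requirements reinstalled. Condensed, the pattern is:

    set +e                                   # tolerate a failing probe
    ${CONDA_RUN} python3 -c "import torch"
    RC=$?
    set -e
    if [ "${RC}" -ne 0 ]; then               # heal flaky missing deps, then continue
      ${CONDA_RUN} python3 -mpip install --ignore-installed -r "${PIP_REQUIREMENTS_FILE}"
    fi
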
- name: Print remaining test logs
shell: bash
@ -235,7 +239,11 @@ jobs:
shell: bash
if: ${{ contains(steps.get-job-id.outputs.job-name, 'mps') }}
run: |
python3 test/bench_mps_ops.py
if [[ -n "$CONDA_ENV" ]]; then
# Use binaries under conda environment
export PATH="$CONDA_ENV/bin":$PATH
fi
${CONDA_RUN} python3 test/bench_mps_ops.py

- name: Stop monitoring script

3 .github/workflows/_xpu-test.yml vendored
@ -191,6 +191,9 @@ jobs:
SHARD_NUMBER: ${{ matrix.shard }}
NUM_TEST_SHARDS: ${{ matrix.num_shards }}
REENABLED_ISSUES: ${{ steps.keep-going.outputs.reenabled-issues }}
SCCACHE_BUCKET: ossci-compiler-cache-circleci-v2
SCCACHE_REGION: us-east-1
SCCACHE_S3_KEY_PREFIX: ${{ github.workflow }}
DOCKER_IMAGE: ${{ inputs.docker-image }}
XLA_CLANG_CACHE_S3_BUCKET_NAME: ossci-compiler-clang-cache-circleci-xla
PYTORCH_TEST_CUDA_MEM_LEAK_CHECK: ${{ matrix.mem_leak_check && '1' || '0' }}

4 .github/workflows/build-almalinux-images.yml vendored
@ -23,7 +23,7 @@ on:
env:
DOCKER_REGISTRY: "docker.io"
DOCKER_BUILDKIT: 1
WITH_PUSH: ${{ github.event_name == 'push' && (github.ref == 'refs/heads/main' || startsWith(github.ref, 'refs/heads/release') || startsWith(github.ref, 'refs/tags/v')) }}
WITH_PUSH: ${{ github.event_name == 'push' && (github.ref == 'refs/heads/main' || startsWith(github.ref, 'refs/heads/release')) }}

concurrency:
group: ${{ github.workflow }}-${{ github.event.pull_request.number || github.sha }}-${{ github.event_name == 'workflow_dispatch' }}
@ -32,7 +32,7 @@ concurrency:
jobs:
build-docker:
if: github.repository_owner == 'pytorch'
environment: ${{ (github.event_name == 'push' && (github.ref == 'refs/heads/main' || startsWith(github.ref, 'refs/heads/release') || startsWith(github.ref, 'refs/tags/v')) && 'docker-build') || '' }}
environment: ${{ (github.ref == 'refs/heads/main' || startsWith(github.event.ref, 'refs/tags/v')) && 'docker-build' || '' }}
runs-on: linux.9xlarge.ephemeral
strategy:
matrix:

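The same two-line substitution recurs in each docker-image workflow below: the WITH_PUSH variants differ in whether v-prefixed release tags (refs/tags/v*) count as push-worthy refs, and the `environment:` gate is keyed to the matching condition so the same refs reach the docker-build environment. Roughly, in shell terms, the tag-inclusive variant is (for push events):

    case "${GITHUB_REF}" in
      refs/heads/main|refs/heads/release*|refs/tags/v*) WITH_PUSH=true ;;
      *)                                                WITH_PUSH=false ;;
    esac
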
4 .github/workflows/build-libtorch-images.yml vendored
@ -22,7 +22,7 @@ on:
env:
DOCKER_REGISTRY: "docker.io"
DOCKER_BUILDKIT: 1
WITH_PUSH: ${{ github.event_name == 'push' && (github.ref == 'refs/heads/main' || startsWith(github.ref, 'refs/heads/release') || startsWith(github.ref, 'refs/tags/v')) }}
WITH_PUSH: ${{ github.event_name == 'push' && (github.ref == 'refs/heads/main' || startsWith(github.ref, 'refs/heads/release')) }}

concurrency:
group: ${{ github.workflow }}-${{ github.event.pull_request.number || github.sha }}-${{ github.event_name == 'workflow_dispatch' }}
@ -40,7 +40,7 @@ jobs:
curr_ref_type: ${{ github.ref_type }}

build:
environment: ${{ (github.event_name == 'push' && (github.ref == 'refs/heads/main' || startsWith(github.ref, 'refs/heads/release') || startsWith(github.ref, 'refs/tags/v')) && 'docker-build') || '' }}
environment: ${{ (github.ref == 'refs/heads/main' || startsWith(github.event.ref, 'refs/tags/v')) && 'docker-build' || '' }}
needs: get-label-type
runs-on: ${{ needs.get-label-type.outputs.label-type }}linux.9xlarge.ephemeral
name: libtorch-cxx11-builder:${{ matrix.tag }}

@ -12,7 +12,7 @@ on:
env:
DOCKER_REGISTRY: "docker.io"
DOCKER_BUILDKIT: 1
WITH_PUSH: ${{ github.event_name == 'push' && (github.ref == 'refs/heads/main' || startsWith(github.ref, 'refs/heads/release') || startsWith(github.ref, 'refs/tags/v')) }}
WITH_PUSH: ${{ github.event_name == 'push' && (github.ref == 'refs/heads/main' || startsWith(github.ref, 'refs/heads/release')) }}

concurrency:
group: ${{ github.workflow }}-${{ github.event.pull_request.number || github.sha }}-${{ github.event_name == 'workflow_dispatch' }}
@ -21,7 +21,7 @@ concurrency:
jobs:
build-docker-cpu-s390x:
if: github.repository_owner == 'pytorch'
environment: ${{ (github.event_name == 'push' && (github.ref == 'refs/heads/main' || startsWith(github.ref, 'refs/heads/release') || startsWith(github.ref, 'refs/tags/v')) && 'docker-build') || '' }}
environment: ${{ (github.ref == 'refs/heads/main' || startsWith(github.event.ref, 'refs/tags/v')) && 'docker-build' || '' }}
runs-on: linux.s390x
steps:
- name: Checkout PyTorch

5 .github/workflows/build-manywheel-images.yml vendored
@ -23,7 +23,8 @@ on:
env:
DOCKER_REGISTRY: "docker.io"
DOCKER_BUILDKIT: 1
WITH_PUSH: ${{ github.event_name == 'push' && (github.ref == 'refs/heads/main' || startsWith(github.ref, 'refs/heads/release') || startsWith(github.ref, 'refs/tags/v')) }}
WITH_PUSH: ${{ github.event_name == 'push' && (github.ref == 'refs/heads/main' || startsWith(github.ref, 'refs/heads/release')) }}

concurrency:
group: ${{ github.workflow }}-${{ github.event.pull_request.number || github.sha }}-${{ github.event_name == 'workflow_dispatch' }}
cancel-in-progress: true
@ -40,7 +41,7 @@ jobs:
curr_ref_type: ${{ github.ref_type }}

build:
environment: ${{ (github.event_name == 'push' && (github.ref == 'refs/heads/main' || startsWith(github.ref, 'refs/heads/release') || startsWith(github.ref, 'refs/tags/v')) && 'docker-build') || '' }}
environment: ${{ (github.ref == 'refs/heads/main' || startsWith(github.event.ref, 'refs/tags/v')) && 'docker-build' || '' }}
needs: get-label-type
strategy:
fail-fast: false

1 .github/workflows/build-triton-wheel.yml vendored
@ -8,7 +8,6 @@ on:
# NOTE: Binary build pipelines should only get triggered on release candidate builds
# Release candidate tags look like: v1.11.0-rc1
- v[0-9]+.[0-9]+.[0-9]+-rc[0-9]+
- 'ciflow/triton_binaries/*'
paths:
- .github/workflows/build-triton-wheel.yml
- .github/scripts/build_triton_wheel.py

2 .github/workflows/create_release.yml vendored
@ -82,7 +82,7 @@ jobs:
path: ${{ env.PT_RELEASE_FILE }}
- name: Set output
id: release_name
run: echo "pt_release_name=${{ env.PT_RELEASE_NAME }}.tar.gz" >> "${GITHUB_OUTPUT}"
run: echo "name=pt_release_name::${{ env.PT_RELEASE_NAME }}.tar.gz" >> "${GITHUB_OUTPUT}"

upload_source_code_to_s3:
if: ${{ github.repository == 'pytorch/pytorch' && github.event_name == 'push' && startsWith(github.ref, 'refs/tags/v') && contains(github.ref, 'rc') }}

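Only one of the paired run lines uses the format $GITHUB_OUTPUT actually parses; the other mixes in the retired `::set-output name=...::value` syntax, which the output-file mechanism treats as an opaque key. The supported form is:

    # step outputs are plain KEY=VALUE lines appended to $GITHUB_OUTPUT
    echo "pt_release_name=${PT_RELEASE_NAME}.tar.gz" >> "${GITHUB_OUTPUT}"
    # readable later as ${{ steps.release_name.outputs.pt_release_name }}
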
1 .github/workflows/docker-builds.yml vendored
@ -69,6 +69,7 @@ jobs:
pytorch-linux-jammy-py3.12-halide,
pytorch-linux-jammy-xpu-2025.0-py3,
pytorch-linux-jammy-xpu-2025.1-py3,
pytorch-linux-jammy-py3-clang15-asan,
pytorch-linux-jammy-py3-clang18-asan,
pytorch-linux-jammy-py3-clang12-onnx,
pytorch-linux-jammy-linter,

12 .github/workflows/generated-linux-aarch64-binary-manywheel-nightly.yml generated vendored
@ -136,7 +136,7 @@ jobs:
ALPINE_IMAGE: "arm64v8/alpine"
build_name: manywheel-py3_9-cuda-aarch64-12_9
build_environment: linux-aarch64-binary-manywheel
PYTORCH_EXTRA_INSTALL_REQUIREMENTS: nvidia-cuda-nvrtc-cu12==12.9.86; platform_system == 'Linux' and platform_machine == 'x86_64' | nvidia-cuda-runtime-cu12==12.9.79; platform_system == 'Linux' and platform_machine == 'x86_64' | nvidia-cuda-cupti-cu12==12.9.79; platform_system == 'Linux' and platform_machine == 'x86_64' | nvidia-cudnn-cu12==9.10.2.21; platform_system == 'Linux' and platform_machine == 'x86_64' | nvidia-cublas-cu12==12.9.1.4; platform_system == 'Linux' and platform_machine == 'x86_64' | nvidia-cufft-cu12==11.4.1.4; platform_system == 'Linux' and platform_machine == 'x86_64' | nvidia-curand-cu12==10.3.10.19; platform_system == 'Linux' and platform_machine == 'x86_64' | nvidia-cusolver-cu12==11.7.5.82; platform_system == 'Linux' and platform_machine == 'x86_64' | nvidia-cusparse-cu12==12.5.10.65; platform_system == 'Linux' and platform_machine == 'x86_64' | nvidia-cusparselt-cu12==0.7.1; platform_system == 'Linux' and platform_machine == 'x86_64' | nvidia-nccl-cu12==2.27.3; platform_system == 'Linux' and platform_machine == 'x86_64' | nvidia-nvshmem-cu12==3.3.9; platform_system == 'Linux' and platform_machine == 'x86_64' | nvidia-nvtx-cu12==12.9.79; platform_system == 'Linux' and platform_machine == 'x86_64' | nvidia-nvjitlink-cu12==12.9.86; platform_system == 'Linux' and platform_machine == 'x86_64' | nvidia-cufile-cu12==1.14.1.1; platform_system == 'Linux' and platform_machine == 'x86_64'
PYTORCH_EXTRA_INSTALL_REQUIREMENTS: nvidia-cuda-nvrtc-cu12==12.9.86; platform_system == 'Linux' and platform_machine == 'x86_64' | nvidia-cuda-runtime-cu12==12.9.79; platform_system == 'Linux' and platform_machine == 'x86_64' | nvidia-cuda-cupti-cu12==12.9.79; platform_system == 'Linux' and platform_machine == 'x86_64' | nvidia-cudnn-cu12==9.10.2.21; platform_system == 'Linux' and platform_machine == 'x86_64' | nvidia-cublas-cu12==12.9.1.4; platform_system == 'Linux' and platform_machine == 'x86_64' | nvidia-cufft-cu12==11.4.1.4; platform_system == 'Linux' and platform_machine == 'x86_64' | nvidia-curand-cu12==10.3.10.19; platform_system == 'Linux' and platform_machine == 'x86_64' | nvidia-cusolver-cu12==11.7.5.82; platform_system == 'Linux' and platform_machine == 'x86_64' | nvidia-cusparse-cu12==12.5.10.65; platform_system == 'Linux' and platform_machine == 'x86_64' | nvidia-cusparselt-cu12==0.7.1; platform_system == 'Linux' and platform_machine == 'x86_64' | nvidia-nccl-cu12==2.27.3; platform_system == 'Linux' and platform_machine == 'x86_64' | nvidia-nvtx-cu12==12.9.79; platform_system == 'Linux' and platform_machine == 'x86_64' | nvidia-nvjitlink-cu12==12.9.86; platform_system == 'Linux' and platform_machine == 'x86_64' | nvidia-cufile-cu12==1.14.1.1; platform_system == 'Linux' and platform_machine == 'x86_64'
timeout-minutes: 420
secrets:
github-token: ${{ secrets.GITHUB_TOKEN }}
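
Across these generated jobs the paired PYTORCH_EXTRA_INSTALL_REQUIREMENTS values differ only in the nvidia-nvshmem-cu12 entry (pinned at 3.3.9 on one side, absent or at 3.2.5 on the other). Each `|`-separated element is a PEP 508 requirement with an environment marker that pip evaluates per target platform; the `|` delimiter itself looks like the generator's own list separator, not a pip feature. For instance:

    # pip accepts a marker inside a single quoted requirement string:
    pip install "nvidia-nvshmem-cu12==3.3.9; platform_system == 'Linux' and platform_machine == 'x86_64'"
    # on a non-matching platform pip skips the requirement instead of installing it
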
@ -252,7 +252,7 @@ jobs:
ALPINE_IMAGE: "arm64v8/alpine"
build_name: manywheel-py3_10-cuda-aarch64-12_9
build_environment: linux-aarch64-binary-manywheel
PYTORCH_EXTRA_INSTALL_REQUIREMENTS: nvidia-cuda-nvrtc-cu12==12.9.86; platform_system == 'Linux' and platform_machine == 'x86_64' | nvidia-cuda-runtime-cu12==12.9.79; platform_system == 'Linux' and platform_machine == 'x86_64' | nvidia-cuda-cupti-cu12==12.9.79; platform_system == 'Linux' and platform_machine == 'x86_64' | nvidia-cudnn-cu12==9.10.2.21; platform_system == 'Linux' and platform_machine == 'x86_64' | nvidia-cublas-cu12==12.9.1.4; platform_system == 'Linux' and platform_machine == 'x86_64' | nvidia-cufft-cu12==11.4.1.4; platform_system == 'Linux' and platform_machine == 'x86_64' | nvidia-curand-cu12==10.3.10.19; platform_system == 'Linux' and platform_machine == 'x86_64' | nvidia-cusolver-cu12==11.7.5.82; platform_system == 'Linux' and platform_machine == 'x86_64' | nvidia-cusparse-cu12==12.5.10.65; platform_system == 'Linux' and platform_machine == 'x86_64' | nvidia-cusparselt-cu12==0.7.1; platform_system == 'Linux' and platform_machine == 'x86_64' | nvidia-nccl-cu12==2.27.3; platform_system == 'Linux' and platform_machine == 'x86_64' | nvidia-nvshmem-cu12==3.3.9; platform_system == 'Linux' and platform_machine == 'x86_64' | nvidia-nvtx-cu12==12.9.79; platform_system == 'Linux' and platform_machine == 'x86_64' | nvidia-nvjitlink-cu12==12.9.86; platform_system == 'Linux' and platform_machine == 'x86_64' | nvidia-cufile-cu12==1.14.1.1; platform_system == 'Linux' and platform_machine == 'x86_64'
PYTORCH_EXTRA_INSTALL_REQUIREMENTS: nvidia-cuda-nvrtc-cu12==12.9.86; platform_system == 'Linux' and platform_machine == 'x86_64' | nvidia-cuda-runtime-cu12==12.9.79; platform_system == 'Linux' and platform_machine == 'x86_64' | nvidia-cuda-cupti-cu12==12.9.79; platform_system == 'Linux' and platform_machine == 'x86_64' | nvidia-cudnn-cu12==9.10.2.21; platform_system == 'Linux' and platform_machine == 'x86_64' | nvidia-cublas-cu12==12.9.1.4; platform_system == 'Linux' and platform_machine == 'x86_64' | nvidia-cufft-cu12==11.4.1.4; platform_system == 'Linux' and platform_machine == 'x86_64' | nvidia-curand-cu12==10.3.10.19; platform_system == 'Linux' and platform_machine == 'x86_64' | nvidia-cusolver-cu12==11.7.5.82; platform_system == 'Linux' and platform_machine == 'x86_64' | nvidia-cusparse-cu12==12.5.10.65; platform_system == 'Linux' and platform_machine == 'x86_64' | nvidia-cusparselt-cu12==0.7.1; platform_system == 'Linux' and platform_machine == 'x86_64' | nvidia-nccl-cu12==2.27.3; platform_system == 'Linux' and platform_machine == 'x86_64' | nvidia-nvtx-cu12==12.9.79; platform_system == 'Linux' and platform_machine == 'x86_64' | nvidia-nvjitlink-cu12==12.9.86; platform_system == 'Linux' and platform_machine == 'x86_64' | nvidia-cufile-cu12==1.14.1.1; platform_system == 'Linux' and platform_machine == 'x86_64'
timeout-minutes: 420
secrets:
github-token: ${{ secrets.GITHUB_TOKEN }}
@ -368,7 +368,7 @@ jobs:
ALPINE_IMAGE: "arm64v8/alpine"
build_name: manywheel-py3_11-cuda-aarch64-12_9
build_environment: linux-aarch64-binary-manywheel
PYTORCH_EXTRA_INSTALL_REQUIREMENTS: nvidia-cuda-nvrtc-cu12==12.9.86; platform_system == 'Linux' and platform_machine == 'x86_64' | nvidia-cuda-runtime-cu12==12.9.79; platform_system == 'Linux' and platform_machine == 'x86_64' | nvidia-cuda-cupti-cu12==12.9.79; platform_system == 'Linux' and platform_machine == 'x86_64' | nvidia-cudnn-cu12==9.10.2.21; platform_system == 'Linux' and platform_machine == 'x86_64' | nvidia-cublas-cu12==12.9.1.4; platform_system == 'Linux' and platform_machine == 'x86_64' | nvidia-cufft-cu12==11.4.1.4; platform_system == 'Linux' and platform_machine == 'x86_64' | nvidia-curand-cu12==10.3.10.19; platform_system == 'Linux' and platform_machine == 'x86_64' | nvidia-cusolver-cu12==11.7.5.82; platform_system == 'Linux' and platform_machine == 'x86_64' | nvidia-cusparse-cu12==12.5.10.65; platform_system == 'Linux' and platform_machine == 'x86_64' | nvidia-cusparselt-cu12==0.7.1; platform_system == 'Linux' and platform_machine == 'x86_64' | nvidia-nccl-cu12==2.27.3; platform_system == 'Linux' and platform_machine == 'x86_64' | nvidia-nvshmem-cu12==3.3.9; platform_system == 'Linux' and platform_machine == 'x86_64' | nvidia-nvtx-cu12==12.9.79; platform_system == 'Linux' and platform_machine == 'x86_64' | nvidia-nvjitlink-cu12==12.9.86; platform_system == 'Linux' and platform_machine == 'x86_64' | nvidia-cufile-cu12==1.14.1.1; platform_system == 'Linux' and platform_machine == 'x86_64'
PYTORCH_EXTRA_INSTALL_REQUIREMENTS: nvidia-cuda-nvrtc-cu12==12.9.86; platform_system == 'Linux' and platform_machine == 'x86_64' | nvidia-cuda-runtime-cu12==12.9.79; platform_system == 'Linux' and platform_machine == 'x86_64' | nvidia-cuda-cupti-cu12==12.9.79; platform_system == 'Linux' and platform_machine == 'x86_64' | nvidia-cudnn-cu12==9.10.2.21; platform_system == 'Linux' and platform_machine == 'x86_64' | nvidia-cublas-cu12==12.9.1.4; platform_system == 'Linux' and platform_machine == 'x86_64' | nvidia-cufft-cu12==11.4.1.4; platform_system == 'Linux' and platform_machine == 'x86_64' | nvidia-curand-cu12==10.3.10.19; platform_system == 'Linux' and platform_machine == 'x86_64' | nvidia-cusolver-cu12==11.7.5.82; platform_system == 'Linux' and platform_machine == 'x86_64' | nvidia-cusparse-cu12==12.5.10.65; platform_system == 'Linux' and platform_machine == 'x86_64' | nvidia-cusparselt-cu12==0.7.1; platform_system == 'Linux' and platform_machine == 'x86_64' | nvidia-nccl-cu12==2.27.3; platform_system == 'Linux' and platform_machine == 'x86_64' | nvidia-nvtx-cu12==12.9.79; platform_system == 'Linux' and platform_machine == 'x86_64' | nvidia-nvjitlink-cu12==12.9.86; platform_system == 'Linux' and platform_machine == 'x86_64' | nvidia-cufile-cu12==1.14.1.1; platform_system == 'Linux' and platform_machine == 'x86_64'
timeout-minutes: 420
secrets:
github-token: ${{ secrets.GITHUB_TOKEN }}
@ -484,7 +484,7 @@ jobs:
ALPINE_IMAGE: "arm64v8/alpine"
build_name: manywheel-py3_12-cuda-aarch64-12_9
build_environment: linux-aarch64-binary-manywheel
PYTORCH_EXTRA_INSTALL_REQUIREMENTS: nvidia-cuda-nvrtc-cu12==12.9.86; platform_system == 'Linux' and platform_machine == 'x86_64' | nvidia-cuda-runtime-cu12==12.9.79; platform_system == 'Linux' and platform_machine == 'x86_64' | nvidia-cuda-cupti-cu12==12.9.79; platform_system == 'Linux' and platform_machine == 'x86_64' | nvidia-cudnn-cu12==9.10.2.21; platform_system == 'Linux' and platform_machine == 'x86_64' | nvidia-cublas-cu12==12.9.1.4; platform_system == 'Linux' and platform_machine == 'x86_64' | nvidia-cufft-cu12==11.4.1.4; platform_system == 'Linux' and platform_machine == 'x86_64' | nvidia-curand-cu12==10.3.10.19; platform_system == 'Linux' and platform_machine == 'x86_64' | nvidia-cusolver-cu12==11.7.5.82; platform_system == 'Linux' and platform_machine == 'x86_64' | nvidia-cusparse-cu12==12.5.10.65; platform_system == 'Linux' and platform_machine == 'x86_64' | nvidia-cusparselt-cu12==0.7.1; platform_system == 'Linux' and platform_machine == 'x86_64' | nvidia-nccl-cu12==2.27.3; platform_system == 'Linux' and platform_machine == 'x86_64' | nvidia-nvshmem-cu12==3.3.9; platform_system == 'Linux' and platform_machine == 'x86_64' | nvidia-nvtx-cu12==12.9.79; platform_system == 'Linux' and platform_machine == 'x86_64' | nvidia-nvjitlink-cu12==12.9.86; platform_system == 'Linux' and platform_machine == 'x86_64' | nvidia-cufile-cu12==1.14.1.1; platform_system == 'Linux' and platform_machine == 'x86_64'
PYTORCH_EXTRA_INSTALL_REQUIREMENTS: nvidia-cuda-nvrtc-cu12==12.9.86; platform_system == 'Linux' and platform_machine == 'x86_64' | nvidia-cuda-runtime-cu12==12.9.79; platform_system == 'Linux' and platform_machine == 'x86_64' | nvidia-cuda-cupti-cu12==12.9.79; platform_system == 'Linux' and platform_machine == 'x86_64' | nvidia-cudnn-cu12==9.10.2.21; platform_system == 'Linux' and platform_machine == 'x86_64' | nvidia-cublas-cu12==12.9.1.4; platform_system == 'Linux' and platform_machine == 'x86_64' | nvidia-cufft-cu12==11.4.1.4; platform_system == 'Linux' and platform_machine == 'x86_64' | nvidia-curand-cu12==10.3.10.19; platform_system == 'Linux' and platform_machine == 'x86_64' | nvidia-cusolver-cu12==11.7.5.82; platform_system == 'Linux' and platform_machine == 'x86_64' | nvidia-cusparse-cu12==12.5.10.65; platform_system == 'Linux' and platform_machine == 'x86_64' | nvidia-cusparselt-cu12==0.7.1; platform_system == 'Linux' and platform_machine == 'x86_64' | nvidia-nccl-cu12==2.27.3; platform_system == 'Linux' and platform_machine == 'x86_64' | nvidia-nvtx-cu12==12.9.79; platform_system == 'Linux' and platform_machine == 'x86_64' | nvidia-nvjitlink-cu12==12.9.86; platform_system == 'Linux' and platform_machine == 'x86_64' | nvidia-cufile-cu12==1.14.1.1; platform_system == 'Linux' and platform_machine == 'x86_64'
timeout-minutes: 420
secrets:
github-token: ${{ secrets.GITHUB_TOKEN }}
@ -600,7 +600,7 @@ jobs:
ALPINE_IMAGE: "arm64v8/alpine"
build_name: manywheel-py3_13-cuda-aarch64-12_9
build_environment: linux-aarch64-binary-manywheel
PYTORCH_EXTRA_INSTALL_REQUIREMENTS: nvidia-cuda-nvrtc-cu12==12.9.86; platform_system == 'Linux' and platform_machine == 'x86_64' | nvidia-cuda-runtime-cu12==12.9.79; platform_system == 'Linux' and platform_machine == 'x86_64' | nvidia-cuda-cupti-cu12==12.9.79; platform_system == 'Linux' and platform_machine == 'x86_64' | nvidia-cudnn-cu12==9.10.2.21; platform_system == 'Linux' and platform_machine == 'x86_64' | nvidia-cublas-cu12==12.9.1.4; platform_system == 'Linux' and platform_machine == 'x86_64' | nvidia-cufft-cu12==11.4.1.4; platform_system == 'Linux' and platform_machine == 'x86_64' | nvidia-curand-cu12==10.3.10.19; platform_system == 'Linux' and platform_machine == 'x86_64' | nvidia-cusolver-cu12==11.7.5.82; platform_system == 'Linux' and platform_machine == 'x86_64' | nvidia-cusparse-cu12==12.5.10.65; platform_system == 'Linux' and platform_machine == 'x86_64' | nvidia-cusparselt-cu12==0.7.1; platform_system == 'Linux' and platform_machine == 'x86_64' | nvidia-nccl-cu12==2.27.3; platform_system == 'Linux' and platform_machine == 'x86_64' | nvidia-nvshmem-cu12==3.3.9; platform_system == 'Linux' and platform_machine == 'x86_64' | nvidia-nvtx-cu12==12.9.79; platform_system == 'Linux' and platform_machine == 'x86_64' | nvidia-nvjitlink-cu12==12.9.86; platform_system == 'Linux' and platform_machine == 'x86_64' | nvidia-cufile-cu12==1.14.1.1; platform_system == 'Linux' and platform_machine == 'x86_64'
PYTORCH_EXTRA_INSTALL_REQUIREMENTS: nvidia-cuda-nvrtc-cu12==12.9.86; platform_system == 'Linux' and platform_machine == 'x86_64' | nvidia-cuda-runtime-cu12==12.9.79; platform_system == 'Linux' and platform_machine == 'x86_64' | nvidia-cuda-cupti-cu12==12.9.79; platform_system == 'Linux' and platform_machine == 'x86_64' | nvidia-cudnn-cu12==9.10.2.21; platform_system == 'Linux' and platform_machine == 'x86_64' | nvidia-cublas-cu12==12.9.1.4; platform_system == 'Linux' and platform_machine == 'x86_64' | nvidia-cufft-cu12==11.4.1.4; platform_system == 'Linux' and platform_machine == 'x86_64' | nvidia-curand-cu12==10.3.10.19; platform_system == 'Linux' and platform_machine == 'x86_64' | nvidia-cusolver-cu12==11.7.5.82; platform_system == 'Linux' and platform_machine == 'x86_64' | nvidia-cusparse-cu12==12.5.10.65; platform_system == 'Linux' and platform_machine == 'x86_64' | nvidia-cusparselt-cu12==0.7.1; platform_system == 'Linux' and platform_machine == 'x86_64' | nvidia-nccl-cu12==2.27.3; platform_system == 'Linux' and platform_machine == 'x86_64' | nvidia-nvtx-cu12==12.9.79; platform_system == 'Linux' and platform_machine == 'x86_64' | nvidia-nvjitlink-cu12==12.9.86; platform_system == 'Linux' and platform_machine == 'x86_64' | nvidia-cufile-cu12==1.14.1.1; platform_system == 'Linux' and platform_machine == 'x86_64'
timeout-minutes: 420
secrets:
github-token: ${{ secrets.GITHUB_TOKEN }}
@ -716,7 +716,7 @@ jobs:
ALPINE_IMAGE: "arm64v8/alpine"
build_name: manywheel-py3_13t-cuda-aarch64-12_9
build_environment: linux-aarch64-binary-manywheel
PYTORCH_EXTRA_INSTALL_REQUIREMENTS: nvidia-cuda-nvrtc-cu12==12.9.86; platform_system == 'Linux' and platform_machine == 'x86_64' | nvidia-cuda-runtime-cu12==12.9.79; platform_system == 'Linux' and platform_machine == 'x86_64' | nvidia-cuda-cupti-cu12==12.9.79; platform_system == 'Linux' and platform_machine == 'x86_64' | nvidia-cudnn-cu12==9.10.2.21; platform_system == 'Linux' and platform_machine == 'x86_64' | nvidia-cublas-cu12==12.9.1.4; platform_system == 'Linux' and platform_machine == 'x86_64' | nvidia-cufft-cu12==11.4.1.4; platform_system == 'Linux' and platform_machine == 'x86_64' | nvidia-curand-cu12==10.3.10.19; platform_system == 'Linux' and platform_machine == 'x86_64' | nvidia-cusolver-cu12==11.7.5.82; platform_system == 'Linux' and platform_machine == 'x86_64' | nvidia-cusparse-cu12==12.5.10.65; platform_system == 'Linux' and platform_machine == 'x86_64' | nvidia-cusparselt-cu12==0.7.1; platform_system == 'Linux' and platform_machine == 'x86_64' | nvidia-nccl-cu12==2.27.3; platform_system == 'Linux' and platform_machine == 'x86_64' | nvidia-nvshmem-cu12==3.3.9; platform_system == 'Linux' and platform_machine == 'x86_64' | nvidia-nvtx-cu12==12.9.79; platform_system == 'Linux' and platform_machine == 'x86_64' | nvidia-nvjitlink-cu12==12.9.86; platform_system == 'Linux' and platform_machine == 'x86_64' | nvidia-cufile-cu12==1.14.1.1; platform_system == 'Linux' and platform_machine == 'x86_64'
PYTORCH_EXTRA_INSTALL_REQUIREMENTS: nvidia-cuda-nvrtc-cu12==12.9.86; platform_system == 'Linux' and platform_machine == 'x86_64' | nvidia-cuda-runtime-cu12==12.9.79; platform_system == 'Linux' and platform_machine == 'x86_64' | nvidia-cuda-cupti-cu12==12.9.79; platform_system == 'Linux' and platform_machine == 'x86_64' | nvidia-cudnn-cu12==9.10.2.21; platform_system == 'Linux' and platform_machine == 'x86_64' | nvidia-cublas-cu12==12.9.1.4; platform_system == 'Linux' and platform_machine == 'x86_64' | nvidia-cufft-cu12==11.4.1.4; platform_system == 'Linux' and platform_machine == 'x86_64' | nvidia-curand-cu12==10.3.10.19; platform_system == 'Linux' and platform_machine == 'x86_64' | nvidia-cusolver-cu12==11.7.5.82; platform_system == 'Linux' and platform_machine == 'x86_64' | nvidia-cusparse-cu12==12.5.10.65; platform_system == 'Linux' and platform_machine == 'x86_64' | nvidia-cusparselt-cu12==0.7.1; platform_system == 'Linux' and platform_machine == 'x86_64' | nvidia-nccl-cu12==2.27.3; platform_system == 'Linux' and platform_machine == 'x86_64' | nvidia-nvtx-cu12==12.9.79; platform_system == 'Linux' and platform_machine == 'x86_64' | nvidia-nvjitlink-cu12==12.9.86; platform_system == 'Linux' and platform_machine == 'x86_64' | nvidia-cufile-cu12==1.14.1.1; platform_system == 'Linux' and platform_machine == 'x86_64'
timeout-minutes: 420
secrets:
github-token: ${{ secrets.GITHUB_TOKEN }}

6 .github/workflows/generated-linux-binary-manywheel-main.yml generated vendored
@ -61,7 +61,7 @@ jobs:
runner_prefix: "${{ needs.get-label-type.outputs.label-type }}"
build_name: manywheel-py3_9-cuda12_6
build_environment: linux-binary-manywheel
PYTORCH_EXTRA_INSTALL_REQUIREMENTS: nvidia-cuda-nvrtc-cu12==12.6.77; platform_system == 'Linux' and platform_machine == 'x86_64' | nvidia-cuda-runtime-cu12==12.6.77; platform_system == 'Linux' and platform_machine == 'x86_64' | nvidia-cuda-cupti-cu12==12.6.80; platform_system == 'Linux' and platform_machine == 'x86_64' | nvidia-cudnn-cu12==9.10.2.21; platform_system == 'Linux' and platform_machine == 'x86_64' | nvidia-cublas-cu12==12.6.4.1; platform_system == 'Linux' and platform_machine == 'x86_64' | nvidia-cufft-cu12==11.3.0.4; platform_system == 'Linux' and platform_machine == 'x86_64' | nvidia-curand-cu12==10.3.7.77; platform_system == 'Linux' and platform_machine == 'x86_64' | nvidia-cusolver-cu12==11.7.1.2; platform_system == 'Linux' and platform_machine == 'x86_64' | nvidia-cusparse-cu12==12.5.4.2; platform_system == 'Linux' and platform_machine == 'x86_64' | nvidia-cusparselt-cu12==0.7.1; platform_system == 'Linux' and platform_machine == 'x86_64' | nvidia-nccl-cu12==2.27.3; platform_system == 'Linux' and platform_machine == 'x86_64' | nvidia-nvshmem-cu12==3.3.9; platform_system == 'Linux' and platform_machine == 'x86_64' | nvidia-nvtx-cu12==12.6.77; platform_system == 'Linux' and platform_machine == 'x86_64' | nvidia-nvjitlink-cu12==12.6.85; platform_system == 'Linux' and platform_machine == 'x86_64' | nvidia-cufile-cu12==1.11.1.6; platform_system == 'Linux' and platform_machine == 'x86_64'
PYTORCH_EXTRA_INSTALL_REQUIREMENTS: nvidia-cuda-nvrtc-cu12==12.6.77; platform_system == 'Linux' and platform_machine == 'x86_64' | nvidia-cuda-runtime-cu12==12.6.77; platform_system == 'Linux' and platform_machine == 'x86_64' | nvidia-cuda-cupti-cu12==12.6.80; platform_system == 'Linux' and platform_machine == 'x86_64' | nvidia-cudnn-cu12==9.10.2.21; platform_system == 'Linux' and platform_machine == 'x86_64' | nvidia-cublas-cu12==12.6.4.1; platform_system == 'Linux' and platform_machine == 'x86_64' | nvidia-cufft-cu12==11.3.0.4; platform_system == 'Linux' and platform_machine == 'x86_64' | nvidia-curand-cu12==10.3.7.77; platform_system == 'Linux' and platform_machine == 'x86_64' | nvidia-cusolver-cu12==11.7.1.2; platform_system == 'Linux' and platform_machine == 'x86_64' | nvidia-cusparse-cu12==12.5.4.2; platform_system == 'Linux' and platform_machine == 'x86_64' | nvidia-cusparselt-cu12==0.7.1; platform_system == 'Linux' and platform_machine == 'x86_64' | nvidia-nccl-cu12==2.27.3; platform_system == 'Linux' and platform_machine == 'x86_64' | nvidia-nvshmem-cu12==3.2.5; platform_system == 'Linux' and platform_machine == 'x86_64' | nvidia-nvtx-cu12==12.6.77; platform_system == 'Linux' and platform_machine == 'x86_64' | nvidia-nvjitlink-cu12==12.6.85; platform_system == 'Linux' and platform_machine == 'x86_64' | nvidia-cufile-cu12==1.11.1.6; platform_system == 'Linux' and platform_machine == 'x86_64'
secrets:
github-token: ${{ secrets.GITHUB_TOKEN }}
manywheel-py3_9-cuda12_6-test: # Testing
@ -108,7 +108,7 @@ jobs:
runner_prefix: "${{ needs.get-label-type.outputs.label-type }}"
build_name: manywheel-py3_9-cuda12_8
build_environment: linux-binary-manywheel
PYTORCH_EXTRA_INSTALL_REQUIREMENTS: nvidia-cuda-nvrtc-cu12==12.8.93; platform_system == 'Linux' and platform_machine == 'x86_64' | nvidia-cuda-runtime-cu12==12.8.90; platform_system == 'Linux' and platform_machine == 'x86_64' | nvidia-cuda-cupti-cu12==12.8.90; platform_system == 'Linux' and platform_machine == 'x86_64' | nvidia-cudnn-cu12==9.10.2.21; platform_system == 'Linux' and platform_machine == 'x86_64' | nvidia-cublas-cu12==12.8.4.1; platform_system == 'Linux' and platform_machine == 'x86_64' | nvidia-cufft-cu12==11.3.3.83; platform_system == 'Linux' and platform_machine == 'x86_64' | nvidia-curand-cu12==10.3.9.90; platform_system == 'Linux' and platform_machine == 'x86_64' | nvidia-cusolver-cu12==11.7.3.90; platform_system == 'Linux' and platform_machine == 'x86_64' | nvidia-cusparse-cu12==12.5.8.93; platform_system == 'Linux' and platform_machine == 'x86_64' | nvidia-cusparselt-cu12==0.7.1; platform_system == 'Linux' and platform_machine == 'x86_64' | nvidia-nccl-cu12==2.27.3; platform_system == 'Linux' and platform_machine == 'x86_64' | nvidia-nvshmem-cu12==3.3.9; platform_system == 'Linux' and platform_machine == 'x86_64' | nvidia-nvtx-cu12==12.8.90; platform_system == 'Linux' and platform_machine == 'x86_64' | nvidia-nvjitlink-cu12==12.8.93; platform_system == 'Linux' and platform_machine == 'x86_64' | nvidia-cufile-cu12==1.13.1.3; platform_system == 'Linux' and platform_machine == 'x86_64'
PYTORCH_EXTRA_INSTALL_REQUIREMENTS: nvidia-cuda-nvrtc-cu12==12.8.93; platform_system == 'Linux' and platform_machine == 'x86_64' | nvidia-cuda-runtime-cu12==12.8.90; platform_system == 'Linux' and platform_machine == 'x86_64' | nvidia-cuda-cupti-cu12==12.8.90; platform_system == 'Linux' and platform_machine == 'x86_64' | nvidia-cudnn-cu12==9.10.2.21; platform_system == 'Linux' and platform_machine == 'x86_64' | nvidia-cublas-cu12==12.8.4.1; platform_system == 'Linux' and platform_machine == 'x86_64' | nvidia-cufft-cu12==11.3.3.83; platform_system == 'Linux' and platform_machine == 'x86_64' | nvidia-curand-cu12==10.3.9.90; platform_system == 'Linux' and platform_machine == 'x86_64' | nvidia-cusolver-cu12==11.7.3.90; platform_system == 'Linux' and platform_machine == 'x86_64' | nvidia-cusparse-cu12==12.5.8.93; platform_system == 'Linux' and platform_machine == 'x86_64' | nvidia-cusparselt-cu12==0.7.1; platform_system == 'Linux' and platform_machine == 'x86_64' | nvidia-nccl-cu12==2.27.3; platform_system == 'Linux' and platform_machine == 'x86_64' | nvidia-nvshmem-cu12==3.2.5; platform_system == 'Linux' and platform_machine == 'x86_64' | nvidia-nvtx-cu12==12.8.90; platform_system == 'Linux' and platform_machine == 'x86_64' | nvidia-nvjitlink-cu12==12.8.93; platform_system == 'Linux' and platform_machine == 'x86_64' | nvidia-cufile-cu12==1.13.1.3; platform_system == 'Linux' and platform_machine == 'x86_64'
secrets:
github-token: ${{ secrets.GITHUB_TOKEN }}
manywheel-py3_9-cuda12_8-test: # Testing
@ -155,7 +155,7 @@ jobs:
runner_prefix: "${{ needs.get-label-type.outputs.label-type }}"
build_name: manywheel-py3_9-cuda12_9
build_environment: linux-binary-manywheel
PYTORCH_EXTRA_INSTALL_REQUIREMENTS: nvidia-cuda-nvrtc-cu12==12.9.86; platform_system == 'Linux' and platform_machine == 'x86_64' | nvidia-cuda-runtime-cu12==12.9.79; platform_system == 'Linux' and platform_machine == 'x86_64' | nvidia-cuda-cupti-cu12==12.9.79; platform_system == 'Linux' and platform_machine == 'x86_64' | nvidia-cudnn-cu12==9.10.2.21; platform_system == 'Linux' and platform_machine == 'x86_64' | nvidia-cublas-cu12==12.9.1.4; platform_system == 'Linux' and platform_machine == 'x86_64' | nvidia-cufft-cu12==11.4.1.4; platform_system == 'Linux' and platform_machine == 'x86_64' | nvidia-curand-cu12==10.3.10.19; platform_system == 'Linux' and platform_machine == 'x86_64' | nvidia-cusolver-cu12==11.7.5.82; platform_system == 'Linux' and platform_machine == 'x86_64' | nvidia-cusparse-cu12==12.5.10.65; platform_system == 'Linux' and platform_machine == 'x86_64' | nvidia-cusparselt-cu12==0.7.1; platform_system == 'Linux' and platform_machine == 'x86_64' | nvidia-nccl-cu12==2.27.3; platform_system == 'Linux' and platform_machine == 'x86_64' | nvidia-nvshmem-cu12==3.3.9; platform_system == 'Linux' and platform_machine == 'x86_64' | nvidia-nvtx-cu12==12.9.79; platform_system == 'Linux' and platform_machine == 'x86_64' | nvidia-nvjitlink-cu12==12.9.86; platform_system == 'Linux' and platform_machine == 'x86_64' | nvidia-cufile-cu12==1.14.1.1; platform_system == 'Linux' and platform_machine == 'x86_64'
PYTORCH_EXTRA_INSTALL_REQUIREMENTS: nvidia-cuda-nvrtc-cu12==12.9.86; platform_system == 'Linux' and platform_machine == 'x86_64' | nvidia-cuda-runtime-cu12==12.9.79; platform_system == 'Linux' and platform_machine == 'x86_64' | nvidia-cuda-cupti-cu12==12.9.79; platform_system == 'Linux' and platform_machine == 'x86_64' | nvidia-cudnn-cu12==9.10.2.21; platform_system == 'Linux' and platform_machine == 'x86_64' | nvidia-cublas-cu12==12.9.1.4; platform_system == 'Linux' and platform_machine == 'x86_64' | nvidia-cufft-cu12==11.4.1.4; platform_system == 'Linux' and platform_machine == 'x86_64' | nvidia-curand-cu12==10.3.10.19; platform_system == 'Linux' and platform_machine == 'x86_64' | nvidia-cusolver-cu12==11.7.5.82; platform_system == 'Linux' and platform_machine == 'x86_64' | nvidia-cusparse-cu12==12.5.10.65; platform_system == 'Linux' and platform_machine == 'x86_64' | nvidia-cusparselt-cu12==0.7.1; platform_system == 'Linux' and platform_machine == 'x86_64' | nvidia-nccl-cu12==2.27.3; platform_system == 'Linux' and platform_machine == 'x86_64' | nvidia-nvtx-cu12==12.9.79; platform_system == 'Linux' and platform_machine == 'x86_64' | nvidia-nvjitlink-cu12==12.9.86; platform_system == 'Linux' and platform_machine == 'x86_64' | nvidia-cufile-cu12==1.14.1.1; platform_system == 'Linux' and platform_machine == 'x86_64'
secrets:
github-token: ${{ secrets.GITHUB_TOKEN }}
manywheel-py3_9-cuda12_9-test: # Testing

172 .github/workflows/generated-linux-binary-manywheel-nightly.yml generated vendored
@ -131,7 +131,7 @@ jobs:
runner_prefix: "${{ needs.get-label-type.outputs.label-type }}"
build_name: manywheel-py3_9-cuda12_6
build_environment: linux-binary-manywheel
PYTORCH_EXTRA_INSTALL_REQUIREMENTS: nvidia-cuda-nvrtc-cu12==12.6.77; platform_system == 'Linux' and platform_machine == 'x86_64' | nvidia-cuda-runtime-cu12==12.6.77; platform_system == 'Linux' and platform_machine == 'x86_64' | nvidia-cuda-cupti-cu12==12.6.80; platform_system == 'Linux' and platform_machine == 'x86_64' | nvidia-cudnn-cu12==9.10.2.21; platform_system == 'Linux' and platform_machine == 'x86_64' | nvidia-cublas-cu12==12.6.4.1; platform_system == 'Linux' and platform_machine == 'x86_64' | nvidia-cufft-cu12==11.3.0.4; platform_system == 'Linux' and platform_machine == 'x86_64' | nvidia-curand-cu12==10.3.7.77; platform_system == 'Linux' and platform_machine == 'x86_64' | nvidia-cusolver-cu12==11.7.1.2; platform_system == 'Linux' and platform_machine == 'x86_64' | nvidia-cusparse-cu12==12.5.4.2; platform_system == 'Linux' and platform_machine == 'x86_64' | nvidia-cusparselt-cu12==0.7.1; platform_system == 'Linux' and platform_machine == 'x86_64' | nvidia-nccl-cu12==2.27.3; platform_system == 'Linux' and platform_machine == 'x86_64' | nvidia-nvshmem-cu12==3.3.9; platform_system == 'Linux' and platform_machine == 'x86_64' | nvidia-nvtx-cu12==12.6.77; platform_system == 'Linux' and platform_machine == 'x86_64' | nvidia-nvjitlink-cu12==12.6.85; platform_system == 'Linux' and platform_machine == 'x86_64' | nvidia-cufile-cu12==1.11.1.6; platform_system == 'Linux' and platform_machine == 'x86_64'
PYTORCH_EXTRA_INSTALL_REQUIREMENTS: nvidia-cuda-nvrtc-cu12==12.6.77; platform_system == 'Linux' and platform_machine == 'x86_64' | nvidia-cuda-runtime-cu12==12.6.77; platform_system == 'Linux' and platform_machine == 'x86_64' | nvidia-cuda-cupti-cu12==12.6.80; platform_system == 'Linux' and platform_machine == 'x86_64' | nvidia-cudnn-cu12==9.10.2.21; platform_system == 'Linux' and platform_machine == 'x86_64' | nvidia-cublas-cu12==12.6.4.1; platform_system == 'Linux' and platform_machine == 'x86_64' | nvidia-cufft-cu12==11.3.0.4; platform_system == 'Linux' and platform_machine == 'x86_64' | nvidia-curand-cu12==10.3.7.77; platform_system == 'Linux' and platform_machine == 'x86_64' | nvidia-cusolver-cu12==11.7.1.2; platform_system == 'Linux' and platform_machine == 'x86_64' | nvidia-cusparse-cu12==12.5.4.2; platform_system == 'Linux' and platform_machine == 'x86_64' | nvidia-cusparselt-cu12==0.7.1; platform_system == 'Linux' and platform_machine == 'x86_64' | nvidia-nccl-cu12==2.27.3; platform_system == 'Linux' and platform_machine == 'x86_64' | nvidia-nvshmem-cu12==3.2.5; platform_system == 'Linux' and platform_machine == 'x86_64' | nvidia-nvtx-cu12==12.6.77; platform_system == 'Linux' and platform_machine == 'x86_64' | nvidia-nvjitlink-cu12==12.6.85; platform_system == 'Linux' and platform_machine == 'x86_64' | nvidia-cufile-cu12==1.11.1.6; platform_system == 'Linux' and platform_machine == 'x86_64'
secrets:
github-token: ${{ secrets.GITHUB_TOKEN }}
manywheel-py3_9-cuda12_6-test: # Testing
@ -200,7 +200,7 @@ jobs:
runner_prefix: "${{ needs.get-label-type.outputs.label-type }}"
build_name: manywheel-py3_9-cuda12_8
build_environment: linux-binary-manywheel
PYTORCH_EXTRA_INSTALL_REQUIREMENTS: nvidia-cuda-nvrtc-cu12==12.8.93; platform_system == 'Linux' and platform_machine == 'x86_64' | nvidia-cuda-runtime-cu12==12.8.90; platform_system == 'Linux' and platform_machine == 'x86_64' | nvidia-cuda-cupti-cu12==12.8.90; platform_system == 'Linux' and platform_machine == 'x86_64' | nvidia-cudnn-cu12==9.10.2.21; platform_system == 'Linux' and platform_machine == 'x86_64' | nvidia-cublas-cu12==12.8.4.1; platform_system == 'Linux' and platform_machine == 'x86_64' | nvidia-cufft-cu12==11.3.3.83; platform_system == 'Linux' and platform_machine == 'x86_64' | nvidia-curand-cu12==10.3.9.90; platform_system == 'Linux' and platform_machine == 'x86_64' | nvidia-cusolver-cu12==11.7.3.90; platform_system == 'Linux' and platform_machine == 'x86_64' | nvidia-cusparse-cu12==12.5.8.93; platform_system == 'Linux' and platform_machine == 'x86_64' | nvidia-cusparselt-cu12==0.7.1; platform_system == 'Linux' and platform_machine == 'x86_64' | nvidia-nccl-cu12==2.27.3; platform_system == 'Linux' and platform_machine == 'x86_64' | nvidia-nvshmem-cu12==3.3.9; platform_system == 'Linux' and platform_machine == 'x86_64' | nvidia-nvtx-cu12==12.8.90; platform_system == 'Linux' and platform_machine == 'x86_64' | nvidia-nvjitlink-cu12==12.8.93; platform_system == 'Linux' and platform_machine == 'x86_64' | nvidia-cufile-cu12==1.13.1.3; platform_system == 'Linux' and platform_machine == 'x86_64'
PYTORCH_EXTRA_INSTALL_REQUIREMENTS: nvidia-cuda-nvrtc-cu12==12.8.93; platform_system == 'Linux' and platform_machine == 'x86_64' | nvidia-cuda-runtime-cu12==12.8.90; platform_system == 'Linux' and platform_machine == 'x86_64' | nvidia-cuda-cupti-cu12==12.8.90; platform_system == 'Linux' and platform_machine == 'x86_64' | nvidia-cudnn-cu12==9.10.2.21; platform_system == 'Linux' and platform_machine == 'x86_64' | nvidia-cublas-cu12==12.8.4.1; platform_system == 'Linux' and platform_machine == 'x86_64' | nvidia-cufft-cu12==11.3.3.83; platform_system == 'Linux' and platform_machine == 'x86_64' | nvidia-curand-cu12==10.3.9.90; platform_system == 'Linux' and platform_machine == 'x86_64' | nvidia-cusolver-cu12==11.7.3.90; platform_system == 'Linux' and platform_machine == 'x86_64' | nvidia-cusparse-cu12==12.5.8.93; platform_system == 'Linux' and platform_machine == 'x86_64' | nvidia-cusparselt-cu12==0.7.1; platform_system == 'Linux' and platform_machine == 'x86_64' | nvidia-nccl-cu12==2.27.3; platform_system == 'Linux' and platform_machine == 'x86_64' | nvidia-nvshmem-cu12==3.2.5; platform_system == 'Linux' and platform_machine == 'x86_64' | nvidia-nvtx-cu12==12.8.90; platform_system == 'Linux' and platform_machine == 'x86_64' | nvidia-nvjitlink-cu12==12.8.93; platform_system == 'Linux' and platform_machine == 'x86_64' | nvidia-cufile-cu12==1.13.1.3; platform_system == 'Linux' and platform_machine == 'x86_64'
secrets:
github-token: ${{ secrets.GITHUB_TOKEN }}
manywheel-py3_9-cuda12_8-test: # Testing
@ -269,7 +269,7 @@ jobs:
runner_prefix: "${{ needs.get-label-type.outputs.label-type }}"
build_name: manywheel-py3_9-cuda12_9
build_environment: linux-binary-manywheel
PYTORCH_EXTRA_INSTALL_REQUIREMENTS: nvidia-cuda-nvrtc-cu12==12.9.86; platform_system == 'Linux' and platform_machine == 'x86_64' | nvidia-cuda-runtime-cu12==12.9.79; platform_system == 'Linux' and platform_machine == 'x86_64' | nvidia-cuda-cupti-cu12==12.9.79; platform_system == 'Linux' and platform_machine == 'x86_64' | nvidia-cudnn-cu12==9.10.2.21; platform_system == 'Linux' and platform_machine == 'x86_64' | nvidia-cublas-cu12==12.9.1.4; platform_system == 'Linux' and platform_machine == 'x86_64' | nvidia-cufft-cu12==11.4.1.4; platform_system == 'Linux' and platform_machine == 'x86_64' | nvidia-curand-cu12==10.3.10.19; platform_system == 'Linux' and platform_machine == 'x86_64' | nvidia-cusolver-cu12==11.7.5.82; platform_system == 'Linux' and platform_machine == 'x86_64' | nvidia-cusparse-cu12==12.5.10.65; platform_system == 'Linux' and platform_machine == 'x86_64' | nvidia-cusparselt-cu12==0.7.1; platform_system == 'Linux' and platform_machine == 'x86_64' | nvidia-nccl-cu12==2.27.3; platform_system == 'Linux' and platform_machine == 'x86_64' | nvidia-nvshmem-cu12==3.3.9; platform_system == 'Linux' and platform_machine == 'x86_64' | nvidia-nvtx-cu12==12.9.79; platform_system == 'Linux' and platform_machine == 'x86_64' | nvidia-nvjitlink-cu12==12.9.86; platform_system == 'Linux' and platform_machine == 'x86_64' | nvidia-cufile-cu12==1.14.1.1; platform_system == 'Linux' and platform_machine == 'x86_64'
PYTORCH_EXTRA_INSTALL_REQUIREMENTS: nvidia-cuda-nvrtc-cu12==12.9.86; platform_system == 'Linux' and platform_machine == 'x86_64' | nvidia-cuda-runtime-cu12==12.9.79; platform_system == 'Linux' and platform_machine == 'x86_64' | nvidia-cuda-cupti-cu12==12.9.79; platform_system == 'Linux' and platform_machine == 'x86_64' | nvidia-cudnn-cu12==9.10.2.21; platform_system == 'Linux' and platform_machine == 'x86_64' | nvidia-cublas-cu12==12.9.1.4; platform_system == 'Linux' and platform_machine == 'x86_64' | nvidia-cufft-cu12==11.4.1.4; platform_system == 'Linux' and platform_machine == 'x86_64' | nvidia-curand-cu12==10.3.10.19; platform_system == 'Linux' and platform_machine == 'x86_64' | nvidia-cusolver-cu12==11.7.5.82; platform_system == 'Linux' and platform_machine == 'x86_64' | nvidia-cusparse-cu12==12.5.10.65; platform_system == 'Linux' and platform_machine == 'x86_64' | nvidia-cusparselt-cu12==0.7.1; platform_system == 'Linux' and platform_machine == 'x86_64' | nvidia-nccl-cu12==2.27.3; platform_system == 'Linux' and platform_machine == 'x86_64' | nvidia-nvtx-cu12==12.9.79; platform_system == 'Linux' and platform_machine == 'x86_64' | nvidia-nvjitlink-cu12==12.9.86; platform_system == 'Linux' and platform_machine == 'x86_64' | nvidia-cufile-cu12==1.14.1.1; platform_system == 'Linux' and platform_machine == 'x86_64'
secrets:
github-token: ${{ secrets.GITHUB_TOKEN }}
manywheel-py3_9-cuda12_9-test: # Testing
@ -744,7 +744,7 @@ jobs:
runner_prefix: "${{ needs.get-label-type.outputs.label-type }}"
build_name: manywheel-py3_10-cuda12_6
build_environment: linux-binary-manywheel
PYTORCH_EXTRA_INSTALL_REQUIREMENTS: nvidia-cuda-nvrtc-cu12==12.6.77; platform_system == 'Linux' and platform_machine == 'x86_64' | nvidia-cuda-runtime-cu12==12.6.77; platform_system == 'Linux' and platform_machine == 'x86_64' | nvidia-cuda-cupti-cu12==12.6.80; platform_system == 'Linux' and platform_machine == 'x86_64' | nvidia-cudnn-cu12==9.10.2.21; platform_system == 'Linux' and platform_machine == 'x86_64' | nvidia-cublas-cu12==12.6.4.1; platform_system == 'Linux' and platform_machine == 'x86_64' | nvidia-cufft-cu12==11.3.0.4; platform_system == 'Linux' and platform_machine == 'x86_64' | nvidia-curand-cu12==10.3.7.77; platform_system == 'Linux' and platform_machine == 'x86_64' | nvidia-cusolver-cu12==11.7.1.2; platform_system == 'Linux' and platform_machine == 'x86_64' | nvidia-cusparse-cu12==12.5.4.2; platform_system == 'Linux' and platform_machine == 'x86_64' | nvidia-cusparselt-cu12==0.7.1; platform_system == 'Linux' and platform_machine == 'x86_64' | nvidia-nccl-cu12==2.27.3; platform_system == 'Linux' and platform_machine == 'x86_64' | nvidia-nvshmem-cu12==3.3.9; platform_system == 'Linux' and platform_machine == 'x86_64' | nvidia-nvtx-cu12==12.6.77; platform_system == 'Linux' and platform_machine == 'x86_64' | nvidia-nvjitlink-cu12==12.6.85; platform_system == 'Linux' and platform_machine == 'x86_64' | nvidia-cufile-cu12==1.11.1.6; platform_system == 'Linux' and platform_machine == 'x86_64'
PYTORCH_EXTRA_INSTALL_REQUIREMENTS: nvidia-cuda-nvrtc-cu12==12.6.77; platform_system == 'Linux' and platform_machine == 'x86_64' | nvidia-cuda-runtime-cu12==12.6.77; platform_system == 'Linux' and platform_machine == 'x86_64' | nvidia-cuda-cupti-cu12==12.6.80; platform_system == 'Linux' and platform_machine == 'x86_64' | nvidia-cudnn-cu12==9.10.2.21; platform_system == 'Linux' and platform_machine == 'x86_64' | nvidia-cublas-cu12==12.6.4.1; platform_system == 'Linux' and platform_machine == 'x86_64' | nvidia-cufft-cu12==11.3.0.4; platform_system == 'Linux' and platform_machine == 'x86_64' | nvidia-curand-cu12==10.3.7.77; platform_system == 'Linux' and platform_machine == 'x86_64' | nvidia-cusolver-cu12==11.7.1.2; platform_system == 'Linux' and platform_machine == 'x86_64' | nvidia-cusparse-cu12==12.5.4.2; platform_system == 'Linux' and platform_machine == 'x86_64' | nvidia-cusparselt-cu12==0.7.1; platform_system == 'Linux' and platform_machine == 'x86_64' | nvidia-nccl-cu12==2.27.3; platform_system == 'Linux' and platform_machine == 'x86_64' | nvidia-nvshmem-cu12==3.2.5; platform_system == 'Linux' and platform_machine == 'x86_64' | nvidia-nvtx-cu12==12.6.77; platform_system == 'Linux' and platform_machine == 'x86_64' | nvidia-nvjitlink-cu12==12.6.85; platform_system == 'Linux' and platform_machine == 'x86_64' | nvidia-cufile-cu12==1.11.1.6; platform_system == 'Linux' and platform_machine == 'x86_64'
secrets:
github-token: ${{ secrets.GITHUB_TOKEN }}
manywheel-py3_10-cuda12_6-test: # Testing
@ -813,7 +813,7 @@ jobs:
runner_prefix: "${{ needs.get-label-type.outputs.label-type }}"
build_name: manywheel-py3_10-cuda12_8
build_environment: linux-binary-manywheel
PYTORCH_EXTRA_INSTALL_REQUIREMENTS: nvidia-cuda-nvrtc-cu12==12.8.93; platform_system == 'Linux' and platform_machine == 'x86_64' | nvidia-cuda-runtime-cu12==12.8.90; platform_system == 'Linux' and platform_machine == 'x86_64' | nvidia-cuda-cupti-cu12==12.8.90; platform_system == 'Linux' and platform_machine == 'x86_64' | nvidia-cudnn-cu12==9.10.2.21; platform_system == 'Linux' and platform_machine == 'x86_64' | nvidia-cublas-cu12==12.8.4.1; platform_system == 'Linux' and platform_machine == 'x86_64' | nvidia-cufft-cu12==11.3.3.83; platform_system == 'Linux' and platform_machine == 'x86_64' | nvidia-curand-cu12==10.3.9.90; platform_system == 'Linux' and platform_machine == 'x86_64' | nvidia-cusolver-cu12==11.7.3.90; platform_system == 'Linux' and platform_machine == 'x86_64' | nvidia-cusparse-cu12==12.5.8.93; platform_system == 'Linux' and platform_machine == 'x86_64' | nvidia-cusparselt-cu12==0.7.1; platform_system == 'Linux' and platform_machine == 'x86_64' | nvidia-nccl-cu12==2.27.3; platform_system == 'Linux' and platform_machine == 'x86_64' | nvidia-nvshmem-cu12==3.3.9; platform_system == 'Linux' and platform_machine == 'x86_64' | nvidia-nvtx-cu12==12.8.90; platform_system == 'Linux' and platform_machine == 'x86_64' | nvidia-nvjitlink-cu12==12.8.93; platform_system == 'Linux' and platform_machine == 'x86_64' | nvidia-cufile-cu12==1.13.1.3; platform_system == 'Linux' and platform_machine == 'x86_64'
PYTORCH_EXTRA_INSTALL_REQUIREMENTS: nvidia-cuda-nvrtc-cu12==12.8.93; platform_system == 'Linux' and platform_machine == 'x86_64' | nvidia-cuda-runtime-cu12==12.8.90; platform_system == 'Linux' and platform_machine == 'x86_64' | nvidia-cuda-cupti-cu12==12.8.90; platform_system == 'Linux' and platform_machine == 'x86_64' | nvidia-cudnn-cu12==9.10.2.21; platform_system == 'Linux' and platform_machine == 'x86_64' | nvidia-cublas-cu12==12.8.4.1; platform_system == 'Linux' and platform_machine == 'x86_64' | nvidia-cufft-cu12==11.3.3.83; platform_system == 'Linux' and platform_machine == 'x86_64' | nvidia-curand-cu12==10.3.9.90; platform_system == 'Linux' and platform_machine == 'x86_64' | nvidia-cusolver-cu12==11.7.3.90; platform_system == 'Linux' and platform_machine == 'x86_64' | nvidia-cusparse-cu12==12.5.8.93; platform_system == 'Linux' and platform_machine == 'x86_64' | nvidia-cusparselt-cu12==0.7.1; platform_system == 'Linux' and platform_machine == 'x86_64' | nvidia-nccl-cu12==2.27.3; platform_system == 'Linux' and platform_machine == 'x86_64' | nvidia-nvshmem-cu12==3.2.5; platform_system == 'Linux' and platform_machine == 'x86_64' | nvidia-nvtx-cu12==12.8.90; platform_system == 'Linux' and platform_machine == 'x86_64' | nvidia-nvjitlink-cu12==12.8.93; platform_system == 'Linux' and platform_machine == 'x86_64' | nvidia-cufile-cu12==1.13.1.3; platform_system == 'Linux' and platform_machine == 'x86_64'
secrets:
github-token: ${{ secrets.GITHUB_TOKEN }}
manywheel-py3_10-cuda12_8-test: # Testing
@ -882,7 +882,7 @@ jobs:
runner_prefix: "${{ needs.get-label-type.outputs.label-type }}"
build_name: manywheel-py3_10-cuda12_9
build_environment: linux-binary-manywheel
PYTORCH_EXTRA_INSTALL_REQUIREMENTS: nvidia-cuda-nvrtc-cu12==12.9.86; platform_system == 'Linux' and platform_machine == 'x86_64' | nvidia-cuda-runtime-cu12==12.9.79; platform_system == 'Linux' and platform_machine == 'x86_64' | nvidia-cuda-cupti-cu12==12.9.79; platform_system == 'Linux' and platform_machine == 'x86_64' | nvidia-cudnn-cu12==9.10.2.21; platform_system == 'Linux' and platform_machine == 'x86_64' | nvidia-cublas-cu12==12.9.1.4; platform_system == 'Linux' and platform_machine == 'x86_64' | nvidia-cufft-cu12==11.4.1.4; platform_system == 'Linux' and platform_machine == 'x86_64' | nvidia-curand-cu12==10.3.10.19; platform_system == 'Linux' and platform_machine == 'x86_64' | nvidia-cusolver-cu12==11.7.5.82; platform_system == 'Linux' and platform_machine == 'x86_64' | nvidia-cusparse-cu12==12.5.10.65; platform_system == 'Linux' and platform_machine == 'x86_64' | nvidia-cusparselt-cu12==0.7.1; platform_system == 'Linux' and platform_machine == 'x86_64' | nvidia-nccl-cu12==2.27.3; platform_system == 'Linux' and platform_machine == 'x86_64' | nvidia-nvshmem-cu12==3.3.9; platform_system == 'Linux' and platform_machine == 'x86_64' | nvidia-nvtx-cu12==12.9.79; platform_system == 'Linux' and platform_machine == 'x86_64' | nvidia-nvjitlink-cu12==12.9.86; platform_system == 'Linux' and platform_machine == 'x86_64' | nvidia-cufile-cu12==1.14.1.1; platform_system == 'Linux' and platform_machine == 'x86_64'
PYTORCH_EXTRA_INSTALL_REQUIREMENTS: nvidia-cuda-nvrtc-cu12==12.9.86; platform_system == 'Linux' and platform_machine == 'x86_64' | nvidia-cuda-runtime-cu12==12.9.79; platform_system == 'Linux' and platform_machine == 'x86_64' | nvidia-cuda-cupti-cu12==12.9.79; platform_system == 'Linux' and platform_machine == 'x86_64' | nvidia-cudnn-cu12==9.10.2.21; platform_system == 'Linux' and platform_machine == 'x86_64' | nvidia-cublas-cu12==12.9.1.4; platform_system == 'Linux' and platform_machine == 'x86_64' | nvidia-cufft-cu12==11.4.1.4; platform_system == 'Linux' and platform_machine == 'x86_64' | nvidia-curand-cu12==10.3.10.19; platform_system == 'Linux' and platform_machine == 'x86_64' | nvidia-cusolver-cu12==11.7.5.82; platform_system == 'Linux' and platform_machine == 'x86_64' | nvidia-cusparse-cu12==12.5.10.65; platform_system == 'Linux' and platform_machine == 'x86_64' | nvidia-cusparselt-cu12==0.7.1; platform_system == 'Linux' and platform_machine == 'x86_64' | nvidia-nccl-cu12==2.27.3; platform_system == 'Linux' and platform_machine == 'x86_64' | nvidia-nvtx-cu12==12.9.79; platform_system == 'Linux' and platform_machine == 'x86_64' | nvidia-nvjitlink-cu12==12.9.86; platform_system == 'Linux' and platform_machine == 'x86_64' | nvidia-cufile-cu12==1.14.1.1; platform_system == 'Linux' and platform_machine == 'x86_64'
secrets:
github-token: ${{ secrets.GITHUB_TOKEN }}
manywheel-py3_10-cuda12_9-test: # Testing
@ -1357,7 +1357,7 @@ jobs:
runner_prefix: "${{ needs.get-label-type.outputs.label-type }}"
build_name: manywheel-py3_11-cuda12_6
build_environment: linux-binary-manywheel
PYTORCH_EXTRA_INSTALL_REQUIREMENTS: nvidia-cuda-nvrtc-cu12==12.6.77; platform_system == 'Linux' and platform_machine == 'x86_64' | nvidia-cuda-runtime-cu12==12.6.77; platform_system == 'Linux' and platform_machine == 'x86_64' | nvidia-cuda-cupti-cu12==12.6.80; platform_system == 'Linux' and platform_machine == 'x86_64' | nvidia-cudnn-cu12==9.10.2.21; platform_system == 'Linux' and platform_machine == 'x86_64' | nvidia-cublas-cu12==12.6.4.1; platform_system == 'Linux' and platform_machine == 'x86_64' | nvidia-cufft-cu12==11.3.0.4; platform_system == 'Linux' and platform_machine == 'x86_64' | nvidia-curand-cu12==10.3.7.77; platform_system == 'Linux' and platform_machine == 'x86_64' | nvidia-cusolver-cu12==11.7.1.2; platform_system == 'Linux' and platform_machine == 'x86_64' | nvidia-cusparse-cu12==12.5.4.2; platform_system == 'Linux' and platform_machine == 'x86_64' | nvidia-cusparselt-cu12==0.7.1; platform_system == 'Linux' and platform_machine == 'x86_64' | nvidia-nccl-cu12==2.27.3; platform_system == 'Linux' and platform_machine == 'x86_64' | nvidia-nvshmem-cu12==3.3.9; platform_system == 'Linux' and platform_machine == 'x86_64' | nvidia-nvtx-cu12==12.6.77; platform_system == 'Linux' and platform_machine == 'x86_64' | nvidia-nvjitlink-cu12==12.6.85; platform_system == 'Linux' and platform_machine == 'x86_64' | nvidia-cufile-cu12==1.11.1.6; platform_system == 'Linux' and platform_machine == 'x86_64'
PYTORCH_EXTRA_INSTALL_REQUIREMENTS: nvidia-cuda-nvrtc-cu12==12.6.77; platform_system == 'Linux' and platform_machine == 'x86_64' | nvidia-cuda-runtime-cu12==12.6.77; platform_system == 'Linux' and platform_machine == 'x86_64' | nvidia-cuda-cupti-cu12==12.6.80; platform_system == 'Linux' and platform_machine == 'x86_64' | nvidia-cudnn-cu12==9.10.2.21; platform_system == 'Linux' and platform_machine == 'x86_64' | nvidia-cublas-cu12==12.6.4.1; platform_system == 'Linux' and platform_machine == 'x86_64' | nvidia-cufft-cu12==11.3.0.4; platform_system == 'Linux' and platform_machine == 'x86_64' | nvidia-curand-cu12==10.3.7.77; platform_system == 'Linux' and platform_machine == 'x86_64' | nvidia-cusolver-cu12==11.7.1.2; platform_system == 'Linux' and platform_machine == 'x86_64' | nvidia-cusparse-cu12==12.5.4.2; platform_system == 'Linux' and platform_machine == 'x86_64' | nvidia-cusparselt-cu12==0.7.1; platform_system == 'Linux' and platform_machine == 'x86_64' | nvidia-nccl-cu12==2.27.3; platform_system == 'Linux' and platform_machine == 'x86_64' | nvidia-nvshmem-cu12==3.2.5; platform_system == 'Linux' and platform_machine == 'x86_64' | nvidia-nvtx-cu12==12.6.77; platform_system == 'Linux' and platform_machine == 'x86_64' | nvidia-nvjitlink-cu12==12.6.85; platform_system == 'Linux' and platform_machine == 'x86_64' | nvidia-cufile-cu12==1.11.1.6; platform_system == 'Linux' and platform_machine == 'x86_64'
secrets:
github-token: ${{ secrets.GITHUB_TOKEN }}
manywheel-py3_11-cuda12_6-test: # Testing
@ -1407,6 +1407,74 @@ jobs:
github-token: ${{ secrets.GITHUB_TOKEN }}
uses: ./.github/workflows/_binary-upload.yml

manywheel-py3_11-cuda12_6-full-build:
if: ${{ github.repository_owner == 'pytorch' }}
uses: ./.github/workflows/_binary-build-linux.yml
needs: get-label-type
with:
PYTORCH_ROOT: /pytorch
PACKAGE_TYPE: manywheel
# TODO: This is a legacy variable that we eventually want to get rid of in
# favor of GPU_ARCH_VERSION
DESIRED_CUDA: cu126
GPU_ARCH_VERSION: 12.6
GPU_ARCH_TYPE: cuda
DOCKER_IMAGE: manylinux2_28-builder
DOCKER_IMAGE_TAG_PREFIX: cuda12.6
use_split_build: False
DESIRED_PYTHON: "3.11"
runner_prefix: "${{ needs.get-label-type.outputs.label-type }}"
build_name: manywheel-py3_11-cuda12_6-full
build_environment: linux-binary-manywheel
secrets:
github-token: ${{ secrets.GITHUB_TOKEN }}
manywheel-py3_11-cuda12_6-full-test: # Testing
if: ${{ github.repository_owner == 'pytorch' }}
needs:
- manywheel-py3_11-cuda12_6-full-build
- get-label-type
uses: ./.github/workflows/_binary-test-linux.yml
with:
PYTORCH_ROOT: /pytorch
PACKAGE_TYPE: manywheel
# TODO: This is a legacy variable that we eventually want to get rid of in
# favor of GPU_ARCH_VERSION
DESIRED_CUDA: cu126
GPU_ARCH_VERSION: 12.6
GPU_ARCH_TYPE: cuda
DOCKER_IMAGE: manylinux2_28-builder
DOCKER_IMAGE_TAG_PREFIX: cuda12.6
use_split_build: False
DESIRED_PYTHON: "3.11"
build_name: manywheel-py3_11-cuda12_6-full
build_environment: linux-binary-manywheel
runner_prefix: "${{ needs.get-label-type.outputs.label-type }}"
runs_on: linux.4xlarge.nvidia.gpu # for other cuda versions, we use 4xlarge runner
secrets:
github-token: ${{ secrets.GITHUB_TOKEN }}
manywheel-py3_11-cuda12_6-full-upload: # Uploading
if: ${{ github.repository_owner == 'pytorch' }}
permissions:
id-token: write
contents: read
needs: manywheel-py3_11-cuda12_6-full-test
with:
PYTORCH_ROOT: /pytorch
PACKAGE_TYPE: manywheel
# TODO: This is a legacy variable that we eventually want to get rid of in
# favor of GPU_ARCH_VERSION
DESIRED_CUDA: cu126
GPU_ARCH_VERSION: 12.6
GPU_ARCH_TYPE: cuda
DOCKER_IMAGE: manylinux2_28-builder
DOCKER_IMAGE_TAG_PREFIX: cuda12.6
use_split_build: False
DESIRED_PYTHON: "3.11"
build_name: manywheel-py3_11-cuda12_6-full
secrets:
github-token: ${{ secrets.GITHUB_TOKEN }}
uses: ./.github/workflows/_binary-upload.yml

manywheel-py3_11-cuda12_8-build:
if: ${{ github.repository_owner == 'pytorch' }}
uses: ./.github/workflows/_binary-build-linux.yml
@ -1426,7 +1494,7 @@ jobs:
runner_prefix: "${{ needs.get-label-type.outputs.label-type }}"
build_name: manywheel-py3_11-cuda12_8
build_environment: linux-binary-manywheel
PYTORCH_EXTRA_INSTALL_REQUIREMENTS: nvidia-cuda-nvrtc-cu12==12.8.93; platform_system == 'Linux' and platform_machine == 'x86_64' | nvidia-cuda-runtime-cu12==12.8.90; platform_system == 'Linux' and platform_machine == 'x86_64' | nvidia-cuda-cupti-cu12==12.8.90; platform_system == 'Linux' and platform_machine == 'x86_64' | nvidia-cudnn-cu12==9.10.2.21; platform_system == 'Linux' and platform_machine == 'x86_64' | nvidia-cublas-cu12==12.8.4.1; platform_system == 'Linux' and platform_machine == 'x86_64' | nvidia-cufft-cu12==11.3.3.83; platform_system == 'Linux' and platform_machine == 'x86_64' | nvidia-curand-cu12==10.3.9.90; platform_system == 'Linux' and platform_machine == 'x86_64' | nvidia-cusolver-cu12==11.7.3.90; platform_system == 'Linux' and platform_machine == 'x86_64' | nvidia-cusparse-cu12==12.5.8.93; platform_system == 'Linux' and platform_machine == 'x86_64' | nvidia-cusparselt-cu12==0.7.1; platform_system == 'Linux' and platform_machine == 'x86_64' | nvidia-nccl-cu12==2.27.3; platform_system == 'Linux' and platform_machine == 'x86_64' | nvidia-nvshmem-cu12==3.3.9; platform_system == 'Linux' and platform_machine == 'x86_64' | nvidia-nvtx-cu12==12.8.90; platform_system == 'Linux' and platform_machine == 'x86_64' | nvidia-nvjitlink-cu12==12.8.93; platform_system == 'Linux' and platform_machine == 'x86_64' | nvidia-cufile-cu12==1.13.1.3; platform_system == 'Linux' and platform_machine == 'x86_64'
PYTORCH_EXTRA_INSTALL_REQUIREMENTS: nvidia-cuda-nvrtc-cu12==12.8.93; platform_system == 'Linux' and platform_machine == 'x86_64' | nvidia-cuda-runtime-cu12==12.8.90; platform_system == 'Linux' and platform_machine == 'x86_64' | nvidia-cuda-cupti-cu12==12.8.90; platform_system == 'Linux' and platform_machine == 'x86_64' | nvidia-cudnn-cu12==9.10.2.21; platform_system == 'Linux' and platform_machine == 'x86_64' | nvidia-cublas-cu12==12.8.4.1; platform_system == 'Linux' and platform_machine == 'x86_64' | nvidia-cufft-cu12==11.3.3.83; platform_system == 'Linux' and platform_machine == 'x86_64' | nvidia-curand-cu12==10.3.9.90; platform_system == 'Linux' and platform_machine == 'x86_64' | nvidia-cusolver-cu12==11.7.3.90; platform_system == 'Linux' and platform_machine == 'x86_64' | nvidia-cusparse-cu12==12.5.8.93; platform_system == 'Linux' and platform_machine == 'x86_64' | nvidia-cusparselt-cu12==0.7.1; platform_system == 'Linux' and platform_machine == 'x86_64' | nvidia-nccl-cu12==2.27.3; platform_system == 'Linux' and platform_machine == 'x86_64' | nvidia-nvshmem-cu12==3.2.5; platform_system == 'Linux' and platform_machine == 'x86_64' | nvidia-nvtx-cu12==12.8.90; platform_system == 'Linux' and platform_machine == 'x86_64' | nvidia-nvjitlink-cu12==12.8.93; platform_system == 'Linux' and platform_machine == 'x86_64' | nvidia-cufile-cu12==1.13.1.3; platform_system == 'Linux' and platform_machine == 'x86_64'
secrets:
github-token: ${{ secrets.GITHUB_TOKEN }}
manywheel-py3_11-cuda12_8-test: # Testing
@ -1476,74 +1544,6 @@ jobs:
github-token: ${{ secrets.GITHUB_TOKEN }}
uses: ./.github/workflows/_binary-upload.yml

manywheel-py3_11-cuda12_8-full-build:
if: ${{ github.repository_owner == 'pytorch' }}
uses: ./.github/workflows/_binary-build-linux.yml
needs: get-label-type
with:
PYTORCH_ROOT: /pytorch
PACKAGE_TYPE: manywheel
# TODO: This is a legacy variable that we eventually want to get rid of in
# favor of GPU_ARCH_VERSION
DESIRED_CUDA: cu128
GPU_ARCH_VERSION: 12.8
GPU_ARCH_TYPE: cuda
DOCKER_IMAGE: manylinux2_28-builder
DOCKER_IMAGE_TAG_PREFIX: cuda12.8
use_split_build: False
DESIRED_PYTHON: "3.11"
runner_prefix: "${{ needs.get-label-type.outputs.label-type }}"
build_name: manywheel-py3_11-cuda12_8-full
build_environment: linux-binary-manywheel
secrets:
github-token: ${{ secrets.GITHUB_TOKEN }}
manywheel-py3_11-cuda12_8-full-test: # Testing
if: ${{ github.repository_owner == 'pytorch' }}
needs:
- manywheel-py3_11-cuda12_8-full-build
- get-label-type
uses: ./.github/workflows/_binary-test-linux.yml
with:
PYTORCH_ROOT: /pytorch
PACKAGE_TYPE: manywheel
# TODO: This is a legacy variable that we eventually want to get rid of in
# favor of GPU_ARCH_VERSION
DESIRED_CUDA: cu128
GPU_ARCH_VERSION: 12.8
GPU_ARCH_TYPE: cuda
DOCKER_IMAGE: manylinux2_28-builder
DOCKER_IMAGE_TAG_PREFIX: cuda12.8
use_split_build: False
DESIRED_PYTHON: "3.11"
build_name: manywheel-py3_11-cuda12_8-full
build_environment: linux-binary-manywheel
runner_prefix: "${{ needs.get-label-type.outputs.label-type }}"
runs_on: linux.g4dn.4xlarge.nvidia.gpu # 12.8 and 12.9 build need sm_70+ runner
secrets:
github-token: ${{ secrets.GITHUB_TOKEN }}
manywheel-py3_11-cuda12_8-full-upload: # Uploading
if: ${{ github.repository_owner == 'pytorch' }}
permissions:
id-token: write
contents: read
needs: manywheel-py3_11-cuda12_8-full-test
with:
PYTORCH_ROOT: /pytorch
PACKAGE_TYPE: manywheel
# TODO: This is a legacy variable that we eventually want to get rid of in
# favor of GPU_ARCH_VERSION
DESIRED_CUDA: cu128
GPU_ARCH_VERSION: 12.8
GPU_ARCH_TYPE: cuda
DOCKER_IMAGE: manylinux2_28-builder
DOCKER_IMAGE_TAG_PREFIX: cuda12.8
use_split_build: False
DESIRED_PYTHON: "3.11"
build_name: manywheel-py3_11-cuda12_8-full
secrets:
github-token: ${{ secrets.GITHUB_TOKEN }}
uses: ./.github/workflows/_binary-upload.yml

manywheel-py3_11-cuda12_9-build:
if: ${{ github.repository_owner == 'pytorch' }}
uses: ./.github/workflows/_binary-build-linux.yml
@ -1563,7 +1563,7 @@ jobs:
runner_prefix: "${{ needs.get-label-type.outputs.label-type }}"
build_name: manywheel-py3_11-cuda12_9
build_environment: linux-binary-manywheel
PYTORCH_EXTRA_INSTALL_REQUIREMENTS: nvidia-cuda-nvrtc-cu12==12.9.86; platform_system == 'Linux' and platform_machine == 'x86_64' | nvidia-cuda-runtime-cu12==12.9.79; platform_system == 'Linux' and platform_machine == 'x86_64' | nvidia-cuda-cupti-cu12==12.9.79; platform_system == 'Linux' and platform_machine == 'x86_64' | nvidia-cudnn-cu12==9.10.2.21; platform_system == 'Linux' and platform_machine == 'x86_64' | nvidia-cublas-cu12==12.9.1.4; platform_system == 'Linux' and platform_machine == 'x86_64' | nvidia-cufft-cu12==11.4.1.4; platform_system == 'Linux' and platform_machine == 'x86_64' | nvidia-curand-cu12==10.3.10.19; platform_system == 'Linux' and platform_machine == 'x86_64' | nvidia-cusolver-cu12==11.7.5.82; platform_system == 'Linux' and platform_machine == 'x86_64' | nvidia-cusparse-cu12==12.5.10.65; platform_system == 'Linux' and platform_machine == 'x86_64' | nvidia-cusparselt-cu12==0.7.1; platform_system == 'Linux' and platform_machine == 'x86_64' | nvidia-nccl-cu12==2.27.3; platform_system == 'Linux' and platform_machine == 'x86_64' | nvidia-nvshmem-cu12==3.3.9; platform_system == 'Linux' and platform_machine == 'x86_64' | nvidia-nvtx-cu12==12.9.79; platform_system == 'Linux' and platform_machine == 'x86_64' | nvidia-nvjitlink-cu12==12.9.86; platform_system == 'Linux' and platform_machine == 'x86_64' | nvidia-cufile-cu12==1.14.1.1; platform_system == 'Linux' and platform_machine == 'x86_64'
PYTORCH_EXTRA_INSTALL_REQUIREMENTS: nvidia-cuda-nvrtc-cu12==12.9.86; platform_system == 'Linux' and platform_machine == 'x86_64' | nvidia-cuda-runtime-cu12==12.9.79; platform_system == 'Linux' and platform_machine == 'x86_64' | nvidia-cuda-cupti-cu12==12.9.79; platform_system == 'Linux' and platform_machine == 'x86_64' | nvidia-cudnn-cu12==9.10.2.21; platform_system == 'Linux' and platform_machine == 'x86_64' | nvidia-cublas-cu12==12.9.1.4; platform_system == 'Linux' and platform_machine == 'x86_64' | nvidia-cufft-cu12==11.4.1.4; platform_system == 'Linux' and platform_machine == 'x86_64' | nvidia-curand-cu12==10.3.10.19; platform_system == 'Linux' and platform_machine == 'x86_64' | nvidia-cusolver-cu12==11.7.5.82; platform_system == 'Linux' and platform_machine == 'x86_64' | nvidia-cusparse-cu12==12.5.10.65; platform_system == 'Linux' and platform_machine == 'x86_64' | nvidia-cusparselt-cu12==0.7.1; platform_system == 'Linux' and platform_machine == 'x86_64' | nvidia-nccl-cu12==2.27.3; platform_system == 'Linux' and platform_machine == 'x86_64' | nvidia-nvtx-cu12==12.9.79; platform_system == 'Linux' and platform_machine == 'x86_64' | nvidia-nvjitlink-cu12==12.9.86; platform_system == 'Linux' and platform_machine == 'x86_64' | nvidia-cufile-cu12==1.14.1.1; platform_system == 'Linux' and platform_machine == 'x86_64'
secrets:
github-token: ${{ secrets.GITHUB_TOKEN }}
manywheel-py3_11-cuda12_9-test: # Testing
@ -2038,7 +2038,7 @@ jobs:
runner_prefix: "${{ needs.get-label-type.outputs.label-type }}"
build_name: manywheel-py3_12-cuda12_6
build_environment: linux-binary-manywheel
PYTORCH_EXTRA_INSTALL_REQUIREMENTS: nvidia-cuda-nvrtc-cu12==12.6.77; platform_system == 'Linux' and platform_machine == 'x86_64' | nvidia-cuda-runtime-cu12==12.6.77; platform_system == 'Linux' and platform_machine == 'x86_64' | nvidia-cuda-cupti-cu12==12.6.80; platform_system == 'Linux' and platform_machine == 'x86_64' | nvidia-cudnn-cu12==9.10.2.21; platform_system == 'Linux' and platform_machine == 'x86_64' | nvidia-cublas-cu12==12.6.4.1; platform_system == 'Linux' and platform_machine == 'x86_64' | nvidia-cufft-cu12==11.3.0.4; platform_system == 'Linux' and platform_machine == 'x86_64' | nvidia-curand-cu12==10.3.7.77; platform_system == 'Linux' and platform_machine == 'x86_64' | nvidia-cusolver-cu12==11.7.1.2; platform_system == 'Linux' and platform_machine == 'x86_64' | nvidia-cusparse-cu12==12.5.4.2; platform_system == 'Linux' and platform_machine == 'x86_64' | nvidia-cusparselt-cu12==0.7.1; platform_system == 'Linux' and platform_machine == 'x86_64' | nvidia-nccl-cu12==2.27.3; platform_system == 'Linux' and platform_machine == 'x86_64' | nvidia-nvshmem-cu12==3.3.9; platform_system == 'Linux' and platform_machine == 'x86_64' | nvidia-nvtx-cu12==12.6.77; platform_system == 'Linux' and platform_machine == 'x86_64' | nvidia-nvjitlink-cu12==12.6.85; platform_system == 'Linux' and platform_machine == 'x86_64' | nvidia-cufile-cu12==1.11.1.6; platform_system == 'Linux' and platform_machine == 'x86_64'
PYTORCH_EXTRA_INSTALL_REQUIREMENTS: nvidia-cuda-nvrtc-cu12==12.6.77; platform_system == 'Linux' and platform_machine == 'x86_64' | nvidia-cuda-runtime-cu12==12.6.77; platform_system == 'Linux' and platform_machine == 'x86_64' | nvidia-cuda-cupti-cu12==12.6.80; platform_system == 'Linux' and platform_machine == 'x86_64' | nvidia-cudnn-cu12==9.10.2.21; platform_system == 'Linux' and platform_machine == 'x86_64' | nvidia-cublas-cu12==12.6.4.1; platform_system == 'Linux' and platform_machine == 'x86_64' | nvidia-cufft-cu12==11.3.0.4; platform_system == 'Linux' and platform_machine == 'x86_64' | nvidia-curand-cu12==10.3.7.77; platform_system == 'Linux' and platform_machine == 'x86_64' | nvidia-cusolver-cu12==11.7.1.2; platform_system == 'Linux' and platform_machine == 'x86_64' | nvidia-cusparse-cu12==12.5.4.2; platform_system == 'Linux' and platform_machine == 'x86_64' | nvidia-cusparselt-cu12==0.7.1; platform_system == 'Linux' and platform_machine == 'x86_64' | nvidia-nccl-cu12==2.27.3; platform_system == 'Linux' and platform_machine == 'x86_64' | nvidia-nvshmem-cu12==3.2.5; platform_system == 'Linux' and platform_machine == 'x86_64' | nvidia-nvtx-cu12==12.6.77; platform_system == 'Linux' and platform_machine == 'x86_64' | nvidia-nvjitlink-cu12==12.6.85; platform_system == 'Linux' and platform_machine == 'x86_64' | nvidia-cufile-cu12==1.11.1.6; platform_system == 'Linux' and platform_machine == 'x86_64'
secrets:
github-token: ${{ secrets.GITHUB_TOKEN }}
manywheel-py3_12-cuda12_6-test: # Testing
@ -2107,7 +2107,7 @@ jobs:
runner_prefix: "${{ needs.get-label-type.outputs.label-type }}"
build_name: manywheel-py3_12-cuda12_8
build_environment: linux-binary-manywheel
PYTORCH_EXTRA_INSTALL_REQUIREMENTS: nvidia-cuda-nvrtc-cu12==12.8.93; platform_system == 'Linux' and platform_machine == 'x86_64' | nvidia-cuda-runtime-cu12==12.8.90; platform_system == 'Linux' and platform_machine == 'x86_64' | nvidia-cuda-cupti-cu12==12.8.90; platform_system == 'Linux' and platform_machine == 'x86_64' | nvidia-cudnn-cu12==9.10.2.21; platform_system == 'Linux' and platform_machine == 'x86_64' | nvidia-cublas-cu12==12.8.4.1; platform_system == 'Linux' and platform_machine == 'x86_64' | nvidia-cufft-cu12==11.3.3.83; platform_system == 'Linux' and platform_machine == 'x86_64' | nvidia-curand-cu12==10.3.9.90; platform_system == 'Linux' and platform_machine == 'x86_64' | nvidia-cusolver-cu12==11.7.3.90; platform_system == 'Linux' and platform_machine == 'x86_64' | nvidia-cusparse-cu12==12.5.8.93; platform_system == 'Linux' and platform_machine == 'x86_64' | nvidia-cusparselt-cu12==0.7.1; platform_system == 'Linux' and platform_machine == 'x86_64' | nvidia-nccl-cu12==2.27.3; platform_system == 'Linux' and platform_machine == 'x86_64' | nvidia-nvshmem-cu12==3.3.9; platform_system == 'Linux' and platform_machine == 'x86_64' | nvidia-nvtx-cu12==12.8.90; platform_system == 'Linux' and platform_machine == 'x86_64' | nvidia-nvjitlink-cu12==12.8.93; platform_system == 'Linux' and platform_machine == 'x86_64' | nvidia-cufile-cu12==1.13.1.3; platform_system == 'Linux' and platform_machine == 'x86_64'
PYTORCH_EXTRA_INSTALL_REQUIREMENTS: nvidia-cuda-nvrtc-cu12==12.8.93; platform_system == 'Linux' and platform_machine == 'x86_64' | nvidia-cuda-runtime-cu12==12.8.90; platform_system == 'Linux' and platform_machine == 'x86_64' | nvidia-cuda-cupti-cu12==12.8.90; platform_system == 'Linux' and platform_machine == 'x86_64' | nvidia-cudnn-cu12==9.10.2.21; platform_system == 'Linux' and platform_machine == 'x86_64' | nvidia-cublas-cu12==12.8.4.1; platform_system == 'Linux' and platform_machine == 'x86_64' | nvidia-cufft-cu12==11.3.3.83; platform_system == 'Linux' and platform_machine == 'x86_64' | nvidia-curand-cu12==10.3.9.90; platform_system == 'Linux' and platform_machine == 'x86_64' | nvidia-cusolver-cu12==11.7.3.90; platform_system == 'Linux' and platform_machine == 'x86_64' | nvidia-cusparse-cu12==12.5.8.93; platform_system == 'Linux' and platform_machine == 'x86_64' | nvidia-cusparselt-cu12==0.7.1; platform_system == 'Linux' and platform_machine == 'x86_64' | nvidia-nccl-cu12==2.27.3; platform_system == 'Linux' and platform_machine == 'x86_64' | nvidia-nvshmem-cu12==3.2.5; platform_system == 'Linux' and platform_machine == 'x86_64' | nvidia-nvtx-cu12==12.8.90; platform_system == 'Linux' and platform_machine == 'x86_64' | nvidia-nvjitlink-cu12==12.8.93; platform_system == 'Linux' and platform_machine == 'x86_64' | nvidia-cufile-cu12==1.13.1.3; platform_system == 'Linux' and platform_machine == 'x86_64'
secrets:
github-token: ${{ secrets.GITHUB_TOKEN }}
manywheel-py3_12-cuda12_8-test: # Testing
@ -2176,7 +2176,7 @@ jobs:
runner_prefix: "${{ needs.get-label-type.outputs.label-type }}"
build_name: manywheel-py3_12-cuda12_9
build_environment: linux-binary-manywheel
PYTORCH_EXTRA_INSTALL_REQUIREMENTS: nvidia-cuda-nvrtc-cu12==12.9.86; platform_system == 'Linux' and platform_machine == 'x86_64' | nvidia-cuda-runtime-cu12==12.9.79; platform_system == 'Linux' and platform_machine == 'x86_64' | nvidia-cuda-cupti-cu12==12.9.79; platform_system == 'Linux' and platform_machine == 'x86_64' | nvidia-cudnn-cu12==9.10.2.21; platform_system == 'Linux' and platform_machine == 'x86_64' | nvidia-cublas-cu12==12.9.1.4; platform_system == 'Linux' and platform_machine == 'x86_64' | nvidia-cufft-cu12==11.4.1.4; platform_system == 'Linux' and platform_machine == 'x86_64' | nvidia-curand-cu12==10.3.10.19; platform_system == 'Linux' and platform_machine == 'x86_64' | nvidia-cusolver-cu12==11.7.5.82; platform_system == 'Linux' and platform_machine == 'x86_64' | nvidia-cusparse-cu12==12.5.10.65; platform_system == 'Linux' and platform_machine == 'x86_64' | nvidia-cusparselt-cu12==0.7.1; platform_system == 'Linux' and platform_machine == 'x86_64' | nvidia-nccl-cu12==2.27.3; platform_system == 'Linux' and platform_machine == 'x86_64' | nvidia-nvshmem-cu12==3.3.9; platform_system == 'Linux' and platform_machine == 'x86_64' | nvidia-nvtx-cu12==12.9.79; platform_system == 'Linux' and platform_machine == 'x86_64' | nvidia-nvjitlink-cu12==12.9.86; platform_system == 'Linux' and platform_machine == 'x86_64' | nvidia-cufile-cu12==1.14.1.1; platform_system == 'Linux' and platform_machine == 'x86_64'
PYTORCH_EXTRA_INSTALL_REQUIREMENTS: nvidia-cuda-nvrtc-cu12==12.9.86; platform_system == 'Linux' and platform_machine == 'x86_64' | nvidia-cuda-runtime-cu12==12.9.79; platform_system == 'Linux' and platform_machine == 'x86_64' | nvidia-cuda-cupti-cu12==12.9.79; platform_system == 'Linux' and platform_machine == 'x86_64' | nvidia-cudnn-cu12==9.10.2.21; platform_system == 'Linux' and platform_machine == 'x86_64' | nvidia-cublas-cu12==12.9.1.4; platform_system == 'Linux' and platform_machine == 'x86_64' | nvidia-cufft-cu12==11.4.1.4; platform_system == 'Linux' and platform_machine == 'x86_64' | nvidia-curand-cu12==10.3.10.19; platform_system == 'Linux' and platform_machine == 'x86_64' | nvidia-cusolver-cu12==11.7.5.82; platform_system == 'Linux' and platform_machine == 'x86_64' | nvidia-cusparse-cu12==12.5.10.65; platform_system == 'Linux' and platform_machine == 'x86_64' | nvidia-cusparselt-cu12==0.7.1; platform_system == 'Linux' and platform_machine == 'x86_64' | nvidia-nccl-cu12==2.27.3; platform_system == 'Linux' and platform_machine == 'x86_64' | nvidia-nvtx-cu12==12.9.79; platform_system == 'Linux' and platform_machine == 'x86_64' | nvidia-nvjitlink-cu12==12.9.86; platform_system == 'Linux' and platform_machine == 'x86_64' | nvidia-cufile-cu12==1.14.1.1; platform_system == 'Linux' and platform_machine == 'x86_64'
secrets:
github-token: ${{ secrets.GITHUB_TOKEN }}
manywheel-py3_12-cuda12_9-test: # Testing
@ -2651,7 +2651,7 @@ jobs:
runner_prefix: "${{ needs.get-label-type.outputs.label-type }}"
build_name: manywheel-py3_13-cuda12_6
build_environment: linux-binary-manywheel
PYTORCH_EXTRA_INSTALL_REQUIREMENTS: nvidia-cuda-nvrtc-cu12==12.6.77; platform_system == 'Linux' and platform_machine == 'x86_64' | nvidia-cuda-runtime-cu12==12.6.77; platform_system == 'Linux' and platform_machine == 'x86_64' | nvidia-cuda-cupti-cu12==12.6.80; platform_system == 'Linux' and platform_machine == 'x86_64' | nvidia-cudnn-cu12==9.10.2.21; platform_system == 'Linux' and platform_machine == 'x86_64' | nvidia-cublas-cu12==12.6.4.1; platform_system == 'Linux' and platform_machine == 'x86_64' | nvidia-cufft-cu12==11.3.0.4; platform_system == 'Linux' and platform_machine == 'x86_64' | nvidia-curand-cu12==10.3.7.77; platform_system == 'Linux' and platform_machine == 'x86_64' | nvidia-cusolver-cu12==11.7.1.2; platform_system == 'Linux' and platform_machine == 'x86_64' | nvidia-cusparse-cu12==12.5.4.2; platform_system == 'Linux' and platform_machine == 'x86_64' | nvidia-cusparselt-cu12==0.7.1; platform_system == 'Linux' and platform_machine == 'x86_64' | nvidia-nccl-cu12==2.27.3; platform_system == 'Linux' and platform_machine == 'x86_64' | nvidia-nvshmem-cu12==3.3.9; platform_system == 'Linux' and platform_machine == 'x86_64' | nvidia-nvtx-cu12==12.6.77; platform_system == 'Linux' and platform_machine == 'x86_64' | nvidia-nvjitlink-cu12==12.6.85; platform_system == 'Linux' and platform_machine == 'x86_64' | nvidia-cufile-cu12==1.11.1.6; platform_system == 'Linux' and platform_machine == 'x86_64'
PYTORCH_EXTRA_INSTALL_REQUIREMENTS: nvidia-cuda-nvrtc-cu12==12.6.77; platform_system == 'Linux' and platform_machine == 'x86_64' | nvidia-cuda-runtime-cu12==12.6.77; platform_system == 'Linux' and platform_machine == 'x86_64' | nvidia-cuda-cupti-cu12==12.6.80; platform_system == 'Linux' and platform_machine == 'x86_64' | nvidia-cudnn-cu12==9.10.2.21; platform_system == 'Linux' and platform_machine == 'x86_64' | nvidia-cublas-cu12==12.6.4.1; platform_system == 'Linux' and platform_machine == 'x86_64' | nvidia-cufft-cu12==11.3.0.4; platform_system == 'Linux' and platform_machine == 'x86_64' | nvidia-curand-cu12==10.3.7.77; platform_system == 'Linux' and platform_machine == 'x86_64' | nvidia-cusolver-cu12==11.7.1.2; platform_system == 'Linux' and platform_machine == 'x86_64' | nvidia-cusparse-cu12==12.5.4.2; platform_system == 'Linux' and platform_machine == 'x86_64' | nvidia-cusparselt-cu12==0.7.1; platform_system == 'Linux' and platform_machine == 'x86_64' | nvidia-nccl-cu12==2.27.3; platform_system == 'Linux' and platform_machine == 'x86_64' | nvidia-nvshmem-cu12==3.2.5; platform_system == 'Linux' and platform_machine == 'x86_64' | nvidia-nvtx-cu12==12.6.77; platform_system == 'Linux' and platform_machine == 'x86_64' | nvidia-nvjitlink-cu12==12.6.85; platform_system == 'Linux' and platform_machine == 'x86_64' | nvidia-cufile-cu12==1.11.1.6; platform_system == 'Linux' and platform_machine == 'x86_64'
secrets:
github-token: ${{ secrets.GITHUB_TOKEN }}
manywheel-py3_13-cuda12_6-test: # Testing
@ -2720,7 +2720,7 @@ jobs:
runner_prefix: "${{ needs.get-label-type.outputs.label-type }}"
build_name: manywheel-py3_13-cuda12_8
build_environment: linux-binary-manywheel
PYTORCH_EXTRA_INSTALL_REQUIREMENTS: nvidia-cuda-nvrtc-cu12==12.8.93; platform_system == 'Linux' and platform_machine == 'x86_64' | nvidia-cuda-runtime-cu12==12.8.90; platform_system == 'Linux' and platform_machine == 'x86_64' | nvidia-cuda-cupti-cu12==12.8.90; platform_system == 'Linux' and platform_machine == 'x86_64' | nvidia-cudnn-cu12==9.10.2.21; platform_system == 'Linux' and platform_machine == 'x86_64' | nvidia-cublas-cu12==12.8.4.1; platform_system == 'Linux' and platform_machine == 'x86_64' | nvidia-cufft-cu12==11.3.3.83; platform_system == 'Linux' and platform_machine == 'x86_64' | nvidia-curand-cu12==10.3.9.90; platform_system == 'Linux' and platform_machine == 'x86_64' | nvidia-cusolver-cu12==11.7.3.90; platform_system == 'Linux' and platform_machine == 'x86_64' | nvidia-cusparse-cu12==12.5.8.93; platform_system == 'Linux' and platform_machine == 'x86_64' | nvidia-cusparselt-cu12==0.7.1; platform_system == 'Linux' and platform_machine == 'x86_64' | nvidia-nccl-cu12==2.27.3; platform_system == 'Linux' and platform_machine == 'x86_64' | nvidia-nvshmem-cu12==3.3.9; platform_system == 'Linux' and platform_machine == 'x86_64' | nvidia-nvtx-cu12==12.8.90; platform_system == 'Linux' and platform_machine == 'x86_64' | nvidia-nvjitlink-cu12==12.8.93; platform_system == 'Linux' and platform_machine == 'x86_64' | nvidia-cufile-cu12==1.13.1.3; platform_system == 'Linux' and platform_machine == 'x86_64'
PYTORCH_EXTRA_INSTALL_REQUIREMENTS: nvidia-cuda-nvrtc-cu12==12.8.93; platform_system == 'Linux' and platform_machine == 'x86_64' | nvidia-cuda-runtime-cu12==12.8.90; platform_system == 'Linux' and platform_machine == 'x86_64' | nvidia-cuda-cupti-cu12==12.8.90; platform_system == 'Linux' and platform_machine == 'x86_64' | nvidia-cudnn-cu12==9.10.2.21; platform_system == 'Linux' and platform_machine == 'x86_64' | nvidia-cublas-cu12==12.8.4.1; platform_system == 'Linux' and platform_machine == 'x86_64' | nvidia-cufft-cu12==11.3.3.83; platform_system == 'Linux' and platform_machine == 'x86_64' | nvidia-curand-cu12==10.3.9.90; platform_system == 'Linux' and platform_machine == 'x86_64' | nvidia-cusolver-cu12==11.7.3.90; platform_system == 'Linux' and platform_machine == 'x86_64' | nvidia-cusparse-cu12==12.5.8.93; platform_system == 'Linux' and platform_machine == 'x86_64' | nvidia-cusparselt-cu12==0.7.1; platform_system == 'Linux' and platform_machine == 'x86_64' | nvidia-nccl-cu12==2.27.3; platform_system == 'Linux' and platform_machine == 'x86_64' | nvidia-nvshmem-cu12==3.2.5; platform_system == 'Linux' and platform_machine == 'x86_64' | nvidia-nvtx-cu12==12.8.90; platform_system == 'Linux' and platform_machine == 'x86_64' | nvidia-nvjitlink-cu12==12.8.93; platform_system == 'Linux' and platform_machine == 'x86_64' | nvidia-cufile-cu12==1.13.1.3; platform_system == 'Linux' and platform_machine == 'x86_64'
secrets:
github-token: ${{ secrets.GITHUB_TOKEN }}
manywheel-py3_13-cuda12_8-test: # Testing
@ -2789,7 +2789,7 @@ jobs:
runner_prefix: "${{ needs.get-label-type.outputs.label-type }}"
build_name: manywheel-py3_13-cuda12_9
build_environment: linux-binary-manywheel
PYTORCH_EXTRA_INSTALL_REQUIREMENTS: nvidia-cuda-nvrtc-cu12==12.9.86; platform_system == 'Linux' and platform_machine == 'x86_64' | nvidia-cuda-runtime-cu12==12.9.79; platform_system == 'Linux' and platform_machine == 'x86_64' | nvidia-cuda-cupti-cu12==12.9.79; platform_system == 'Linux' and platform_machine == 'x86_64' | nvidia-cudnn-cu12==9.10.2.21; platform_system == 'Linux' and platform_machine == 'x86_64' | nvidia-cublas-cu12==12.9.1.4; platform_system == 'Linux' and platform_machine == 'x86_64' | nvidia-cufft-cu12==11.4.1.4; platform_system == 'Linux' and platform_machine == 'x86_64' | nvidia-curand-cu12==10.3.10.19; platform_system == 'Linux' and platform_machine == 'x86_64' | nvidia-cusolver-cu12==11.7.5.82; platform_system == 'Linux' and platform_machine == 'x86_64' | nvidia-cusparse-cu12==12.5.10.65; platform_system == 'Linux' and platform_machine == 'x86_64' | nvidia-cusparselt-cu12==0.7.1; platform_system == 'Linux' and platform_machine == 'x86_64' | nvidia-nccl-cu12==2.27.3; platform_system == 'Linux' and platform_machine == 'x86_64' | nvidia-nvshmem-cu12==3.3.9; platform_system == 'Linux' and platform_machine == 'x86_64' | nvidia-nvtx-cu12==12.9.79; platform_system == 'Linux' and platform_machine == 'x86_64' | nvidia-nvjitlink-cu12==12.9.86; platform_system == 'Linux' and platform_machine == 'x86_64' | nvidia-cufile-cu12==1.14.1.1; platform_system == 'Linux' and platform_machine == 'x86_64'
PYTORCH_EXTRA_INSTALL_REQUIREMENTS: nvidia-cuda-nvrtc-cu12==12.9.86; platform_system == 'Linux' and platform_machine == 'x86_64' | nvidia-cuda-runtime-cu12==12.9.79; platform_system == 'Linux' and platform_machine == 'x86_64' | nvidia-cuda-cupti-cu12==12.9.79; platform_system == 'Linux' and platform_machine == 'x86_64' | nvidia-cudnn-cu12==9.10.2.21; platform_system == 'Linux' and platform_machine == 'x86_64' | nvidia-cublas-cu12==12.9.1.4; platform_system == 'Linux' and platform_machine == 'x86_64' | nvidia-cufft-cu12==11.4.1.4; platform_system == 'Linux' and platform_machine == 'x86_64' | nvidia-curand-cu12==10.3.10.19; platform_system == 'Linux' and platform_machine == 'x86_64' | nvidia-cusolver-cu12==11.7.5.82; platform_system == 'Linux' and platform_machine == 'x86_64' | nvidia-cusparse-cu12==12.5.10.65; platform_system == 'Linux' and platform_machine == 'x86_64' | nvidia-cusparselt-cu12==0.7.1; platform_system == 'Linux' and platform_machine == 'x86_64' | nvidia-nccl-cu12==2.27.3; platform_system == 'Linux' and platform_machine == 'x86_64' | nvidia-nvtx-cu12==12.9.79; platform_system == 'Linux' and platform_machine == 'x86_64' | nvidia-nvjitlink-cu12==12.9.86; platform_system == 'Linux' and platform_machine == 'x86_64' | nvidia-cufile-cu12==1.14.1.1; platform_system == 'Linux' and platform_machine == 'x86_64'
secrets:
github-token: ${{ secrets.GITHUB_TOKEN }}
manywheel-py3_13-cuda12_9-test: # Testing
@ -3264,7 +3264,7 @@ jobs:
runner_prefix: "${{ needs.get-label-type.outputs.label-type }}"
build_name: manywheel-py3_13t-cuda12_6
build_environment: linux-binary-manywheel
PYTORCH_EXTRA_INSTALL_REQUIREMENTS: nvidia-cuda-nvrtc-cu12==12.6.77; platform_system == 'Linux' and platform_machine == 'x86_64' | nvidia-cuda-runtime-cu12==12.6.77; platform_system == 'Linux' and platform_machine == 'x86_64' | nvidia-cuda-cupti-cu12==12.6.80; platform_system == 'Linux' and platform_machine == 'x86_64' | nvidia-cudnn-cu12==9.10.2.21; platform_system == 'Linux' and platform_machine == 'x86_64' | nvidia-cublas-cu12==12.6.4.1; platform_system == 'Linux' and platform_machine == 'x86_64' | nvidia-cufft-cu12==11.3.0.4; platform_system == 'Linux' and platform_machine == 'x86_64' | nvidia-curand-cu12==10.3.7.77; platform_system == 'Linux' and platform_machine == 'x86_64' | nvidia-cusolver-cu12==11.7.1.2; platform_system == 'Linux' and platform_machine == 'x86_64' | nvidia-cusparse-cu12==12.5.4.2; platform_system == 'Linux' and platform_machine == 'x86_64' | nvidia-cusparselt-cu12==0.7.1; platform_system == 'Linux' and platform_machine == 'x86_64' | nvidia-nccl-cu12==2.27.3; platform_system == 'Linux' and platform_machine == 'x86_64' | nvidia-nvshmem-cu12==3.3.9; platform_system == 'Linux' and platform_machine == 'x86_64' | nvidia-nvtx-cu12==12.6.77; platform_system == 'Linux' and platform_machine == 'x86_64' | nvidia-nvjitlink-cu12==12.6.85; platform_system == 'Linux' and platform_machine == 'x86_64' | nvidia-cufile-cu12==1.11.1.6; platform_system == 'Linux' and platform_machine == 'x86_64'
PYTORCH_EXTRA_INSTALL_REQUIREMENTS: nvidia-cuda-nvrtc-cu12==12.6.77; platform_system == 'Linux' and platform_machine == 'x86_64' | nvidia-cuda-runtime-cu12==12.6.77; platform_system == 'Linux' and platform_machine == 'x86_64' | nvidia-cuda-cupti-cu12==12.6.80; platform_system == 'Linux' and platform_machine == 'x86_64' | nvidia-cudnn-cu12==9.10.2.21; platform_system == 'Linux' and platform_machine == 'x86_64' | nvidia-cublas-cu12==12.6.4.1; platform_system == 'Linux' and platform_machine == 'x86_64' | nvidia-cufft-cu12==11.3.0.4; platform_system == 'Linux' and platform_machine == 'x86_64' | nvidia-curand-cu12==10.3.7.77; platform_system == 'Linux' and platform_machine == 'x86_64' | nvidia-cusolver-cu12==11.7.1.2; platform_system == 'Linux' and platform_machine == 'x86_64' | nvidia-cusparse-cu12==12.5.4.2; platform_system == 'Linux' and platform_machine == 'x86_64' | nvidia-cusparselt-cu12==0.7.1; platform_system == 'Linux' and platform_machine == 'x86_64' | nvidia-nccl-cu12==2.27.3; platform_system == 'Linux' and platform_machine == 'x86_64' | nvidia-nvshmem-cu12==3.2.5; platform_system == 'Linux' and platform_machine == 'x86_64' | nvidia-nvtx-cu12==12.6.77; platform_system == 'Linux' and platform_machine == 'x86_64' | nvidia-nvjitlink-cu12==12.6.85; platform_system == 'Linux' and platform_machine == 'x86_64' | nvidia-cufile-cu12==1.11.1.6; platform_system == 'Linux' and platform_machine == 'x86_64'
secrets:
github-token: ${{ secrets.GITHUB_TOKEN }}
manywheel-py3_13t-cuda12_6-test: # Testing
@ -3333,7 +3333,7 @@ jobs:
runner_prefix: "${{ needs.get-label-type.outputs.label-type }}"
build_name: manywheel-py3_13t-cuda12_8
build_environment: linux-binary-manywheel
PYTORCH_EXTRA_INSTALL_REQUIREMENTS: nvidia-cuda-nvrtc-cu12==12.8.93; platform_system == 'Linux' and platform_machine == 'x86_64' | nvidia-cuda-runtime-cu12==12.8.90; platform_system == 'Linux' and platform_machine == 'x86_64' | nvidia-cuda-cupti-cu12==12.8.90; platform_system == 'Linux' and platform_machine == 'x86_64' | nvidia-cudnn-cu12==9.10.2.21; platform_system == 'Linux' and platform_machine == 'x86_64' | nvidia-cublas-cu12==12.8.4.1; platform_system == 'Linux' and platform_machine == 'x86_64' | nvidia-cufft-cu12==11.3.3.83; platform_system == 'Linux' and platform_machine == 'x86_64' | nvidia-curand-cu12==10.3.9.90; platform_system == 'Linux' and platform_machine == 'x86_64' | nvidia-cusolver-cu12==11.7.3.90; platform_system == 'Linux' and platform_machine == 'x86_64' | nvidia-cusparse-cu12==12.5.8.93; platform_system == 'Linux' and platform_machine == 'x86_64' | nvidia-cusparselt-cu12==0.7.1; platform_system == 'Linux' and platform_machine == 'x86_64' | nvidia-nccl-cu12==2.27.3; platform_system == 'Linux' and platform_machine == 'x86_64' | nvidia-nvshmem-cu12==3.3.9; platform_system == 'Linux' and platform_machine == 'x86_64' | nvidia-nvtx-cu12==12.8.90; platform_system == 'Linux' and platform_machine == 'x86_64' | nvidia-nvjitlink-cu12==12.8.93; platform_system == 'Linux' and platform_machine == 'x86_64' | nvidia-cufile-cu12==1.13.1.3; platform_system == 'Linux' and platform_machine == 'x86_64'
PYTORCH_EXTRA_INSTALL_REQUIREMENTS: nvidia-cuda-nvrtc-cu12==12.8.93; platform_system == 'Linux' and platform_machine == 'x86_64' | nvidia-cuda-runtime-cu12==12.8.90; platform_system == 'Linux' and platform_machine == 'x86_64' | nvidia-cuda-cupti-cu12==12.8.90; platform_system == 'Linux' and platform_machine == 'x86_64' | nvidia-cudnn-cu12==9.10.2.21; platform_system == 'Linux' and platform_machine == 'x86_64' | nvidia-cublas-cu12==12.8.4.1; platform_system == 'Linux' and platform_machine == 'x86_64' | nvidia-cufft-cu12==11.3.3.83; platform_system == 'Linux' and platform_machine == 'x86_64' | nvidia-curand-cu12==10.3.9.90; platform_system == 'Linux' and platform_machine == 'x86_64' | nvidia-cusolver-cu12==11.7.3.90; platform_system == 'Linux' and platform_machine == 'x86_64' | nvidia-cusparse-cu12==12.5.8.93; platform_system == 'Linux' and platform_machine == 'x86_64' | nvidia-cusparselt-cu12==0.7.1; platform_system == 'Linux' and platform_machine == 'x86_64' | nvidia-nccl-cu12==2.27.3; platform_system == 'Linux' and platform_machine == 'x86_64' | nvidia-nvshmem-cu12==3.2.5; platform_system == 'Linux' and platform_machine == 'x86_64' | nvidia-nvtx-cu12==12.8.90; platform_system == 'Linux' and platform_machine == 'x86_64' | nvidia-nvjitlink-cu12==12.8.93; platform_system == 'Linux' and platform_machine == 'x86_64' | nvidia-cufile-cu12==1.13.1.3; platform_system == 'Linux' and platform_machine == 'x86_64'
secrets:
github-token: ${{ secrets.GITHUB_TOKEN }}
manywheel-py3_13t-cuda12_8-test: # Testing
@ -3402,7 +3402,7 @@ jobs:
runner_prefix: "${{ needs.get-label-type.outputs.label-type }}"
build_name: manywheel-py3_13t-cuda12_9
build_environment: linux-binary-manywheel
PYTORCH_EXTRA_INSTALL_REQUIREMENTS: nvidia-cuda-nvrtc-cu12==12.9.86; platform_system == 'Linux' and platform_machine == 'x86_64' | nvidia-cuda-runtime-cu12==12.9.79; platform_system == 'Linux' and platform_machine == 'x86_64' | nvidia-cuda-cupti-cu12==12.9.79; platform_system == 'Linux' and platform_machine == 'x86_64' | nvidia-cudnn-cu12==9.10.2.21; platform_system == 'Linux' and platform_machine == 'x86_64' | nvidia-cublas-cu12==12.9.1.4; platform_system == 'Linux' and platform_machine == 'x86_64' | nvidia-cufft-cu12==11.4.1.4; platform_system == 'Linux' and platform_machine == 'x86_64' | nvidia-curand-cu12==10.3.10.19; platform_system == 'Linux' and platform_machine == 'x86_64' | nvidia-cusolver-cu12==11.7.5.82; platform_system == 'Linux' and platform_machine == 'x86_64' | nvidia-cusparse-cu12==12.5.10.65; platform_system == 'Linux' and platform_machine == 'x86_64' | nvidia-cusparselt-cu12==0.7.1; platform_system == 'Linux' and platform_machine == 'x86_64' | nvidia-nccl-cu12==2.27.3; platform_system == 'Linux' and platform_machine == 'x86_64' | nvidia-nvshmem-cu12==3.3.9; platform_system == 'Linux' and platform_machine == 'x86_64' | nvidia-nvtx-cu12==12.9.79; platform_system == 'Linux' and platform_machine == 'x86_64' | nvidia-nvjitlink-cu12==12.9.86; platform_system == 'Linux' and platform_machine == 'x86_64' | nvidia-cufile-cu12==1.14.1.1; platform_system == 'Linux' and platform_machine == 'x86_64'
PYTORCH_EXTRA_INSTALL_REQUIREMENTS: nvidia-cuda-nvrtc-cu12==12.9.86; platform_system == 'Linux' and platform_machine == 'x86_64' | nvidia-cuda-runtime-cu12==12.9.79; platform_system == 'Linux' and platform_machine == 'x86_64' | nvidia-cuda-cupti-cu12==12.9.79; platform_system == 'Linux' and platform_machine == 'x86_64' | nvidia-cudnn-cu12==9.10.2.21; platform_system == 'Linux' and platform_machine == 'x86_64' | nvidia-cublas-cu12==12.9.1.4; platform_system == 'Linux' and platform_machine == 'x86_64' | nvidia-cufft-cu12==11.4.1.4; platform_system == 'Linux' and platform_machine == 'x86_64' | nvidia-curand-cu12==10.3.10.19; platform_system == 'Linux' and platform_machine == 'x86_64' | nvidia-cusolver-cu12==11.7.5.82; platform_system == 'Linux' and platform_machine == 'x86_64' | nvidia-cusparse-cu12==12.5.10.65; platform_system == 'Linux' and platform_machine == 'x86_64' | nvidia-cusparselt-cu12==0.7.1; platform_system == 'Linux' and platform_machine == 'x86_64' | nvidia-nccl-cu12==2.27.3; platform_system == 'Linux' and platform_machine == 'x86_64' | nvidia-nvtx-cu12==12.9.79; platform_system == 'Linux' and platform_machine == 'x86_64' | nvidia-nvjitlink-cu12==12.9.86; platform_system == 'Linux' and platform_machine == 'x86_64' | nvidia-cufile-cu12==1.14.1.1; platform_system == 'Linux' and platform_machine == 'x86_64'
secrets:
github-token: ${{ secrets.GITHUB_TOKEN }}
manywheel-py3_13t-cuda12_9-test: # Testing

.github/workflows/h100-distributed.yml
@ -8,8 +8,6 @@ on:
push:
tags:
- ciflow/h100-distributed/*
schedule:
- cron: 46 8 * * * # about 1:46am PDT

concurrency:
group: ${{ github.workflow }}-${{ github.event.pull_request.number || github.sha }}-${{ github.event_name == 'workflow_dispatch' }}-${{ github.event_name == 'schedule' }}
@ -27,14 +25,14 @@ jobs:
curr_branch: ${{ github.head_ref || github.ref_name }}
curr_ref_type: ${{ github.ref_type }}

linux-jammy-cuda12_8-py3_10-gcc11-sm90-build-dist:
name: linux-jammy-cuda12.8-py3.10-gcc11-sm90-dist
linux-jammy-cuda12_8-py3_10-gcc11-sm90-build:
name: linux-jammy-cuda12.8-py3.10-gcc11-sm90
uses: ./.github/workflows/_linux-build.yml
needs: get-label-type
with:
runner_prefix: "${{ needs.get-label-type.outputs.label-type }}"
runner: "linux.12xlarge"
build-environment: linux-jammy-cuda12.8-py3.10-gcc11-sm90-dist
build-environment: linux-jammy-cuda12.8-py3.10-gcc11-sm90
docker-image-name: ci-image:pytorch-linux-jammy-cuda12.8-cudnn9-py3-gcc11
cuda-arch-list: '9.0'
test-matrix: |
@ -44,12 +42,12 @@ jobs:
secrets: inherit

linux-jammy-cuda12_8-py3_10-gcc11-sm90-test:
name: linux-jammy-cuda12.8-py3.10-gcc11-sm90-dist
name: linux-jammy-cuda12.8-py3.10-gcc11-sm90
uses: ./.github/workflows/_linux-test.yml
needs:
- linux-jammy-cuda12_8-py3_10-gcc11-sm90-build-dist
- linux-jammy-cuda12_8-py3_10-gcc11-sm90-build
with:
build-environment: linux-jammy-cuda12.8-py3.10-gcc11-sm90-dist
docker-image: ${{ needs.linux-jammy-cuda12_8-py3_10-gcc11-sm90-build-dist.outputs.docker-image }}
test-matrix: ${{ needs.linux-jammy-cuda12_8-py3_10-gcc11-sm90-build-dist.outputs.test-matrix }}
build-environment: linux-jammy-cuda12.8-py3.10-gcc11-sm90
docker-image: ${{ needs.linux-jammy-cuda12_8-py3_10-gcc11-sm90-build.outputs.docker-image }}
test-matrix: ${{ needs.linux-jammy-cuda12_8-py3_10-gcc11-sm90-build.outputs.test-matrix }}
secrets: inherit

.github/workflows/h100-symm-mem.yml
@ -1,54 +0,0 @@
|
||||
name: Limited CI for symmetric memory tests on H100
|
||||
|
||||
on:
|
||||
pull_request:
|
||||
paths:
|
||||
- .github/workflows/h100-symm-mem.yml
|
||||
workflow_dispatch:
|
||||
push:
|
||||
tags:
|
||||
- ciflow/h100-symm-mem/*
|
||||
schedule:
|
||||
- cron: 22 8 * * * # about 1:22am PDT
|
||||
|
||||
concurrency:
|
||||
group: ${{ github.workflow }}-${{ github.event.pull_request.number || github.sha }}-${{ github.event_name == 'workflow_dispatch' }}-${{ github.event_name == 'schedule' }}
|
||||
cancel-in-progress: true
|
||||
|
||||
jobs:
|
||||
|
||||
get-label-type:
|
||||
if: github.repository_owner == 'pytorch'
|
||||
name: get-label-type
|
||||
uses: pytorch/pytorch/.github/workflows/_runner-determinator.yml@main
|
||||
with:
|
||||
triggering_actor: ${{ github.triggering_actor }}
|
||||
issue_owner: ${{ github.event.pull_request.user.login || github.event.issue.user.login }}
|
||||
curr_branch: ${{ github.head_ref || github.ref_name }}
|
||||
curr_ref_type: ${{ github.ref_type }}
|
||||
|
||||
linux-jammy-cuda12_8-py3_10-gcc11-sm90-build-symm:
|
||||
name: linux-jammy-cuda12.8-py3.10-gcc11-sm90-symm
|
||||
uses: ./.github/workflows/_linux-build.yml
|
||||
needs: get-label-type
|
||||
with:
|
||||
runner_prefix: "${{ needs.get-label-type.outputs.label-type }}"
|
||||
build-environment: linux-jammy-cuda12.8-py3.10-gcc11-sm90-symm
|
||||
docker-image-name: ci-image:pytorch-linux-jammy-cuda12.8-cudnn9-py3-gcc11
|
||||
cuda-arch-list: '9.0'
|
||||
test-matrix: |
|
||||
{ include: [
|
||||
{ config: "h100-symm-mem", shard: 1, num_shards: 1, runner: "linux.aws.h100.4" },
|
||||
]}
|
||||
secrets: inherit
|
||||
|
||||
linux-jammy-cuda12_8-py3_10-gcc11-sm90-test:
|
||||
name: linux-jammy-cuda12.8-py3.10-gcc11-sm90-symm
|
||||
uses: ./.github/workflows/_linux-test.yml
|
||||
needs:
|
||||
- linux-jammy-cuda12_8-py3_10-gcc11-sm90-build-symm
|
||||
with:
|
||||
build-environment: linux-jammy-cuda12.8-py3.10-gcc11-sm90-symm
|
||||
docker-image: ${{ needs.linux-jammy-cuda12_8-py3_10-gcc11-sm90-build-symm.outputs.docker-image }}
|
||||
test-matrix: ${{ needs.linux-jammy-cuda12_8-py3_10-gcc11-sm90-build-symm.outputs.test-matrix }}
|
||||
secrets: inherit
|
||||
@ -13,9 +13,7 @@ concurrency:
group: ${{ github.workflow }}-${{ github.event.pull_request.number || github.ref_name }}-${{ github.ref_type == 'branch' && github.sha }}-${{ github.event_name == 'workflow_dispatch' }}-${{ github.event_name == 'schedule' }}
cancel-in-progress: true

permissions:
id-token: write
contents: read
permissions: read-all

jobs:
linux-jammy-cpu-py3_9-gcc11-inductor-build:

@ -13,9 +13,7 @@ concurrency:
group: ${{ github.workflow }}-${{ github.event.pull_request.number || github.ref_name }}-${{ github.ref_type == 'branch' && github.sha }}-${{ github.event_name == 'workflow_dispatch' }}-${{ github.event_name == 'schedule' }}
cancel-in-progress: true

permissions:
id-token: write
contents: read
permissions: read-all

jobs:
get-default-label-prefix:

.github/workflows/inductor-nightly.yml
@ -16,9 +16,7 @@ concurrency:
group: ${{ github.workflow }}-${{ github.event.pull_request.number || github.sha }}-${{ github.event_name == 'workflow_dispatch' }}
cancel-in-progress: true

permissions:
id-token: write
contents: read
permissions: read-all

jobs:
get-default-label-prefix:

.github/workflows/inductor-perf-compare.yml
@ -10,9 +10,7 @@ concurrency:
group: ${{ github.workflow }}-${{ github.event.pull_request.number || github.ref_name }}-${{ github.ref_type == 'branch' && github.sha }}-${{ github.event_name == 'workflow_dispatch' }}
cancel-in-progress: true

permissions:
id-token: write
contents: read
permissions: read-all

jobs:
get-default-label-prefix:

@ -48,9 +48,7 @@ concurrency:
group: ${{ github.workflow }}-${{ github.event.pull_request.number || github.ref_name }}-${{ github.ref_type == 'branch' && github.sha }}-${{ github.event_name == 'workflow_dispatch' }}-${{ github.event_name == 'schedule' }}
cancel-in-progress: true

permissions:
id-token: write
contents: read
permissions: read-all

jobs:
get-label-type:

@ -63,9 +63,7 @@ concurrency:
group: ${{ github.workflow }}-${{ github.event.pull_request.number || github.ref_name }}-${{ github.ref_type == 'branch' && github.sha }}-${{ github.event_name == 'workflow_dispatch' }}-${{ github.event_name == 'schedule' }}
cancel-in-progress: true

permissions:
id-token: write
contents: read
permissions: read-all

jobs:
get-label-type:

@ -5,7 +5,7 @@ on:
tags:
- ciflow/inductor-perf-test-nightly-rocm/*
schedule:
- cron: 0 7 * * 0,3
- cron: 0 7 * * 0
# NB: GitHub has an upper limit of 10 inputs here, so before we can sort it
# out, let try to run torchao cudagraphs_low_precision as part of cudagraphs
workflow_dispatch:
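The schedule edit above drops the Wednesday trigger, leaving a single weekly run. A quick way to sanity-check the two cron expressions, as a hedged sketch assuming the third-party croniter package (CI itself does not run this):

```python
from datetime import datetime
from croniter import croniter

def next_runs(expr: str, n: int = 4) -> list[str]:
    # Enumerate the next n fire times of a cron expression from a fixed start.
    it = croniter(expr, datetime(2025, 1, 1))
    return [it.get_next(datetime).strftime("%a %Y-%m-%d %H:%M") for _ in range(n)]

print(next_runs("0 7 * * 0,3"))  # old: fires Sundays and Wednesdays at 07:00
print(next_runs("0 7 * * 0"))    # new: fires Sundays only
```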
@ -88,23 +88,18 @@ jobs:
docker-image-name: ci-image:pytorch-linux-jammy-rocm-n-py3
test-matrix: |
{ include: [
{ config: "inductor_huggingface_perf_rocm", shard: 1, num_shards: 4, runner: "linux.rocm.gpu.mi300.2" },
{ config: "inductor_huggingface_perf_rocm", shard: 2, num_shards: 4, runner: "linux.rocm.gpu.mi300.2" },
{ config: "inductor_huggingface_perf_rocm", shard: 3, num_shards: 4, runner: "linux.rocm.gpu.mi300.2" },
{ config: "inductor_huggingface_perf_rocm", shard: 4, num_shards: 4, runner: "linux.rocm.gpu.mi300.2" },
{ config: "inductor_huggingface_perf_rocm", shard: 1, num_shards: 3, runner: "linux.rocm.gpu.mi300.2" },
{ config: "inductor_huggingface_perf_rocm", shard: 2, num_shards: 3, runner: "linux.rocm.gpu.mi300.2" },
{ config: "inductor_huggingface_perf_rocm", shard: 3, num_shards: 3, runner: "linux.rocm.gpu.mi300.2" },
{ config: "inductor_timm_perf_rocm", shard: 1, num_shards: 5, runner: "linux.rocm.gpu.mi300.2" },
{ config: "inductor_timm_perf_rocm", shard: 2, num_shards: 5, runner: "linux.rocm.gpu.mi300.2" },
{ config: "inductor_timm_perf_rocm", shard: 3, num_shards: 5, runner: "linux.rocm.gpu.mi300.2" },
{ config: "inductor_timm_perf_rocm", shard: 4, num_shards: 5, runner: "linux.rocm.gpu.mi300.2" },
{ config: "inductor_timm_perf_rocm", shard: 5, num_shards: 5, runner: "linux.rocm.gpu.mi300.2" },
{ config: "inductor_torchbench_perf_rocm", shard: 1, num_shards: 8, runner: "linux.rocm.gpu.mi300.2" },
{ config: "inductor_torchbench_perf_rocm", shard: 2, num_shards: 8, runner: "linux.rocm.gpu.mi300.2" },
{ config: "inductor_torchbench_perf_rocm", shard: 3, num_shards: 8, runner: "linux.rocm.gpu.mi300.2" },
{ config: "inductor_torchbench_perf_rocm", shard: 4, num_shards: 8, runner: "linux.rocm.gpu.mi300.2" },
{ config: "inductor_torchbench_perf_rocm", shard: 5, num_shards: 8, runner: "linux.rocm.gpu.mi300.2" },
{ config: "inductor_torchbench_perf_rocm", shard: 6, num_shards: 8, runner: "linux.rocm.gpu.mi300.2" },
{ config: "inductor_torchbench_perf_rocm", shard: 7, num_shards: 8, runner: "linux.rocm.gpu.mi300.2" },
{ config: "inductor_torchbench_perf_rocm", shard: 8, num_shards: 8, runner: "linux.rocm.gpu.mi300.2" },
{ config: "inductor_torchbench_perf_rocm", shard: 1, num_shards: 4, runner: "linux.rocm.gpu.mi300.2" },
{ config: "inductor_torchbench_perf_rocm", shard: 2, num_shards: 4, runner: "linux.rocm.gpu.mi300.2" },
{ config: "inductor_torchbench_perf_rocm", shard: 3, num_shards: 4, runner: "linux.rocm.gpu.mi300.2" },
{ config: "inductor_torchbench_perf_rocm", shard: 4, num_shards: 4, runner: "linux.rocm.gpu.mi300.2" },
]}
secrets: inherit
|
||||
|
||||
|
||||
@ -47,15 +47,13 @@ on:
      description: The list of configs used the benchmark
      required: false
      type: string
      default: inductor_huggingface_perf_cpu_x86_zen,inductor_timm_perf_cpu_x86_zen,inductor_torchbench_perf_cpu_x86_zen
      default: inductor_huggingface_perf_zen_cpu_x86,inductor_timm_perf_zen_cpu_x86,inductor_torchbench_perf_zen_cpu_x86

concurrency:
  group: ${{ github.workflow }}-${{ github.event.pull_request.number || github.ref_name }}-${{ github.ref_type == 'branch' && github.sha }}-${{ github.event_name == 'workflow_dispatch' }}-${{ github.event_name == 'schedule' }}
  cancel-in-progress: true

permissions:
  id-token: write
  contents: read
permissions: read-all

jobs:
  get-label-type:

@ -79,18 +77,18 @@ jobs:
      docker-image-name: ci-image:pytorch-linux-jammy-py3.9-gcc11-inductor-benchmarks
      test-matrix: |
        { include: [
          { config: "inductor_huggingface_perf_cpu_x86_zen", shard: 1, num_shards: 3, runner: "linux.24xlarge.amd" },
          { config: "inductor_huggingface_perf_cpu_x86_zen", shard: 2, num_shards: 3, runner: "linux.24xlarge.amd" },
          { config: "inductor_huggingface_perf_cpu_x86_zen", shard: 3, num_shards: 3, runner: "linux.24xlarge.amd" },
          { config: "inductor_timm_perf_cpu_x86_zen", shard: 1, num_shards: 5, runner: "linux.24xlarge.amd" },
          { config: "inductor_timm_perf_cpu_x86_zen", shard: 2, num_shards: 5, runner: "linux.24xlarge.amd" },
          { config: "inductor_timm_perf_cpu_x86_zen", shard: 3, num_shards: 5, runner: "linux.24xlarge.amd" },
          { config: "inductor_timm_perf_cpu_x86_zen", shard: 4, num_shards: 5, runner: "linux.24xlarge.amd" },
          { config: "inductor_timm_perf_cpu_x86_zen", shard: 5, num_shards: 5, runner: "linux.24xlarge.amd" },
          { config: "inductor_torchbench_perf_cpu_x86_zen", shard: 1, num_shards: 4, runner: "linux.24xlarge.amd" },
          { config: "inductor_torchbench_perf_cpu_x86_zen", shard: 2, num_shards: 4, runner: "linux.24xlarge.amd" },
          { config: "inductor_torchbench_perf_cpu_x86_zen", shard: 3, num_shards: 4, runner: "linux.24xlarge.amd" },
          { config: "inductor_torchbench_perf_cpu_x86_zen", shard: 4, num_shards: 4, runner: "linux.24xlarge.amd" },
          { config: "inductor_huggingface_perf_zen_cpu_x86", shard: 1, num_shards: 3, runner: "linux.24xlarge.amd" },
          { config: "inductor_huggingface_perf_zen_cpu_x86", shard: 2, num_shards: 3, runner: "linux.24xlarge.amd" },
          { config: "inductor_huggingface_perf_zen_cpu_x86", shard: 3, num_shards: 3, runner: "linux.24xlarge.amd" },
          { config: "inductor_timm_perf_zen_cpu_x86", shard: 1, num_shards: 5, runner: "linux.24xlarge.amd" },
          { config: "inductor_timm_perf_zen_cpu_x86", shard: 2, num_shards: 5, runner: "linux.24xlarge.amd" },
          { config: "inductor_timm_perf_zen_cpu_x86", shard: 3, num_shards: 5, runner: "linux.24xlarge.amd" },
          { config: "inductor_timm_perf_zen_cpu_x86", shard: 4, num_shards: 5, runner: "linux.24xlarge.amd" },
          { config: "inductor_timm_perf_zen_cpu_x86", shard: 5, num_shards: 5, runner: "linux.24xlarge.amd" },
          { config: "inductor_torchbench_perf_zen_cpu_x86", shard: 1, num_shards: 4, runner: "linux.24xlarge.amd" },
          { config: "inductor_torchbench_perf_zen_cpu_x86", shard: 2, num_shards: 4, runner: "linux.24xlarge.amd" },
          { config: "inductor_torchbench_perf_zen_cpu_x86", shard: 3, num_shards: 4, runner: "linux.24xlarge.amd" },
          { config: "inductor_torchbench_perf_zen_cpu_x86", shard: 4, num_shards: 4, runner: "linux.24xlarge.amd" },
        ]}
      selected-test-configs: ${{ inputs.benchmark_configs }}
      secrets: inherit
@ -1,9 +1,6 @@
name: inductor-perf-nightly-x86

on:
  pull_request:
    paths:
      - .github/workflows/inductor-perf-test-nightly-x86.yml
  schedule:
    # - cron: 0 7 * * 1-6
    # - cron: 0 7 * * 0

@ -43,11 +40,6 @@ on:
      required: false
      type: boolean
      default: false
    freezing:
      description: Run freezing?
      required: false
      type: boolean
      default: true
    benchmark_configs:
      description: The list of configs used the benchmark
      required: false

@ -55,12 +47,10 @@ on:
      default: inductor_huggingface_perf_cpu_x86,inductor_timm_perf_cpu_x86,inductor_torchbench_perf_cpu_x86

concurrency:
  group: ${{ github.workflow }}-${{ github.event.pull_request.number || github.sha }}-${{ github.event_name == 'workflow_dispatch' }}-${{ github.ref_type == 'branch' && github.sha }}-${{ github.event_name == 'schedule' }}
  group: ${{ github.workflow }}-${{ github.event.pull_request.number || github.ref_name }}-${{ github.ref_type == 'branch' && github.sha }}-${{ github.event_name == 'workflow_dispatch' }}-${{ github.event_name == 'schedule' }}
  cancel-in-progress: true

permissions:
  id-token: write
  contents: read
permissions: read-all

jobs:
  get-label-type:

@ -100,14 +90,15 @@ jobs:
      selected-test-configs: ${{ inputs.benchmark_configs }}
      secrets: inherit

  linux-jammy-cpu-py3_9-gcc11-inductor-test-nightly-freezing:

  linux-jammy-cpu-py3_9-gcc11-inductor-test-nightly:
    name: linux-jammy-cpu-py3.9-gcc11-inductor
    uses: ./.github/workflows/_linux-test.yml
    needs: linux-jammy-cpu-py3_9-gcc11-inductor-build
    if: github.event.schedule == '0 7 * * *'
    with:
      build-environment: linux-jammy-py3.9-gcc11-build
      dashboard-tag: training-false-inference-true-default-true-dynamic-true-cppwrapper-true-aotinductor-true-freezing-true
      dashboard-tag: training-false-inference-true-default-true-dynamic-true-cppwrapper-true-aotinductor-true
      docker-image: ${{ needs.linux-jammy-cpu-py3_9-gcc11-inductor-build.outputs.docker-image }}
      test-matrix: ${{ needs.linux-jammy-cpu-py3_9-gcc11-inductor-build.outputs.test-matrix }}
      timeout-minutes: 720

@ -117,6 +108,7 @@ jobs:
      monitor-data-collect-interval: 4
    secrets: inherit


  linux-jammy-cpu-py3_9-gcc11-inductor-test:
    name: linux-jammy-cpu-py3.9-gcc11-inductor
    uses: ./.github/workflows/_linux-test.yml

@ -124,7 +116,7 @@ jobs:
    if: github.event_name == 'workflow_dispatch'
    with:
      build-environment: linux-jammy-py3.9-gcc11-build
      dashboard-tag: training-${{ inputs.training }}-inference-${{ inputs.inference }}-default-${{ inputs.default }}-dynamic-${{ inputs.dynamic }}-cppwrapper-${{ inputs.cppwrapper }}-aotinductor-${{ inputs.aotinductor }}-freezing-${{ inputs.freezing }}
      dashboard-tag: training-${{ inputs.training }}-inference-${{ inputs.inference }}-default-${{ inputs.default }}-dynamic-${{ inputs.dynamic }}-cppwrapper-${{ inputs.cppwrapper }}-aotinductor-${{ inputs.aotinductor }}
      docker-image: ${{ needs.linux-jammy-cpu-py3_9-gcc11-inductor-build.outputs.docker-image }}
      test-matrix: ${{ needs.linux-jammy-cpu-py3_9-gcc11-inductor-build.outputs.test-matrix }}
      timeout-minutes: 720

@ -63,9 +63,7 @@ concurrency:
  group: ${{ github.workflow }}-${{ github.event.pull_request.number || github.ref_name }}-${{ github.ref_type == 'branch' && github.sha }}-${{ github.event_name == 'workflow_dispatch' }}-${{ github.event_name == 'schedule' }}
  cancel-in-progress: true

permissions:
  id-token: write
  contents: read
permissions: read-all

jobs:
  get-label-type:
.github/workflows/inductor-periodic.yml
@ -15,9 +15,7 @@ concurrency:
  cancel-in-progress: true

permissions:
  id-token: write
  contents: read
permissions: read-all

jobs:
  get-default-label-prefix:
.github/workflows/inductor-unittest.yml
@ -12,9 +12,7 @@ concurrency:
  group: ${{ github.workflow }}-${{ github.event.pull_request.number || github.ref_name }}-${{ github.ref_type == 'branch' && github.sha }}-${{ github.event_name == 'workflow_dispatch' }}-unittest
  cancel-in-progress: true

permissions:
  id-token: write
  contents: read
permissions: read-all

jobs:
  get-label-type:
.github/workflows/inductor.yml
@ -22,9 +22,7 @@ concurrency:
  group: ${{ github.workflow }}-${{ github.event.pull_request.number || github.ref_name }}-${{ github.ref_type == 'branch' && github.sha }}-${{ github.event_name == 'workflow_dispatch' }}
  cancel-in-progress: true

permissions:
  id-token: write
  contents: read
permissions: read-all

jobs:
  unit-test:
.github/workflows/operator_benchmark.yml
@ -19,9 +19,7 @@ concurrency:
  group: ${{ github.workflow }}-${{ github.event.pull_request.number || github.sha }}-${{ github.event_name == 'workflow_dispatch' }}
  cancel-in-progress: true

permissions:
  id-token: write
  contents: read
permissions: read-all

jobs:
  linux-jammy-cpu-py3_9-gcc11-opbenchmark-build:
.github/workflows/periodic.yml
@ -20,9 +20,7 @@ concurrency:
  group: ${{ github.workflow }}-${{ github.event.pull_request.number || github.ref_name }}-${{ github.ref_type == 'branch' && github.sha }}-${{ github.event_name == 'workflow_dispatch' }}-${{ github.event_name == 'schedule' }}-${{ github.event.schedule }}
  cancel-in-progress: true

permissions:
  id-token: write
  contents: read
permissions: read-all

jobs:
  llm-td:
.github/workflows/pull.yml
@ -19,9 +19,7 @@ concurrency:
  group: ${{ github.workflow }}-${{ github.event.pull_request.number || github.sha }}-${{ github.event_name == 'workflow_dispatch' }}-${{ github.event_name == 'schedule' }}
  cancel-in-progress: true

permissions:
  id-token: write
  contents: read
permissions: read-all

jobs:
  llm-td:

@ -203,7 +201,6 @@ jobs:
          { config: "dynamo_wrapped", shard: 1, num_shards: 3, runner: "${{ needs.get-label-type.outputs.label-type }}linux.2xlarge" },
          { config: "dynamo_wrapped", shard: 2, num_shards: 3, runner: "${{ needs.get-label-type.outputs.label-type }}linux.2xlarge" },
          { config: "dynamo_wrapped", shard: 3, num_shards: 3, runner: "${{ needs.get-label-type.outputs.label-type }}linux.2xlarge" },
          { config: "einops", shard: 1, num_shards: 1, runner: "${{ needs.get-label-type.outputs.label-type }}linux.2xlarge" }
        ]}
      secrets: inherit

@ -239,7 +236,6 @@ jobs:
          { config: "dynamo_wrapped", shard: 1, num_shards: 3, runner: "${{ needs.get-label-type.outputs.label-type }}linux.2xlarge" },
          { config: "dynamo_wrapped", shard: 2, num_shards: 3, runner: "${{ needs.get-label-type.outputs.label-type }}linux.2xlarge" },
          { config: "dynamo_wrapped", shard: 3, num_shards: 3, runner: "${{ needs.get-label-type.outputs.label-type }}linux.2xlarge" },
          { config: "einops", shard: 1, num_shards: 1, runner: "${{ needs.get-label-type.outputs.label-type }}linux.2xlarge" }
        ]}
      secrets: inherit
.github/workflows/s390x-periodic.yml
@ -15,9 +15,7 @@ concurrency:
  group: ${{ github.workflow }}-${{ github.event.pull_request.number || github.ref_name }}-${{ github.ref_type == 'branch' && github.sha }}-${{ github.event_name == 'workflow_dispatch' }}-${{ github.event_name == 'schedule' }}-${{ github.event.schedule }}
  cancel-in-progress: true

permissions:
  id-token: write
  contents: read
permissions: read-all

jobs:
  llm-td:
.github/workflows/slow.yml
@ -18,9 +18,7 @@ concurrency:
  group: ${{ github.workflow }}-${{ github.event.pull_request.number || github.ref_name }}-${{ github.ref_type == 'branch' && github.sha }}-${{ github.event_name == 'workflow_dispatch' }}-${{ github.event_name == 'schedule' }}-${{ github.event.schedule }}
  cancel-in-progress: true

permissions:
  id-token: write
  contents: read
permissions: read-all

jobs:
  llm-td:
.github/workflows/trunk.yml
@ -16,9 +16,7 @@ concurrency:
  group: ${{ github.workflow }}-${{ github.event.pull_request.number || github.ref_name }}-${{ github.ref_type == 'branch' && github.sha }}-${{ github.event_name == 'workflow_dispatch' }}-${{ github.event_name == 'schedule' }}
  cancel-in-progress: true

permissions:
  id-token: write
  contents: read
permissions: read-all

jobs:
  llm-td:
.github/workflows/update-viablestrict.yml
@ -7,7 +7,7 @@ on:

concurrency:
  group: ${{ github.workflow }}
  cancel-in-progress: true
  cancel-in-progress: false

jobs:
  do_update_viablestrict:
.github/workflows/upload-test-stats.yml
@ -16,7 +16,6 @@ on:
      - rocm-mi300
      - inductor-micro-benchmark
      - inductor-micro-benchmark-x86
      - inductor-cu124
      - inductor-rocm
      - inductor-rocm-mi300
      - mac-mps
.github/workflows/xpu.yml
@ -5,10 +5,6 @@ on:
  tags:
    - ciflow/xpu/*
  workflow_dispatch:
  schedule:
    # Run 3 times on weekdays and less frequently on weekends.
    - cron: 45 0,8,16 * * 1-5
    - cron: 45 4 * * 0,6

concurrency:
  group: ${{ github.workflow }}-${{ github.event.pull_request.number || github.ref_name }}-${{ github.ref_type == 'branch' && github.sha }}-${{ github.event_name == 'workflow_dispatch' }}-${{ github.event_name == 'schedule' }}
.gitmodules
@ -129,6 +129,3 @@
[submodule "third_party/flash-attention"]
    path = third_party/flash-attention
    url = https://github.com/Dao-AILab/flash-attention.git
[submodule "third_party/aiter"]
    path = third_party/aiter
    url = https://github.com/ROCm/aiter.git
@ -87,7 +87,7 @@ include_patterns = [
    'torch/csrc/**/*.cpp',
    'torch/nativert/**/*.h',
    'torch/nativert/**/*.cpp',
    'torch/headeronly/**/*.h',
    'torch/standalone/**/*.h',
    'test/cpp/**/*.h',
    'test/cpp/**/*.cpp',
]

@ -122,7 +122,6 @@ is_formatter = true
[[linter]]
code = 'MYPY'
include_patterns = [
    'setup.py',
    'torch/**/*.py',
    'torch/**/*.pyi',
    'caffe2/**/*.py',

@ -242,7 +241,7 @@ include_patterns = [
    'torch/nativert/*.cpp',
    'torch/nativert/**/*.h',
    'torch/nativert/**/*.cpp',
    'torch/headeronly/**/*.h',
    'torch/standalone/**/*.h',
]
exclude_patterns = [
    # The negative filters below are to exclude files that include onnx_pb.h or

@ -1157,7 +1156,6 @@ exclude_patterns = [
    'torch/_vendor/**',
    'torch/_inductor/fx_passes/serialized_patterns/**',
    'torch/_inductor/autoheuristic/artifacts/**',
    'torch/utils/model_dump/preact.mjs',
    # These files are all grandfathered in, feel free to remove from this list
    # as necessary
    # NOTE: remove the patterns in the order they are listed

@ -1169,10 +1167,18 @@ exclude_patterns = [
    'test/**',
    'test/test_*',
    'test/[a-hA-h]*/**',
    'test/inductor/**',
    'test/dynamo/**',
    'test/distributed/**',
    'torch/**',
    'torch/_*/**',
    'torch/ao/**',
    'torch/fx/**',
    'torch/distributed/tensor/**',
    'torch/[j-o]*/**',
    'torch/utils/**',
    'torch/csrc/jit/**',
    'torch/csrc/jit/[a-o]*/**',
]
init_command = [
    'python3',
@ -671,6 +671,14 @@ flatbuffer_cc_library(
    out_prefix = "torch/csrc/jit/serialization/",
)

cc_library(
    name = "torch_standalone_headers",
    hdrs = glob([
        "torch/standalone/**/*.h"
    ]),
    visibility = ["//visibility:public"],
)

cc_library(
    name = "torch_headers",
    hdrs = if_cuda(
MANIFEST.in
@ -1,50 +1,31 @@
# Reference: https://setuptools.pypa.io/en/latest/userguide/miscellaneous.html

# Include source files in SDist
include MANIFEST.in
include CMakeLists.txt
include *.bzl *.bazel .bazel* BUILD *.BUILD BUILD.* WORKSPACE
include BUCK BUCK.*
include requirements*.txt
include version.txt
include [Mm]akefile *.[Mm]akefile [Mm]akefile.*
include [Dd]ockerfile *.[Dd]ockerfile [Dd]ockerfile.* .dockerignore
graft android
graft aten
graft binaries
graft c10
graft caffe2
graft cmake
graft functorch
graft third_party
graft tools
graft torch
graft torchgen
# FIXME: torch-xla build during codegen will fail if include this file in wheel
exclude torchgen/BUILD.bazel

# Misc files and directories in SDist
include *.md
include CITATION.cff
include LICENSE NOTICE
include mypy*.ini
graft benchmarks
graft docs
graft mypy_plugins
graft scripts

# Misc files needed for custom setuptools command
include .gitignore
include LICENSE
include NOTICE
include .gitmodules

# Include test suites in SDist
graft test
include pytest.ini
include .coveragerc

# Prune generated/compiled files
prune torchgen/packaged
include build_variables.bzl
include mypy.ini
include requirements.txt
include ufunc_defs.bzl
include version.txt
recursive-include android *.*
recursive-include aten *.*
recursive-include binaries *.*
recursive-include c10 *.*
recursive-include caffe2 *.*
recursive-include cmake *.*
recursive-include torch *.*
recursive-include tools *.*
recursive-include test *.*
recursive-include docs *.*
recursive-include ios *.*
recursive-include third_party *
recursive-include test *.*
recursive-include benchmarks *.*
recursive-include scripts *.*
recursive-include mypy_plugins *.*
recursive-include modules *.*
recursive-include functorch *.*
prune */__pycache__
global-exclude *.o *.obj *.so *.a *.dylib *.pxd *.dll *.lib *.py[cod]

prune */.git
global-exclude .git *~ *.swp
global-exclude *.o *.so *.dylib *.a .git *.pyc *.swp
Makefile
@ -57,8 +57,7 @@ setup-env-cuda:
setup-env-rocm:
	$(MAKE) setup-env PYTHON="$(PYTHON)" NIGHTLY_TOOL_OPTS="$(NIGHTLY_TOOL_OPTS) --rocm"

.PHONY: setup-lint
setup-lint .lintbin/.lintrunner.sha256: requirements.txt pyproject.toml .lintrunner.toml
.lintbin/.lintrunner.sha256: requirements.txt pyproject.toml .lintrunner.toml
	@echo "Setting up lintrunner..."
	$(PIP) install lintrunner
	lintrunner init

@ -66,6 +65,9 @@ setup-lint .lintbin/.lintrunner.sha256: requirements.txt pyproject.toml .lintrunner.toml
	@mkdir -p .lintbin
	@sha256sum requirements.txt pyproject.toml .lintrunner.toml > .lintbin/.lintrunner.sha256

.PHONY: setup-lint
setup-lint: .lintbin/.lintrunner.sha256

.PHONY: lazy-setup-lint
lazy-setup-lint: .lintbin/.lintrunner.sha256
	@if [ ! -x "$(shell command -v lintrunner)" ]; then \
@ -200,7 +200,7 @@ If you want to compile with CUDA support, [select a supported version of CUDA fr
- [NVIDIA cuDNN](https://developer.nvidia.com/cudnn) v8.5 or above
- [Compiler](https://gist.github.com/ax3l/9489132) compatible with CUDA

Note: You could refer to the [cuDNN Support Matrix](https://docs.nvidia.com/deeplearning/cudnn/backend/latest/reference/support-matrix.html) for cuDNN versions with the various supported CUDA, CUDA driver, and NVIDIA hardware.
Note: You could refer to the [cuDNN Support Matrix](https://docs.nvidia.com/deeplearning/cudnn/backend/latest/reference/support-matrix.html) for cuDNN versions with the various supported CUDA, CUDA driver and NVIDIA hardware

If you want to disable CUDA support, export the environment variable `USE_CUDA=0`.
Other potentially useful environment variables may be found in `setup.py`. If
@ -50,7 +50,6 @@ Following is the Release Compatibility Matrix for PyTorch releases:

| PyTorch version | Python | C++ | Stable CUDA | Experimental CUDA | Stable ROCm |
| --- | --- | --- | --- | --- | --- |
| 2.8 | >=3.9, <=3.13, (3.13t experimental) | C++17 | CUDA 12.6 (CUDNN 9.10.2.21), CUDA 12.8 (CUDNN 9.10.2.21) | CUDA 12.9 (CUDNN 9.10.2.21) | ROCm 6.4 |
| 2.7 | >=3.9, <=3.13, (3.13t experimental) | C++17 | CUDA 11.8 (CUDNN 9.1.0.70), CUDA 12.6 (CUDNN 9.5.1.17) | CUDA 12.8 (CUDNN 9.7.1.26) | ROCm 6.3 |
| 2.6 | >=3.9, <=3.13, (3.13t experimental) | C++17 | CUDA 11.8, CUDA 12.4 (CUDNN 9.1.0.70) | CUDA 12.6 (CUDNN 9.5.1.17) | ROCm 6.2.4 |
| 2.5 | >=3.9, <=3.12, (3.13 experimental) | C++17 | CUDA 11.8, CUDA 12.1, CUDA 12.4, CUDNN 9.1.0.70 | None | ROCm 6.2 |

@ -74,9 +73,9 @@ Following is the release cadence. All future dates below are tentative. For late
| 2.4 | Jun 2024 | Jul 2024 | Sept 2024 | Not planned |
| 2.5 | Sep 2024 | Oct 2024 | Nov 2024 | Not planned |
| 2.6 | Dec 2024 | Jan 2025 | Not planned | Not planned |
| 2.7 | Mar 2025 | Apr 2025 | Jun 2025 | Not planned |
| 2.7 | Mar 2025 | Apr 2025 | (May 2025) | (Jun 2025) |
| 2.8 | Jun 2025 | Jul 2025 | (Aug 2025) | (Sep 2025) |
| 2.9 | Sept 2025 | Oct 2025 | (Nov 2025) | (Dec 2025) |
| 2.9 | Aug 2025 | Oct 2025 | (Nov 2025) | (Dec 2025) |
| 2.10 | Dec 2025 | Jan 2026 | (Feb 2026) | (Mar 2026) |
| 2.11 | Mar 2026 | Apr 2026 | (Jun 2026) | (Jul 2026) |
@ -193,10 +193,6 @@ if(USE_FLASH_ATTENTION)
      add_subdirectory(native/transformers/hip/flash_attn/ck)
      file(GLOB flash_attention_hip_ck_hip "native/transformers/hip/flash_attn/ck/*.hip")
      list(APPEND native_transformers_hip_hip ${flash_attention_hip_ck_hip})
      # FAv3 Generation
      add_subdirectory(native/transformers/hip/flash_attn/ck/fav_v3)
      file(GLOB flash_attention_v3_hip "native/transformers/hip/flash_attn/ck/fav_v3/*.hip")
      list(APPEND native_transformers_hip_hip ${flash_attention_v3_hip})
    endif()
  endif()
  file(GLOB flash_attention_hip_aot_hip "native/transformers/hip/flash_attn/aot/*.hip")

@ -396,7 +392,6 @@ if(USE_ROCM)
  list(APPEND ATen_HIP_INCLUDE ${CMAKE_CURRENT_SOURCE_DIR}/../../../third_party/composable_kernel/include)
  list(APPEND ATen_HIP_INCLUDE ${CMAKE_CURRENT_SOURCE_DIR}/../../../third_party/composable_kernel/library/include)
  list(APPEND ATen_HIP_INCLUDE ${CMAKE_CURRENT_BINARY_DIR}/composable_kernel)
  list(APPEND ATen_HIP_INCLUDE ${CMAKE_CURRENT_SOURCE_DIR}/../../../third_party/aiter/csrc/include)
  _pytorch_rocm_generate_ck_conf()

  # Next two lines are needed because TunableOp uses third-party/fmt
@ -19,69 +19,9 @@
#if defined(__aarch64__) && !defined(C10_MOBILE)
#include <cpuinfo.h>
#endif

namespace at {

namespace {

/*
  These const variables defined the fp32 precisions for different backend
  We have "generic", "cuda", "mkldnn" backend now and we can choose fp32
  prevision from "ieee", "tf32", "bf16" and "none". The "ieee" precision means
  IEEE standard floating point format "tf32" and "bf16" means we are allowed to
  use "tf32" or "bf16" as internal computation data types for fp32 computations.
  And "none" means it is override-able by parent's node

  generic->mkldnn->matmul
                 ->conv
                 ->rnn
         ->cuda ->matmul
                 ->conv
                 ->rnn
*/
const std::map<std::string, std::vector<std::string>> _fp32_precisions = {
    {"generic", {{"ieee", "tf32", "bf16", "none"}}},
    {"mkldnn", {{"ieee", "bf16", "none"}}},
    {"cuda", {{"ieee", "tf32", "none"}}}};

// Check whether the backend and op are legal
void check_fp32_prec_backend_and_op(
    const std::string& backend,
    const std::string& op) {
  static std::vector<std::string> backends = {"generic", "mkldnn", "cuda"};
  static std::vector<std::string> operators = {"conv", "matmul", "rnn", "all"};
  TORCH_CHECK(
      std::find(backends.begin(), backends.end(), backend) != backends.end(),
      "Invalid backend: ",
      backend);
  TORCH_CHECK(
      std::find(operators.begin(), operators.end(), op) != operators.end(),
      "Invalid operator: ",
      op);
  if (backend == "generic") {
    TORCH_CHECK(op == "all", "Invalid operation for generic backend: ", op);
  }
}

// Return whether the precision is supported by backends
bool validate_fp32_prec(
    const std::string& backend,
    const std::string& precision) {
  auto iterp = _fp32_precisions.find(backend);
  TORCH_CHECK(iterp != _fp32_precisions.end());
  auto precisions = iterp->second;
  bool valid = std::find(precisions.begin(), precisions.end(), precision) !=
      precisions.end();
  return valid;
}

C10_ALWAYS_INLINE void warn_deprecated_fp32_precision_api(){
  TORCH_WARN_ONCE(
      "This API is going to be deprecated, please see "
      "https://pytorch.org/docs/main/notes/cuda.html#tensorfloat-32-tf32-on-ampere-and-later-devices"
  );
}
} // namespace

Context::Context() = default;

// TODO: This could be bad juju if someone calls globalContext() in the
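The deleted hunk above encodes a precedence chain for fp32 precision: an op-specific entry overrides the backend-wide "all" entry, which in turn overrides the generic default. A minimal standalone sketch of that lookup chain (the names here are illustrative only, not the ATen API):

```cpp
#include <iostream>
#include <map>
#include <string>

// Illustrative stand-in for the per-backend precision table (not ATen code).
std::map<std::string, std::map<std::string, std::string>> table = {
    {"generic", {{"all", "none"}}},
    {"mkldnn", {{"matmul", "none"}, {"all", "none"}}},
    {"cuda", {{"matmul", "tf32"}, {"all", "none"}}},
};

// Resolve an op's precision: op entry -> backend-wide "all" -> generic "all".
std::string resolve(const std::string& backend, const std::string& op) {
  std::string p = table[backend][op];
  if (p.empty() || p == "none") p = table[backend]["all"];
  if (p.empty() || p == "none") p = table["generic"]["all"];
  return p;
}

int main() {
  std::cout << resolve("cuda", "matmul") << '\n';   // "tf32" (op-level hit)
  std::cout << resolve("mkldnn", "matmul") << '\n'; // "none" (falls through)
}
```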
@ -175,29 +115,12 @@ void Context::setUserEnabledNNPACK(bool e) {
  enabled_nnpack = e;
}

bool Context::allowTF32CuDNN(const std::string& op) const {
  if (op.size() == 0){
    bool allow_tf32_rnn = float32Precision("cuda", "rnn") == "tf32";
    bool allow_tf32_conv = float32Precision("cuda", "conv") == "tf32";
    TORCH_CHECK(
        allow_tf32_rnn == allow_tf32_conv && allow_tf32_rnn == allow_tf32_cudnn,
        "PyTorch is checking whether allow_tf32 is enabled for cuDNN without a specific operator name,",
        "but the current flag(s) indicate that cuDNN conv and cuDNN RNN have different TF32 flags.",
        "This combination indicates that you have used a mix of the legacy and new APIs to set the TF32 flags. ",
        "We suggest only using the new API to set the TF32 flag(s). See also: ",
        "https://pytorch.org/docs/main/notes/cuda.html#tensorfloat-32-tf32-on-ampere-and-later-devices");
  } else {
    return float32Precision("cuda", op) == "tf32";
  }
  warn_deprecated_fp32_precision_api();
bool Context::allowTF32CuDNN() const {
  return allow_tf32_cudnn;
}

void Context::setAllowTF32CuDNN(bool b) {
  setFloat32Precision("cuda", "rnn", b ? "tf32" : "none");
  setFloat32Precision("cuda", "conv", b ? "tf32" : "none");
  allow_tf32_cudnn = b;
  warn_deprecated_fp32_precision_api();
}
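For context, a hedged usage sketch of the op-aware overload that appears on one side of this hunk; it assumes `at::globalContext()` as the entry point and the signatures shown in the Context.h diff further down:

```cpp
#include <ATen/Context.h>

void tf32_example() {
  auto& ctx = at::globalContext();
  // Per-op control: enable TF32 only for cuDNN convolutions.
  ctx.setFloat32Precision("cuda", "conv", "tf32");
  bool conv_tf32 = ctx.allowTF32CuDNN("conv"); // true
  // With no op name, the implementation cross-checks that the conv and rnn
  // flags agree before answering, to catch mixed legacy/new API usage.
  bool any_tf32 = ctx.allowTF32CuDNN();
  (void)conv_tf32;
  (void)any_tf32;
}
```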
void Context::setSDPPriorityOrder(const std::vector<int64_t>& order) {

@ -218,13 +141,12 @@ bool Context::allowTF32OneDNN() const {
  return allow_tf32_onednn;
}

// NOLINTNEXTLINE(clang-diagnostic-unused-parameter)
void Context::setAllowTF32OneDNN(bool b){
#ifdef USE_XPU
void Context::setAllowTF32OneDNN(bool b){
#ifdef USE_XPU
  allow_tf32_onednn = b;
#else
#else
  TORCH_WARN("TF32 acceleration on top of oneDNN is available for Intel GPUs. The current Torch version does not have Intel GPU Support.");
#endif
#endif
}

bool Context::userEnabledFlashSDP() const {

@ -337,16 +259,7 @@ bool Context::allowTF32CuBLAS() const {
    return false;
  }
#endif
  bool legacy_allow_tf32 = float32_matmul_precision != at::Float32MatmulPrecision::HIGHEST;
  bool allow_tf32_new = float32Precision("cuda", "matmul") == "tf32";
  TORCH_CHECK(
      legacy_allow_tf32 == allow_tf32_new,
      "PyTorch is checking whether allow_tf32_new is enabled for cuBlas matmul,",
      "Current status indicate that you have used mix of the legacy and new APIs to set the TF32 status for cublas matmul. ",
      "We suggest only using the new API to set the TF32 flag. See also: ",
      "https://pytorch.org/docs/main/notes/cuda.html#tensorfloat-32-tf32-on-ampere-and-later-devices");
  warn_deprecated_fp32_precision_api();
  return allow_tf32_new;
  return float32_matmul_precision != at::Float32MatmulPrecision::HIGHEST;
}

void Context::setAllowTF32CuBLAS(bool b) {
@ -359,54 +272,27 @@ void Context::setAllowTF32CuBLAS(bool b) {
  }
#endif
  float32_matmul_precision = b ? at::Float32MatmulPrecision::HIGH : at::Float32MatmulPrecision::HIGHEST;
  setFloat32Precision("cuda", "matmul", b ? "tf32" : "ieee");
}

Float32MatmulPrecision Context::float32MatmulPrecision() const {
  bool invalid = float32Precision("cuda", "matmul") == "tf32" &&
      float32_matmul_precision == at::Float32MatmulPrecision::HIGHEST;
  invalid = invalid ||
      (float32Precision("mkldnn", "matmul") == "bf16" &&
       float32_matmul_precision != at::Float32MatmulPrecision::MEDIUM);
  TORCH_CHECK(
      !invalid,
      "PyTorch is checking the matmul precision without a specific backend name,",
      "Current status indicate that you have used mix of the legacy and new APIs to set the matmul precision. ",
      "We suggest only using the new API for matmul precision. See also: ",
      "https://pytorch.org/docs/main/notes/cuda.html#tensorfloat-32-tf32-on-ampere-and-later-devices");
  warn_deprecated_fp32_precision_api();
  return float32_matmul_precision;
}

std::string Context::float32Precision(const std::string& backend, const std::string& op) const {
  check_fp32_prec_backend_and_op(backend, op);
  auto precision = fp32_precision.find(backend)->second.find(op)->second;
  if (precision == "none")
    precision = fp32_precision.find(backend)->second.find("all")->second;
  if (precision == "none")
    precision = fp32_precision.find("generic")->second.find("all")->second;
  bool valid_prec = validate_fp32_prec(backend, precision);
  return valid_prec ? precision : "none";
void Context::setFloat32MatmulPrecision(Float32MatmulPrecision p) {
  float32_matmul_precision = p;
}

void Context::setFloat32MatmulPrecision(const std::string &s) {
  auto match = [this](const std::string & s_) {
    warn_deprecated_fp32_precision_api();
    // TODO: consider if CuDNN field needs to also be set for potential future CuDNN ops like multi-headed attention
    if (s_ == "highest") {
      float32_matmul_precision = at::Float32MatmulPrecision::HIGHEST;
      setFloat32Precision("cuda", "matmul", "ieee");
      setFloat32Precision("mkldnn", "matmul", "ieee");
      return true;
    } else if (s_ == "high") {
      float32_matmul_precision = at::Float32MatmulPrecision::HIGH;
      setFloat32Precision("cuda", "matmul", "tf32");
      setFloat32Precision("mkldnn", "matmul", "ieee");
      return true;
    } else if (s_ == "medium") {
      float32_matmul_precision = at::Float32MatmulPrecision::MEDIUM;
      setFloat32Precision("cuda", "matmul", "tf32");
      setFloat32Precision("mkldnn", "matmul", "bf16");
      return true;
    }
    return false;

@ -420,27 +306,6 @@ void Context::setFloat32MatmulPrecision(const std::string &s) {
      "setFloat32MatmulPrecision call has no effect.");
}
void Context::setFloat32Precision(const std::string& backend, const std::string& op, const std::string& p) {
  check_fp32_prec_backend_and_op(backend, op);
  if (validate_fp32_prec(backend, p)) {
    fp32_precision[backend][op] = p;
  } else {
    std::string msg;
    auto iterp = _fp32_precisions.find(backend);
    TORCH_CHECK(iterp != _fp32_precisions.end());
    for (auto p : iterp->second) {
      msg += p;
      msg += " ";
    }
    TORCH_WARN(
        "you have set wrong precision for backend:",
        backend,
        " setFloat32Precision call has no effect.",
        "Please choose precision from: ",
        msg);
  }
}

at::LinalgBackend Context::linalgPreferredBackend() const {
  return linalg_preferred_backend;
}
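A hedged sketch of how the string-based matmul precision maps onto the per-backend table per the match lambda above, and of the warn-and-ignore behavior of `setFloat32Precision` on an unsupported combination (same assumptions as the previous sketch):

```cpp
#include <ATen/Context.h>

void matmul_precision_example() {
  auto& ctx = at::globalContext();
  ctx.setFloat32MatmulPrecision("medium");
  // Per the mapping above, "medium" now implies:
  //   float32Precision("cuda", "matmul")   == "tf32"
  //   float32Precision("mkldnn", "matmul") == "bf16"
  // An unsupported combination only warns and leaves the setting unchanged:
  ctx.setFloat32Precision("cuda", "matmul", "bf16"); // bf16 invalid for cuda
}
```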
@ -670,14 +535,13 @@ at::QEngine Context::qEngine() const {
#endif
    return qengine;
  }();
  auto qt_engine = quantized_engine.load();
  return qt_engine == at::QEngine::NoQEngine ? _quantized_engine : qt_engine;
  return quantized_engine.value_or(_quantized_engine);
}

void Context::setQEngine(at::QEngine e) {
  const auto& qengines = supportedQEngines();
  if (std::find(qengines.begin(), qengines.end(), e) != qengines.end()) {
    quantized_engine.store(e);
    quantized_engine = e;
    return;
  }
  TORCH_CHECK(false, "quantized engine ", toString(e), " is not supported");
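A minimal standalone illustration of the sentinel-versus-optional change in `qEngine()` above (the enum and helper here are stand-ins, not ATen types):

```cpp
#include <cstdio>
#include <optional>

enum class QEngine { NoQEngine, FBGEMM, QNNPACK };

QEngine default_engine() { return QEngine::FBGEMM; }

// One side stores an atomic with NoQEngine as an in-band "unset" sentinel;
// the other makes "unset" explicit with std::optional, so value_or picks
// the computed default without comparing against a magic value.
QEngine current(const std::optional<QEngine>& user_set) {
  return user_set.value_or(default_engine());
}

int main() {
  std::printf("%d\n", static_cast<int>(current(std::nullopt)));     // FBGEMM
  std::printf("%d\n", static_cast<int>(current(QEngine::QNNPACK))); // QNNPACK
}
```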
@ -689,9 +553,17 @@ const std::vector<at::QEngine>& Context::supportedQEngines() {
    // Engines are listed in priority order: later one wins
    // By default we prefer FBGEMM if we're running on server side
    // QNNPACK on server side has some issue, so we disable it by default.
#ifdef C10_MOBILE
    engines.push_back(at::kNoQEngine);
#ifdef USE_PYTORCH_QNNPACK
    engines.push_back(at::kQNNPACK);
#endif
#else // C10_MOBILE
#ifdef USE_PYTORCH_QNNPACK
    engines.push_back(at::kQNNPACK);
#endif
    engines.push_back(at::kNoQEngine);
#endif // C10_MOBILE

#if AT_MKLDNN_ENABLED()
    engines.push_back(at::kONEDNN);

@ -823,7 +695,6 @@ void Context::setAllowFP16ReductionCPU(bool b) {
#if defined(__aarch64__) && !defined(C10_MOBILE)
  if (!cpuinfo_initialize() || !cpuinfo_has_arm_fp16_arith())
#else
  // NOLINTNEXTLINE(facebook-hte-MissingBraces)
  if (true)
#endif
    TORCH_CHECK(false, "Float16 arithmetic is not supported by the CPU!");
@ -28,7 +28,6 @@
#include <c10/util/irange.h>

#include <cstdint>
#include <map>
#include <mutex>

namespace at {

@ -337,20 +336,14 @@ class TORCH_API Context {
  void alertCuBLASConfigNotDeterministic() const;

  void setFloat32MatmulPrecision(const std::string& s);
  void setFloat32Precision(
      const std::string& backend,
      const std::string& op,
      const std::string& s);
  bool allowTF32CuDNN(const std::string& op = std::string()) const;
  bool allowTF32CuDNN() const;
  void setAllowTF32CuDNN(bool);
  bool allowTF32OneDNN() const;
  void setAllowTF32OneDNN(bool);
  bool allowTF32CuBLAS() const;
  void setAllowTF32CuBLAS(bool);
  Float32MatmulPrecision float32MatmulPrecision() const;
  std::string float32Precision(
      const std::string& backend,
      const std::string& op) const;
  void setFloat32MatmulPrecision(Float32MatmulPrecision p);
  bool allowFP16ReductionCuBLAS() const;
  void setAllowFP16ReductionCuBLAS(bool);
  bool allowBF16ReductionCuBLAS() const;

@ -472,27 +465,10 @@ class TORCH_API Context {
  bool release_original_weights = false;
#endif
  bool display_vmap_fallback_warnings_ = false;
  std::atomic<at::QEngine> quantized_engine = at::QEngine::NoQEngine;
  std::optional<at::QEngine> quantized_engine = std::nullopt;
  bool enable_sparse_tensor_invariant_checks = false;
  bool allow_fp16_reduction_cpu = false;

  std::map<std::string, std::map<std::string, std::string>> fp32_precision = {
      {"generic", {{"all", "none"}}},
      {"mkldnn",
       {{"matmul", "none"},
        {"conv", "none"},
        {"rnn", "none"},
        {"all", "none"}}},
      {"cuda",
       {{"matmul",
         float32_matmul_precision == at::Float32MatmulPrecision::HIGHEST
             ? "none"
             : "tf32"},
        {"conv", "tf32"},
        {"rnn", "tf32"},
        {"all", "none"}}},
  };

  Allocator* prev_allocator_ptr_{nullptr};
};
@ -266,38 +266,19 @@ ScalarType toScalarType(const DLDataType& dtype) {
}

namespace {

// The templated classes below are needed for supporting both:
// - DLManagedTensor
// - DLManagedTensorVersioned
template <class T>
struct ATenDLMTensor {
  Tensor handle;
  T tensor{};
  DLManagedTensor tensor{};
};
} // namespace

template <class T>
void deleter(T* arg) {
  delete static_cast<ATenDLMTensor<T>*>(arg->manager_ctx);
}

// Adds version information for DLManagedTensorVersioned.
// This is a no-op for the other types.
template <class T>
void fillVersion(T* tensor) {}

template <>
void fillVersion<DLManagedTensorVersioned>(
    DLManagedTensorVersioned* tensor) {
  tensor->flags = 0;
  tensor->version.major = DLPACK_MAJOR_VERSION;
  tensor->version.minor = DLPACK_MINOR_VERSION;
static void deleter(DLManagedTensor* arg) {
  delete static_cast<ATenDLMTensor*>(arg->manager_ctx);
}

// This function returns a shared_ptr to memory managed DLpack tensor
// constructed out of ATen tensor
template <class T>
T* toDLPackImpl(const Tensor& src) {
DLManagedTensor* toDLPack(const Tensor& src) {
  // create a new tensor with possibly normalized strides
  // gh-83069
  auto shape = src.sizes();

@ -309,10 +290,10 @@ T* toDLPackImpl(const Tensor& src) {
  }

  auto view = src.as_strided(shape, strides, src.storage_offset());
  ATenDLMTensor<T>* atDLMTensor(new ATenDLMTensor<T>);
  ATenDLMTensor* atDLMTensor(new ATenDLMTensor);
  atDLMTensor->handle = view;
  atDLMTensor->tensor.manager_ctx = atDLMTensor;
  atDLMTensor->tensor.deleter = &deleter<T>;
  atDLMTensor->tensor.deleter = &deleter;
  atDLMTensor->tensor.dl_tensor.data = view.data_ptr();
  c10::DeviceIndex device_id = 0;
  if (src.is_cuda() || src.is_privateuseone()) {

@ -324,68 +305,35 @@ T* toDLPackImpl(const Tensor& src) {
  atDLMTensor->tensor.dl_tensor.shape = view.sizes().data();
  atDLMTensor->tensor.dl_tensor.strides = view.strides().data();
  atDLMTensor->tensor.dl_tensor.byte_offset = 0;
  fillVersion(&atDLMTensor->tensor);

  return &(atDLMTensor->tensor);
}

// Explicitly instantiate the template above for both classes.
template DLManagedTensor* toDLPackImpl<DLManagedTensor>(const Tensor&);
template DLManagedTensorVersioned* toDLPackImpl<DLManagedTensorVersioned>(const Tensor&);
Tensor fromDLPack(DLManagedTensor* src) {
  auto deleter = [src](void* self [[maybe_unused]]) {
    if (src->deleter) {
      src->deleter(src);
    }
  };
  return fromDLPack(src, std::move(deleter));
}

// This function constructs a Tensor from a memory managed DLPack which
// may be represented as either: DLManagedTensor and DLManagedTensorVersioned.
template <class T>
at::Tensor fromDLPackImpl(T* src, std::function<void(void*)> deleter) {
  if (!deleter) {
    deleter = [src](void* self [[maybe_unused]]) {
      if (src->deleter) {
        src->deleter(src);
      }
    };
  }

  DLTensor& dl_tensor = src->dl_tensor;
  Device device = getATenDevice(dl_tensor.device, dl_tensor.data);
  ScalarType stype = toScalarType(dl_tensor.dtype);

  if (!dl_tensor.strides) {
Tensor fromDLPack(DLManagedTensor* src, std::function<void(void*)> deleter) {
  Device device = getATenDevice(src->dl_tensor.device, src->dl_tensor.data);
  ScalarType stype = toScalarType(src->dl_tensor.dtype);
  if (!src->dl_tensor.strides) {
    return at::from_blob(
        dl_tensor.data,
        IntArrayRef(dl_tensor.shape, dl_tensor.ndim),
        src->dl_tensor.data,
        IntArrayRef(src->dl_tensor.shape, src->dl_tensor.ndim),
        std::move(deleter),
        at::device(device).dtype(stype),
        {device});
  }
  return at::from_blob(
      dl_tensor.data,
      IntArrayRef(dl_tensor.shape, dl_tensor.ndim),
      IntArrayRef(dl_tensor.strides, dl_tensor.ndim),
      src->dl_tensor.data,
      IntArrayRef(src->dl_tensor.shape, src->dl_tensor.ndim),
      IntArrayRef(src->dl_tensor.strides, src->dl_tensor.ndim),
      deleter,
      at::device(device).dtype(stype),
      {device});
}

// Explicitly instantiate the template above for both classes.
template at::Tensor fromDLPackImpl<DLManagedTensor>(DLManagedTensor* src, std::function<void(void*)> deleter);
template at::Tensor fromDLPackImpl<DLManagedTensorVersioned>(DLManagedTensorVersioned* src, std::function<void(void*)> deleter);

} // namespace

DLManagedTensor* toDLPack(const Tensor& src) {
  return toDLPackImpl<DLManagedTensor>(src);
}

DLManagedTensorVersioned* toDLPackVersioned(const Tensor& src) {
  return toDLPackImpl<DLManagedTensorVersioned>(src);
}

Tensor fromDLPack(DLManagedTensor* src, std::function<void(void*)> deleter) {
  return fromDLPackImpl<DLManagedTensor>(src, std::move(deleter));
}

Tensor fromDLPackVersioned(DLManagedTensorVersioned* src, std::function<void(void*)> deleter) {
  return fromDLPackImpl<DLManagedTensorVersioned>(src, std::move(deleter));
}

} // namespace at
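A hedged round-trip sketch using the versioned entry points that appear on one side of this diff (Python capsule bookkeeping is omitted; whether these symbols are available depends on which side of the compare you build):

```cpp
#include <ATen/ATen.h>
#include <ATen/DLConvertor.h>

void dlpack_roundtrip() {
  at::Tensor t = at::ones({2, 3});
  // Export: the returned struct keeps `t`'s storage alive via manager_ctx.
  DLManagedTensorVersioned* dl = at::toDLPackVersioned(t);
  // Import: with no explicit deleter, a default one is synthesized that
  // invokes dl->deleter once the new tensor releases the memory.
  at::Tensor back = at::fromDLPackVersioned(dl, /*deleter=*/{});
  TORCH_CHECK(back.equal(t));
}
```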
@ -12,48 +12,10 @@ namespace at {

TORCH_API ScalarType toScalarType(const DLDataType& dtype);
TORCH_API DLManagedTensor* toDLPack(const Tensor& src);
TORCH_API struct DLManagedTensorVersioned* toDLPackVersioned(const Tensor& src);
TORCH_API Tensor fromDLPack(DLManagedTensor* src);
TORCH_API Tensor
fromDLPack(DLManagedTensor* src, std::function<void(void*)> deleter = {});
TORCH_API Tensor fromDLPackVersioned(
    DLManagedTensorVersioned* src,
    std::function<void(void*)> deleter = {});
fromDLPack(DLManagedTensor* src, std::function<void(void*)> deleter);
TORCH_API DLDataType getDLDataType(const Tensor& t);
TORCH_API DLDevice getDLContext(const Tensor& tensor, const int64_t& device_id);

// This trait class is used for retrieving different attributes, such as the
// PyCapsule names and conversion functions for both DLPack tensor classes:
// `DLManagedTensor` and `DLManagedTensorVersioned`.
//
// Each specialization should contain the following 2 traits:
// - `capsule`: actual name of the capsule
// - `used`: name of the capsule after using it
// - `toDLPack`: function for converting a tensor into a DLPack capsule
// - `fromDLPack`: function for creating a tensor from a DLPack capsule
//
// While `toDLPack` is the directly exposed to Python, `fromDLPack` is not.
// Although it contains the core implementation, it lacks the required book
// keeping logic contained in its caller `tensor_fromDLPack`.
//
// That said, `fromDLPack` is used directly in a few DLPack tests that live
// inside ATen (no Python available).
template <class T>
struct DLPackTraits {};

template <>
struct DLPackTraits<DLManagedTensor> {
  inline static const char* capsule = "dltensor";
  inline static const char* used = "used_dltensor";
  inline static auto toDLPack = at::toDLPack;
  inline static auto fromDLPack = at::fromDLPack;
};

template <>
struct DLPackTraits<DLManagedTensorVersioned> {
  inline static const char* capsule = "dltensor_versioned";
  inline static const char* used = "used_dltensor_versioned";
  inline static auto toDLPack = at::toDLPackVersioned;
  inline static auto fromDLPack = at::fromDLPackVersioned;
};

} // namespace at
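A sketch of how the trait class above allows capsule-format-agnostic code; the wrapper itself is illustrative (the `tensor_fromDLPack` caller mentioned in the comment is not shown in this diff):

```cpp
#include <ATen/DLConvertor.h>

// Generic helper keyed on DLPackTraits: the same code path handles both
// capsule formats by swapping the template argument.
template <class T>
const char* capsule_name() {
  return at::DLPackTraits<T>::capsule;
}
// capsule_name<DLManagedTensor>()          -> "dltensor"
// capsule_name<DLManagedTensorVersioned>() -> "dltensor_versioned"
```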
@ -30,7 +30,7 @@ TORCH_API bool isAccelerator(c10::DeviceType device_type);
template <
    typename... T,
    typename = std::enable_if_t<(std::is_same_v<T, c10::DeviceType> && ...)>>
inline bool isAcceleratorExcluded(
TORCH_API inline bool isAcceleratorExcluded(
    c10::DeviceType device_type,
    c10::DeviceType first_excluded,
    T... rest_excluded) {
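A standalone illustration of the variadic pattern used by `isAcceleratorExcluded` above: the `enable_if_t` fold constrains the pack to device types, and a fold expression tests membership. `DeviceType` is stubbed here; this is not the c10 enum.

```cpp
#include <type_traits>

enum class DeviceType { CPU, CUDA, XPU, MPS }; // stand-in for c10::DeviceType

template <
    typename... T,
    typename = std::enable_if_t<(std::is_same_v<T, DeviceType> && ...)>>
bool is_excluded(DeviceType device, DeviceType first, T... rest) {
  // C++17 fold expression: true if `device` matches any excluded type.
  return ((device == first) || ... || (device == rest));
}

// is_excluded(DeviceType::CUDA, DeviceType::XPU, DeviceType::MPS) == false
// is_excluded(DeviceType::XPU, DeviceType::XPU)                   == true
```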
@ -153,17 +153,12 @@ struct TORCH_API FunctionalStorageImpl : public c10::StorageImpl {
  void mark_inductor_storage_resize(c10::SymInt new_size) {
    inductor_storage_resized_ = true;
    curr_storage_size_ = std::move(new_size);
    inductor_storage_resized_counter_++;
  }

  bool was_inductor_storage_resized() {
    return inductor_storage_resized_;
  }

  uint64_t inductor_storage_resized_counter() {
    return inductor_storage_resized_counter_;
  }

 private:
  // NB: base_ should always point to a tensor BELOW the current
  // functionalization layer. This is mainly to avoid reference cycles. e.g.

@ -209,7 +204,6 @@ struct TORCH_API FunctionalStorageImpl : public c10::StorageImpl {
  // (1) There were any storage resizes on a graph input
  // (2) The original/curr storage size tell us if these resizes result in a nop
  bool inductor_storage_resized_ = false;
  uint64_t inductor_storage_resized_counter_ = 0;
  c10::SymInt original_storage_size_;
  c10::SymInt curr_storage_size_;
};
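A minimal model of the resize-tracking difference in this hunk: a boolean only records *whether* a resize ever happened, while the counter on one side additionally records *how many* resizes occurred, letting a caller detect new resizes between two points in time. Standalone sketch, not ATen code:

```cpp
#include <cstdint>

struct ResizeTracker {
  bool resized = false;
  std::uint64_t resize_counter = 0;

  void mark_resize() {
    resized = true;   // legacy signal: at least one resize happened
    ++resize_counter; // counter signal: one increment per resize, so a
                      // snapshot-and-compare detects resizes in between
  }
};
```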
@ -178,7 +178,7 @@ bool FunctionalTensorWrapper::is_up_to_date() const {
// See Note [Functionalization Pass - Inplace View Ops]
void FunctionalTensorWrapper::mutate_view_meta(const at::functionalization::ViewMeta& meta) {
  view_metas_.push_back(meta);
  // Manually track the fact that this tensor received a metadata mutation!
  // Manually track the fact that this tensor recieved a metadata mutation!
  has_metadata_mutation_ = true;
  // Mark this tensor as being symbolic if there are any symbolic inputs used by the view operation.
  maybe_mark_symbolic(meta);

@ -273,7 +273,7 @@ void FunctionalTensorWrapper::set__impl(const FunctionalTensorWrapper* other) {
  // (We could check if the updated value has a new storage than the original value,
  // but this won't also let us uniquely determine if the tensor **also**
  // experienced a data mutation).
  mark_storage_changed();
  was_storage_changed_ = true;

  auto sizes_ = value_.sym_sizes();
  auto strides_ = value_.sym_strides();

@ -499,8 +499,8 @@ int64_t FunctionalTensorWrapper::dim_custom() const {
int64_t FunctionalTensorWrapper::numel_custom() const {
  return value_.unsafeGetTensorImpl()->numel();
}
c10::SymBool FunctionalTensorWrapper::sym_is_contiguous_custom(at::MemoryFormat memory_format) const {
  return value_.unsafeGetTensorImpl()->sym_is_contiguous(memory_format);
bool FunctionalTensorWrapper::is_contiguous_custom(at::MemoryFormat memory_format) const {
  return value_.unsafeGetTensorImpl()->is_contiguous(memory_format);
}
c10::SymIntArrayRef FunctionalTensorWrapper::sym_sizes_custom() const {
  return value_.unsafeGetTensorImpl()->sym_sizes();

@ -579,7 +579,7 @@ std::vector<Tensor> from_functional_tensor(ITensorListRef t_list) {
  for (const auto& tensor : t_list) {
    // from_functional_tensor(Tensor) has asserts to make sure you don't accidentally call
    // it on a non-functional input,
    // but from_functional_tensor(TensorList) can receive a list containing both
    // but from_functional_tensor(TensorList) can recieve a list containing both
    // functional and non-functional tensors.
    // Example of when that can happen: torch.cat(function_input_tensor, global_state_tensor).
    // When that happens, we're okay with only unwrapping the functional tensors.
@ -163,13 +163,8 @@ struct TORCH_API FunctionalTensorWrapper : public c10::TensorImpl {
    return was_storage_changed_;
  }

  void mark_storage_changed() {
  void set_storage_changed() {
    was_storage_changed_ = true;
    storage_changed_counter_++;
  }

  uint64_t storage_changed_counter() {
    return storage_changed_counter_;
  }

  // A FunctionalTensor is considered a base if its not a view of another

@ -188,9 +183,6 @@ struct TORCH_API FunctionalTensorWrapper : public c10::TensorImpl {
    return functional_storage_impl()->was_inductor_storage_resized();
  }

  bool inductor_storage_resized_counter() {
    return functional_storage_impl()->inductor_storage_resized_counter();
  }
  // The functionalization pass can be used to remove mutations.
  // It does so by replacing any mutation op with it's corresponding
  // out-of-place op, followed by a call to replace_(). e.g:

@ -236,8 +228,7 @@ struct TORCH_API FunctionalTensorWrapper : public c10::TensorImpl {
  at::IntArrayRef strides_custom() const override;
  int64_t dim_custom() const override;
  int64_t numel_custom() const override;
  c10::SymBool sym_is_contiguous_custom(
      at::MemoryFormat memory_format) const override;
  bool is_contiguous_custom(at::MemoryFormat memory_format) const override;
  c10::SymIntArrayRef sym_sizes_custom() const override;
  c10::SymInt sym_size_custom(int64_t d) const override;
  c10::SymIntArrayRef sym_strides_custom() const override;

@ -280,7 +271,6 @@ struct TORCH_API FunctionalTensorWrapper : public c10::TensorImpl {
  bool is_multi_output_view_ = false;
  // Did the tensor experience a set_() call.
  bool was_storage_changed_ = false;
  uint64_t storage_changed_counter_ = 0;
  // Did the tensor experience any view operation with symbolic int.
  bool is_symbolic_ = false;

@ -300,7 +290,7 @@ struct TORCH_API FunctionalTensorWrapper : public c10::TensorImpl {
namespace functionalization {
namespace impl {

inline FunctionalTensorWrapper* unsafeGetFunctionalWrapper(
TORCH_API inline FunctionalTensorWrapper* unsafeGetFunctionalWrapper(
    const Tensor& tensor) {
  auto functional_impl =
      static_cast<FunctionalTensorWrapper*>(tensor.unsafeGetTensorImpl());
@ -320,9 +320,11 @@ static at::Tensor _unsafe_view_functionalize(const at::Tensor & self, at::SymInt
  auto stride = at::detail::computeStride(self.sym_sizes(), self.sym_strides(), inferred_size);

  if (!stride.has_value()) {

    TORCH_SYM_CHECK(
        self.sym_is_contiguous(),
    // With unbacked symints, computeStride could fail even on contiguous
    // tensors. In this case, we can use the strides of an empty tensor of
    // inferred_size.
    TORCH_CHECK(
        self.is_contiguous(),
        "View is not valid from size:",
        self.sym_sizes(),
        " stride: ",

@ -331,9 +333,6 @@ static at::Tensor _unsafe_view_functionalize(const at::Tensor & self, at::SymInt
        inferred_size,
        " in case of unbacked symbols consider adding torch.check to guide computing strides.");

    // With unbacked symints, computeStride could fail even on contiguous
    // tensors. In this case, we can use the strides of an empty tensor of
    // inferred_size.
    stride = at::detail::empty_symint_meta(
        inferred_size,
        std::nullopt,
@ -84,7 +84,7 @@ IntArrayRef BatchedTensorImpl::strides_custom() const {

// TODO: implement proper contiguity on batched tensor, then put
// sizes_strides_policy back to Default
c10::SymBool BatchedTensorImpl::sym_is_contiguous_custom(at::MemoryFormat memory_format) const {
bool BatchedTensorImpl::is_contiguous_custom(at::MemoryFormat memory_format) const {
  TORCH_CHECK(memory_format == MemoryFormat::Contiguous,
      "NYI: querying is_contiguous inside of vmap for memory_format ",
      "other than torch.contiguous_format");