mirror of
https://github.com/pytorch/pytorch.git
synced 2025-11-12 14:54:55 +08:00
Compare commits
1 Commits
ciflow/vll
...
document-a
| Author | SHA1 | Date | |
|---|---|---|---|
| feace9648e |
@ -13,4 +13,3 @@ exclude:
|
|||||||
- "**/benchmarks/**"
|
- "**/benchmarks/**"
|
||||||
- "**/test_*.py"
|
- "**/test_*.py"
|
||||||
- "**/*_test.py"
|
- "**/*_test.py"
|
||||||
- "tools/**"
|
|
||||||
|
|||||||
@ -8,8 +8,6 @@ if [[ "$GPU_ARCH_VERSION" == *"12.6"* ]]; then
|
|||||||
export TORCH_CUDA_ARCH_LIST="8.0;9.0"
|
export TORCH_CUDA_ARCH_LIST="8.0;9.0"
|
||||||
elif [[ "$GPU_ARCH_VERSION" == *"12.8"* ]]; then
|
elif [[ "$GPU_ARCH_VERSION" == *"12.8"* ]]; then
|
||||||
export TORCH_CUDA_ARCH_LIST="8.0;9.0;10.0;12.0"
|
export TORCH_CUDA_ARCH_LIST="8.0;9.0;10.0;12.0"
|
||||||
elif [[ "$GPU_ARCH_VERSION" == *"12.9"* ]]; then
|
|
||||||
export TORCH_CUDA_ARCH_LIST="8.0;9.0;10.0;12.0"
|
|
||||||
elif [[ "$GPU_ARCH_VERSION" == *"13.0"* ]]; then
|
elif [[ "$GPU_ARCH_VERSION" == *"13.0"* ]]; then
|
||||||
export TORCH_CUDA_ARCH_LIST="8.0;9.0;10.0;11.0;12.0+PTX"
|
export TORCH_CUDA_ARCH_LIST="8.0;9.0;10.0;11.0;12.0+PTX"
|
||||||
fi
|
fi
|
||||||
|
|||||||
@ -7,13 +7,13 @@ ENV LC_ALL en_US.UTF-8
|
|||||||
ENV LANG en_US.UTF-8
|
ENV LANG en_US.UTF-8
|
||||||
ENV LANGUAGE en_US.UTF-8
|
ENV LANGUAGE en_US.UTF-8
|
||||||
|
|
||||||
ARG DEVTOOLSET_VERSION=13
|
ARG DEVTOOLSET_VERSION=11
|
||||||
|
|
||||||
RUN yum -y update
|
RUN yum -y update
|
||||||
RUN yum -y install epel-release
|
RUN yum -y install epel-release
|
||||||
# install glibc-langpack-en make sure en_US.UTF-8 locale is available
|
# install glibc-langpack-en make sure en_US.UTF-8 locale is available
|
||||||
RUN yum -y install glibc-langpack-en
|
RUN yum -y install glibc-langpack-en
|
||||||
RUN yum install -y sudo wget curl perl util-linux xz bzip2 git patch which perl zlib-devel openssl-devel yum-utils autoconf automake make gcc-toolset-${DEVTOOLSET_VERSION}-gcc gcc-toolset-${DEVTOOLSET_VERSION}-gcc-c++ gcc-toolset-${DEVTOOLSET_VERSION}-gcc-gfortran gcc-toolset-${DEVTOOLSET_VERSION}-gdb
|
RUN yum install -y sudo wget curl perl util-linux xz bzip2 git patch which perl zlib-devel openssl-devel yum-utils autoconf automake make gcc-toolset-${DEVTOOLSET_VERSION}-toolchain
|
||||||
# Just add everything as a safe.directory for git since these will be used in multiple places with git
|
# Just add everything as a safe.directory for git since these will be used in multiple places with git
|
||||||
RUN git config --global --add safe.directory '*'
|
RUN git config --global --add safe.directory '*'
|
||||||
ENV PATH=/opt/rh/gcc-toolset-${DEVTOOLSET_VERSION}/root/usr/bin:$PATH
|
ENV PATH=/opt/rh/gcc-toolset-${DEVTOOLSET_VERSION}/root/usr/bin:$PATH
|
||||||
@ -41,7 +41,6 @@ RUN bash ./install_conda.sh && rm install_conda.sh
|
|||||||
# Install CUDA
|
# Install CUDA
|
||||||
FROM base as cuda
|
FROM base as cuda
|
||||||
ARG CUDA_VERSION=12.6
|
ARG CUDA_VERSION=12.6
|
||||||
ARG DEVTOOLSET_VERSION=13
|
|
||||||
RUN rm -rf /usr/local/cuda-*
|
RUN rm -rf /usr/local/cuda-*
|
||||||
ADD ./common/install_cuda.sh install_cuda.sh
|
ADD ./common/install_cuda.sh install_cuda.sh
|
||||||
COPY ./common/install_nccl.sh install_nccl.sh
|
COPY ./common/install_nccl.sh install_nccl.sh
|
||||||
@ -51,8 +50,7 @@ ENV CUDA_HOME=/usr/local/cuda-${CUDA_VERSION}
|
|||||||
# Preserve CUDA_VERSION for the builds
|
# Preserve CUDA_VERSION for the builds
|
||||||
ENV CUDA_VERSION=${CUDA_VERSION}
|
ENV CUDA_VERSION=${CUDA_VERSION}
|
||||||
# Make things in our path by default
|
# Make things in our path by default
|
||||||
ENV PATH=/usr/local/cuda-${CUDA_VERSION}/bin:/opt/rh/gcc-toolset-${DEVTOOLSET_VERSION}/root/usr/bin:$PATH
|
ENV PATH=/usr/local/cuda-${CUDA_VERSION}/bin:$PATH
|
||||||
|
|
||||||
|
|
||||||
FROM cuda as cuda12.6
|
FROM cuda as cuda12.6
|
||||||
RUN bash ./install_cuda.sh 12.6
|
RUN bash ./install_cuda.sh 12.6
|
||||||
@ -70,22 +68,8 @@ FROM cuda as cuda13.0
|
|||||||
RUN bash ./install_cuda.sh 13.0
|
RUN bash ./install_cuda.sh 13.0
|
||||||
ENV DESIRED_CUDA=13.0
|
ENV DESIRED_CUDA=13.0
|
||||||
|
|
||||||
FROM ${ROCM_IMAGE} as rocm_base
|
FROM ${ROCM_IMAGE} as rocm
|
||||||
ARG DEVTOOLSET_VERSION=13
|
|
||||||
ENV LC_ALL en_US.UTF-8
|
|
||||||
ENV LANG en_US.UTF-8
|
|
||||||
ENV LANGUAGE en_US.UTF-8
|
|
||||||
# Install devtoolset on ROCm base image
|
|
||||||
RUN yum -y update && \
|
|
||||||
yum -y install epel-release && \
|
|
||||||
yum -y install glibc-langpack-en && \
|
|
||||||
yum install -y sudo wget curl perl util-linux xz bzip2 git patch which perl zlib-devel openssl-devel yum-utils autoconf automake make gcc-toolset-${DEVTOOLSET_VERSION}-gcc gcc-toolset-${DEVTOOLSET_VERSION}-gcc-c++ gcc-toolset-${DEVTOOLSET_VERSION}-gcc-gfortran gcc-toolset-${DEVTOOLSET_VERSION}-gdb
|
|
||||||
RUN git config --global --add safe.directory '*'
|
|
||||||
ENV PATH=/opt/rh/gcc-toolset-${DEVTOOLSET_VERSION}/root/usr/bin:$PATH
|
|
||||||
|
|
||||||
FROM rocm_base as rocm
|
|
||||||
ARG PYTORCH_ROCM_ARCH
|
ARG PYTORCH_ROCM_ARCH
|
||||||
ARG DEVTOOLSET_VERSION=13
|
|
||||||
ENV PYTORCH_ROCM_ARCH ${PYTORCH_ROCM_ARCH}
|
ENV PYTORCH_ROCM_ARCH ${PYTORCH_ROCM_ARCH}
|
||||||
ADD ./common/install_mkl.sh install_mkl.sh
|
ADD ./common/install_mkl.sh install_mkl.sh
|
||||||
RUN bash ./install_mkl.sh && rm install_mkl.sh
|
RUN bash ./install_mkl.sh && rm install_mkl.sh
|
||||||
@ -104,7 +88,6 @@ COPY --from=cuda13.0 /usr/local/cuda-13.0 /usr/local/cuda-13.0
|
|||||||
|
|
||||||
# Final step
|
# Final step
|
||||||
FROM ${BASE_TARGET} as final
|
FROM ${BASE_TARGET} as final
|
||||||
ARG DEVTOOLSET_VERSION=13
|
|
||||||
COPY --from=openssl /opt/openssl /opt/openssl
|
COPY --from=openssl /opt/openssl /opt/openssl
|
||||||
COPY --from=patchelf /patchelf /usr/local/bin/patchelf
|
COPY --from=patchelf /patchelf /usr/local/bin/patchelf
|
||||||
COPY --from=conda /opt/conda /opt/conda
|
COPY --from=conda /opt/conda /opt/conda
|
||||||
|
|||||||
@ -36,7 +36,11 @@ case ${DOCKER_TAG_PREFIX} in
|
|||||||
;;
|
;;
|
||||||
rocm*)
|
rocm*)
|
||||||
BASE_TARGET=rocm
|
BASE_TARGET=rocm
|
||||||
PYTORCH_ROCM_ARCH="gfx900;gfx906;gfx908;gfx90a;gfx942;gfx1030;gfx1100;gfx1101;gfx1102;gfx1200;gfx1201;gfx950;gfx1150;gfx1151"
|
PYTORCH_ROCM_ARCH="gfx900;gfx906;gfx908;gfx90a;gfx942;gfx1030;gfx1100;gfx1101;gfx1102;gfx1200;gfx1201"
|
||||||
|
# add gfx950 conditionally starting in ROCm 7.0
|
||||||
|
if [[ "$ROCM_VERSION" == *"7.0"* ]]; then
|
||||||
|
PYTORCH_ROCM_ARCH="${PYTORCH_ROCM_ARCH};gfx950"
|
||||||
|
fi
|
||||||
EXTRA_BUILD_ARGS="${EXTRA_BUILD_ARGS} --build-arg PYTORCH_ROCM_ARCH=${PYTORCH_ROCM_ARCH}"
|
EXTRA_BUILD_ARGS="${EXTRA_BUILD_ARGS} --build-arg PYTORCH_ROCM_ARCH=${PYTORCH_ROCM_ARCH}"
|
||||||
;;
|
;;
|
||||||
*)
|
*)
|
||||||
@ -59,7 +63,7 @@ docker build \
|
|||||||
--target final \
|
--target final \
|
||||||
--progress plain \
|
--progress plain \
|
||||||
--build-arg "BASE_TARGET=${BASE_TARGET}" \
|
--build-arg "BASE_TARGET=${BASE_TARGET}" \
|
||||||
--build-arg "DEVTOOLSET_VERSION=13" \
|
--build-arg "DEVTOOLSET_VERSION=11" \
|
||||||
${EXTRA_BUILD_ARGS} \
|
${EXTRA_BUILD_ARGS} \
|
||||||
-t ${tmp_tag} \
|
-t ${tmp_tag} \
|
||||||
$@ \
|
$@ \
|
||||||
|
|||||||
@ -113,7 +113,6 @@ case "$tag" in
|
|||||||
UCX_COMMIT=${_UCX_COMMIT}
|
UCX_COMMIT=${_UCX_COMMIT}
|
||||||
UCC_COMMIT=${_UCC_COMMIT}
|
UCC_COMMIT=${_UCC_COMMIT}
|
||||||
TRITON=yes
|
TRITON=yes
|
||||||
INSTALL_MINGW=yes
|
|
||||||
;;
|
;;
|
||||||
pytorch-linux-jammy-cuda13.0-cudnn9-py3-gcc11)
|
pytorch-linux-jammy-cuda13.0-cudnn9-py3-gcc11)
|
||||||
CUDA_VERSION=13.0.0
|
CUDA_VERSION=13.0.0
|
||||||
@ -168,18 +167,6 @@ case "$tag" in
|
|||||||
VISION=yes
|
VISION=yes
|
||||||
TRITON=yes
|
TRITON=yes
|
||||||
;;
|
;;
|
||||||
pytorch-linux-jammy-py3.11-clang12)
|
|
||||||
ANACONDA_PYTHON_VERSION=3.11
|
|
||||||
CLANG_VERSION=12
|
|
||||||
VISION=no
|
|
||||||
TRITON=no
|
|
||||||
;;
|
|
||||||
pytorch-linux-jammy-py3.12-clang12)
|
|
||||||
ANACONDA_PYTHON_VERSION=3.12
|
|
||||||
CLANG_VERSION=12
|
|
||||||
VISION=no
|
|
||||||
TRITON=no
|
|
||||||
;;
|
|
||||||
pytorch-linux-jammy-rocm-n-py3 | pytorch-linux-jammy-rocm-n-py3-benchmarks | pytorch-linux-noble-rocm-n-py3)
|
pytorch-linux-jammy-rocm-n-py3 | pytorch-linux-jammy-rocm-n-py3-benchmarks | pytorch-linux-noble-rocm-n-py3)
|
||||||
if [[ $tag =~ "jammy" ]]; then
|
if [[ $tag =~ "jammy" ]]; then
|
||||||
ANACONDA_PYTHON_VERSION=3.10
|
ANACONDA_PYTHON_VERSION=3.10
|
||||||
@ -194,7 +181,7 @@ case "$tag" in
|
|||||||
KATEX=yes
|
KATEX=yes
|
||||||
UCX_COMMIT=${_UCX_COMMIT}
|
UCX_COMMIT=${_UCX_COMMIT}
|
||||||
UCC_COMMIT=${_UCC_COMMIT}
|
UCC_COMMIT=${_UCC_COMMIT}
|
||||||
PYTORCH_ROCM_ARCH="gfx90a;gfx942;gfx950;gfx1100"
|
PYTORCH_ROCM_ARCH="gfx90a;gfx942;gfx950"
|
||||||
if [[ $tag =~ "benchmarks" ]]; then
|
if [[ $tag =~ "benchmarks" ]]; then
|
||||||
INDUCTOR_BENCHMARKS=yes
|
INDUCTOR_BENCHMARKS=yes
|
||||||
fi
|
fi
|
||||||
@ -207,16 +194,13 @@ case "$tag" in
|
|||||||
NINJA_VERSION=1.9.0
|
NINJA_VERSION=1.9.0
|
||||||
TRITON=yes
|
TRITON=yes
|
||||||
;;
|
;;
|
||||||
pytorch-linux-noble-xpu-n-py3 | pytorch-linux-noble-xpu-n-py3-inductor-benchmarks)
|
pytorch-linux-jammy-xpu-n-py3)
|
||||||
ANACONDA_PYTHON_VERSION=3.10
|
ANACONDA_PYTHON_VERSION=3.10
|
||||||
GCC_VERSION=13
|
GCC_VERSION=11
|
||||||
VISION=yes
|
VISION=yes
|
||||||
XPU_VERSION=2025.2
|
XPU_VERSION=2025.2
|
||||||
NINJA_VERSION=1.9.0
|
NINJA_VERSION=1.9.0
|
||||||
TRITON=yes
|
TRITON=yes
|
||||||
if [[ $tag =~ "benchmarks" ]]; then
|
|
||||||
INDUCTOR_BENCHMARKS=yes
|
|
||||||
fi
|
|
||||||
;;
|
;;
|
||||||
pytorch-linux-jammy-py3-gcc11-inductor-benchmarks)
|
pytorch-linux-jammy-py3-gcc11-inductor-benchmarks)
|
||||||
ANACONDA_PYTHON_VERSION=3.10
|
ANACONDA_PYTHON_VERSION=3.10
|
||||||
@ -260,12 +244,6 @@ case "$tag" in
|
|||||||
HALIDE=yes
|
HALIDE=yes
|
||||||
TRITON=yes
|
TRITON=yes
|
||||||
;;
|
;;
|
||||||
pytorch-linux-jammy-cuda12.8-py3.12-pallas)
|
|
||||||
CUDA_VERSION=12.8.1
|
|
||||||
ANACONDA_PYTHON_VERSION=3.12
|
|
||||||
GCC_VERSION=11
|
|
||||||
PALLAS=yes
|
|
||||||
;;
|
|
||||||
pytorch-linux-jammy-py3.12-triton-cpu)
|
pytorch-linux-jammy-py3.12-triton-cpu)
|
||||||
CUDA_VERSION=12.6
|
CUDA_VERSION=12.6
|
||||||
ANACONDA_PYTHON_VERSION=3.12
|
ANACONDA_PYTHON_VERSION=3.12
|
||||||
@ -279,9 +257,9 @@ case "$tag" in
|
|||||||
PYTHON_VERSION=3.10
|
PYTHON_VERSION=3.10
|
||||||
CUDA_VERSION=12.8.1
|
CUDA_VERSION=12.8.1
|
||||||
;;
|
;;
|
||||||
pytorch-linux-jammy-aarch64-py3.10-gcc13)
|
pytorch-linux-jammy-aarch64-py3.10-gcc11)
|
||||||
ANACONDA_PYTHON_VERSION=3.10
|
ANACONDA_PYTHON_VERSION=3.10
|
||||||
GCC_VERSION=13
|
GCC_VERSION=11
|
||||||
ACL=yes
|
ACL=yes
|
||||||
VISION=yes
|
VISION=yes
|
||||||
OPENBLAS=yes
|
OPENBLAS=yes
|
||||||
@ -289,19 +267,9 @@ case "$tag" in
|
|||||||
# from pytorch/llvm:9.0.1 is x86 specific
|
# from pytorch/llvm:9.0.1 is x86 specific
|
||||||
SKIP_LLVM_SRC_BUILD_INSTALL=yes
|
SKIP_LLVM_SRC_BUILD_INSTALL=yes
|
||||||
;;
|
;;
|
||||||
pytorch-linux-jammy-aarch64-py3.10-clang21)
|
pytorch-linux-jammy-aarch64-py3.10-gcc11-inductor-benchmarks)
|
||||||
ANACONDA_PYTHON_VERSION=3.10
|
ANACONDA_PYTHON_VERSION=3.10
|
||||||
CLANG_VERSION=21
|
GCC_VERSION=11
|
||||||
ACL=yes
|
|
||||||
VISION=yes
|
|
||||||
OPENBLAS=yes
|
|
||||||
# snadampal: skipping llvm src build install because the current version
|
|
||||||
# from pytorch/llvm:9.0.1 is x86 specific
|
|
||||||
SKIP_LLVM_SRC_BUILD_INSTALL=yes
|
|
||||||
;;
|
|
||||||
pytorch-linux-jammy-aarch64-py3.10-gcc13-inductor-benchmarks)
|
|
||||||
ANACONDA_PYTHON_VERSION=3.10
|
|
||||||
GCC_VERSION=13
|
|
||||||
ACL=yes
|
ACL=yes
|
||||||
VISION=yes
|
VISION=yes
|
||||||
OPENBLAS=yes
|
OPENBLAS=yes
|
||||||
@ -376,7 +344,7 @@ docker build \
|
|||||||
--build-arg "NINJA_VERSION=${NINJA_VERSION:-}" \
|
--build-arg "NINJA_VERSION=${NINJA_VERSION:-}" \
|
||||||
--build-arg "KATEX=${KATEX:-}" \
|
--build-arg "KATEX=${KATEX:-}" \
|
||||||
--build-arg "ROCM_VERSION=${ROCM_VERSION:-}" \
|
--build-arg "ROCM_VERSION=${ROCM_VERSION:-}" \
|
||||||
--build-arg "PYTORCH_ROCM_ARCH=${PYTORCH_ROCM_ARCH}" \
|
--build-arg "PYTORCH_ROCM_ARCH=${PYTORCH_ROCM_ARCH:-gfx90a;gfx942}" \
|
||||||
--build-arg "IMAGE_NAME=${IMAGE_NAME}" \
|
--build-arg "IMAGE_NAME=${IMAGE_NAME}" \
|
||||||
--build-arg "UCX_COMMIT=${UCX_COMMIT}" \
|
--build-arg "UCX_COMMIT=${UCX_COMMIT}" \
|
||||||
--build-arg "UCC_COMMIT=${UCC_COMMIT}" \
|
--build-arg "UCC_COMMIT=${UCC_COMMIT}" \
|
||||||
@ -387,14 +355,12 @@ docker build \
|
|||||||
--build-arg "INDUCTOR_BENCHMARKS=${INDUCTOR_BENCHMARKS}" \
|
--build-arg "INDUCTOR_BENCHMARKS=${INDUCTOR_BENCHMARKS}" \
|
||||||
--build-arg "EXECUTORCH=${EXECUTORCH}" \
|
--build-arg "EXECUTORCH=${EXECUTORCH}" \
|
||||||
--build-arg "HALIDE=${HALIDE}" \
|
--build-arg "HALIDE=${HALIDE}" \
|
||||||
--build-arg "PALLAS=${PALLAS}" \
|
|
||||||
--build-arg "XPU_VERSION=${XPU_VERSION}" \
|
--build-arg "XPU_VERSION=${XPU_VERSION}" \
|
||||||
--build-arg "UNINSTALL_DILL=${UNINSTALL_DILL}" \
|
--build-arg "UNINSTALL_DILL=${UNINSTALL_DILL}" \
|
||||||
--build-arg "ACL=${ACL:-}" \
|
--build-arg "ACL=${ACL:-}" \
|
||||||
--build-arg "OPENBLAS=${OPENBLAS:-}" \
|
--build-arg "OPENBLAS=${OPENBLAS:-}" \
|
||||||
--build-arg "SKIP_SCCACHE_INSTALL=${SKIP_SCCACHE_INSTALL:-}" \
|
--build-arg "SKIP_SCCACHE_INSTALL=${SKIP_SCCACHE_INSTALL:-}" \
|
||||||
--build-arg "SKIP_LLVM_SRC_BUILD_INSTALL=${SKIP_LLVM_SRC_BUILD_INSTALL:-}" \
|
--build-arg "SKIP_LLVM_SRC_BUILD_INSTALL=${SKIP_LLVM_SRC_BUILD_INSTALL:-}" \
|
||||||
--build-arg "INSTALL_MINGW=${INSTALL_MINGW:-}" \
|
|
||||||
-f $(dirname ${DOCKERFILE})/Dockerfile \
|
-f $(dirname ${DOCKERFILE})/Dockerfile \
|
||||||
-t "$tmp_tag" \
|
-t "$tmp_tag" \
|
||||||
"$@" \
|
"$@" \
|
||||||
|
|||||||
@ -1 +0,0 @@
|
|||||||
0.8.0
|
|
||||||
@ -1 +1 @@
|
|||||||
bfeb066872bc1e8b2d2bc0a3b295b99dd77206e7
|
27664085f804afc83df26f740bb46c365854f2c4
|
||||||
|
|||||||
@ -3,7 +3,7 @@
|
|||||||
|
|
||||||
set -eux
|
set -eux
|
||||||
|
|
||||||
ACL_VERSION=${ACL_VERSION:-"v52.6.0"}
|
ACL_VERSION=${ACL_VERSION:-"v25.02"}
|
||||||
ACL_INSTALL_DIR="/acl"
|
ACL_INSTALL_DIR="/acl"
|
||||||
|
|
||||||
# Clone ACL
|
# Clone ACL
|
||||||
|
|||||||
@ -8,8 +8,8 @@ if [ -n "$CLANG_VERSION" ]; then
|
|||||||
# work around ubuntu apt-get conflicts
|
# work around ubuntu apt-get conflicts
|
||||||
sudo apt-get -y -f install
|
sudo apt-get -y -f install
|
||||||
wget --no-check-certificate -O - https://apt.llvm.org/llvm-snapshot.gpg.key | sudo apt-key add -
|
wget --no-check-certificate -O - https://apt.llvm.org/llvm-snapshot.gpg.key | sudo apt-key add -
|
||||||
if [[ $CLANG_VERSION -ge 18 ]]; then
|
if [[ $CLANG_VERSION == 18 ]]; then
|
||||||
apt-add-repository "deb http://apt.llvm.org/jammy/ llvm-toolchain-jammy-${CLANG_VERSION} main"
|
apt-add-repository "deb http://apt.llvm.org/jammy/ llvm-toolchain-jammy-18 main"
|
||||||
fi
|
fi
|
||||||
fi
|
fi
|
||||||
|
|
||||||
|
|||||||
@ -49,20 +49,12 @@ if [ -n "$ANACONDA_PYTHON_VERSION" ]; then
|
|||||||
export SYSROOT_DEP="sysroot_linux-64=2.17"
|
export SYSROOT_DEP="sysroot_linux-64=2.17"
|
||||||
fi
|
fi
|
||||||
|
|
||||||
# Install correct Python version
|
|
||||||
# Also ensure sysroot is using a modern GLIBC to match system compilers
|
|
||||||
if [ "$ANACONDA_PYTHON_VERSION" = "3.14" ]; then
|
|
||||||
as_jenkins conda create -n py_$ANACONDA_PYTHON_VERSION -y\
|
|
||||||
python="3.14.0" \
|
|
||||||
${SYSROOT_DEP} \
|
|
||||||
-c conda-forge
|
|
||||||
else
|
|
||||||
# Install correct Python version
|
# Install correct Python version
|
||||||
# Also ensure sysroot is using a modern GLIBC to match system compilers
|
# Also ensure sysroot is using a modern GLIBC to match system compilers
|
||||||
as_jenkins conda create -n py_$ANACONDA_PYTHON_VERSION -y\
|
as_jenkins conda create -n py_$ANACONDA_PYTHON_VERSION -y\
|
||||||
python="$ANACONDA_PYTHON_VERSION" \
|
python="$ANACONDA_PYTHON_VERSION" \
|
||||||
${SYSROOT_DEP}
|
${SYSROOT_DEP}
|
||||||
fi
|
|
||||||
# libstdcxx from conda default channels are too old, we need GLIBCXX_3.4.30
|
# libstdcxx from conda default channels are too old, we need GLIBCXX_3.4.30
|
||||||
# which is provided in libstdcxx 12 and up.
|
# which is provided in libstdcxx 12 and up.
|
||||||
conda_install libstdcxx-ng=12.3.0 --update-deps -c conda-forge
|
conda_install libstdcxx-ng=12.3.0 --update-deps -c conda-forge
|
||||||
|
|||||||
@ -83,6 +83,10 @@ function build_cpython {
|
|||||||
py_suffix=${py_ver::-1}
|
py_suffix=${py_ver::-1}
|
||||||
py_folder=$py_suffix
|
py_folder=$py_suffix
|
||||||
fi
|
fi
|
||||||
|
# Update to rc2 due to https://github.com/python/cpython/commit/c72699086fe4
|
||||||
|
if [ "$py_suffix" == "3.14.0" ]; then
|
||||||
|
py_suffix="3.14.0rc2"
|
||||||
|
fi
|
||||||
wget -q $PYTHON_DOWNLOAD_URL/$py_folder/Python-$py_suffix.tgz -O Python-$py_ver.tgz
|
wget -q $PYTHON_DOWNLOAD_URL/$py_folder/Python-$py_suffix.tgz -O Python-$py_ver.tgz
|
||||||
do_cpython_build $py_ver Python-$py_suffix
|
do_cpython_build $py_ver Python-$py_suffix
|
||||||
|
|
||||||
|
|||||||
@ -10,7 +10,7 @@ else
|
|||||||
arch_path='sbsa'
|
arch_path='sbsa'
|
||||||
fi
|
fi
|
||||||
|
|
||||||
NVSHMEM_VERSION=3.4.5
|
NVSHMEM_VERSION=3.3.24
|
||||||
|
|
||||||
function install_cuda {
|
function install_cuda {
|
||||||
version=$1
|
version=$1
|
||||||
@ -150,7 +150,7 @@ function install_130 {
|
|||||||
CUDNN_VERSION=9.13.0.50
|
CUDNN_VERSION=9.13.0.50
|
||||||
echo "Installing CUDA 13.0 and cuDNN ${CUDNN_VERSION} and NVSHMEM and NCCL and cuSparseLt-0.7.1"
|
echo "Installing CUDA 13.0 and cuDNN ${CUDNN_VERSION} and NVSHMEM and NCCL and cuSparseLt-0.7.1"
|
||||||
# install CUDA 13.0 in the same container
|
# install CUDA 13.0 in the same container
|
||||||
install_cuda 13.0.2 cuda_13.0.2_580.95.05_linux
|
install_cuda 13.0.0 cuda_13.0.0_580.65.06_linux
|
||||||
|
|
||||||
# cuDNN license: https://developer.nvidia.com/cudnn/license_agreement
|
# cuDNN license: https://developer.nvidia.com/cudnn/license_agreement
|
||||||
install_cudnn 13 $CUDNN_VERSION
|
install_cudnn 13 $CUDNN_VERSION
|
||||||
|
|||||||
@ -7,11 +7,11 @@ if [ -n "$GCC_VERSION" ]; then
|
|||||||
# Need the official toolchain repo to get alternate packages
|
# Need the official toolchain repo to get alternate packages
|
||||||
add-apt-repository ppa:ubuntu-toolchain-r/test
|
add-apt-repository ppa:ubuntu-toolchain-r/test
|
||||||
apt-get update
|
apt-get update
|
||||||
apt-get install -y g++-$GCC_VERSION gfortran-$GCC_VERSION
|
apt-get install -y g++-$GCC_VERSION
|
||||||
update-alternatives --install /usr/bin/gcc gcc /usr/bin/gcc-"$GCC_VERSION" 50
|
update-alternatives --install /usr/bin/gcc gcc /usr/bin/gcc-"$GCC_VERSION" 50
|
||||||
update-alternatives --install /usr/bin/g++ g++ /usr/bin/g++-"$GCC_VERSION" 50
|
update-alternatives --install /usr/bin/g++ g++ /usr/bin/g++-"$GCC_VERSION" 50
|
||||||
update-alternatives --install /usr/bin/gcov gcov /usr/bin/gcov-"$GCC_VERSION" 50
|
update-alternatives --install /usr/bin/gcov gcov /usr/bin/gcov-"$GCC_VERSION" 50
|
||||||
update-alternatives --install /usr/bin/gfortran gfortran /usr/bin/gfortran-"$GCC_VERSION" 50
|
|
||||||
|
|
||||||
# Cleanup package manager
|
# Cleanup package manager
|
||||||
apt-get autoclean && apt-get clean
|
apt-get autoclean && apt-get clean
|
||||||
|
|||||||
@ -1,40 +0,0 @@
|
|||||||
#!/bin/bash
|
|
||||||
|
|
||||||
set -ex
|
|
||||||
|
|
||||||
source "$(dirname "${BASH_SOURCE[0]}")/common_utils.sh"
|
|
||||||
|
|
||||||
# Get the pinned JAX version (same for all CUDA versions)
|
|
||||||
JAX_VERSION=$(get_pinned_commit /ci_commit_pins/jax)
|
|
||||||
|
|
||||||
function install_jax_12() {
|
|
||||||
echo "Installing JAX ${JAX_VERSION} with CUDA 12 support"
|
|
||||||
pip_install "jax[cuda12]==${JAX_VERSION}" -f https://storage.googleapis.com/jax-releases/jax_cuda_releases.html
|
|
||||||
|
|
||||||
# Verify installation
|
|
||||||
python -c "import jax" # check for errors
|
|
||||||
echo "JAX ${JAX_VERSION} installation completed successfully for CUDA 12"
|
|
||||||
}
|
|
||||||
|
|
||||||
function install_jax_13() {
|
|
||||||
echo "Installing JAX ${JAX_VERSION} with CUDA 13 support"
|
|
||||||
pip_install "jax[cuda13]==${JAX_VERSION}" -f https://storage.googleapis.com/jax-releases/jax_cuda_releases.html
|
|
||||||
|
|
||||||
# Verify installation
|
|
||||||
python -c "import jax" # check for errors
|
|
||||||
echo "JAX ${JAX_VERSION} installation completed successfully for CUDA 13"
|
|
||||||
}
|
|
||||||
|
|
||||||
# idiomatic parameter and option handling in sh
|
|
||||||
while test $# -gt 0
|
|
||||||
do
|
|
||||||
case "$1" in
|
|
||||||
12.4|12.6|12.6.*|12.8|12.8.*|12.9|12.9.*) install_jax_12;
|
|
||||||
;;
|
|
||||||
13.0|13.0.*) install_jax_13;
|
|
||||||
;;
|
|
||||||
*) echo "bad argument $1"; exit 1
|
|
||||||
;;
|
|
||||||
esac
|
|
||||||
shift
|
|
||||||
done
|
|
||||||
@ -1,56 +0,0 @@
|
|||||||
#!/bin/bash
|
|
||||||
# Script used only in CD pipeline
|
|
||||||
|
|
||||||
set -ex
|
|
||||||
|
|
||||||
# install dependencies
|
|
||||||
dnf -y install gmp-devel libmpc-devel texinfo flex bison
|
|
||||||
|
|
||||||
cd /usr/local/src
|
|
||||||
# fetch source for gcc 13
|
|
||||||
git clone --depth 1 --single-branch -b releases/gcc-13.3.0 https://github.com/gcc-mirror/gcc.git gcc-13.3.0
|
|
||||||
|
|
||||||
mkdir -p gcc-13.3.0/build-gomp
|
|
||||||
cd gcc-13.3.0/build-gomp
|
|
||||||
|
|
||||||
# configure gcc build
|
|
||||||
# I got these flags by:
|
|
||||||
# 1. downloading the source rpm for gcc-11 on AlmaLinux 8 container
|
|
||||||
# dnf install -y dnf-plugins-core rpmdevtools
|
|
||||||
# dnf download --source libgomp
|
|
||||||
# 2. extracting the gcc.spec from the source.
|
|
||||||
# rpmdev-extract gcc-xx.src.rpm
|
|
||||||
# 3. extracting optflags and ld_flags from gcc.spec:
|
|
||||||
# rpm --eval '%{optflags}'
|
|
||||||
# rpm --eval '%{build_ldflags}'
|
|
||||||
#
|
|
||||||
# I had to remove the following flags because they didn't compile for this version of libgomp:
|
|
||||||
# -Werror=format-security
|
|
||||||
# -specs=/usr/lib/rpm/redhat/redhat-hardened-cc1
|
|
||||||
# -specs=/usr/lib/rpm/redhat/redhat-annobin-cc1
|
|
||||||
#
|
|
||||||
# I added -march=armv8-a -mtune=generic to make them explicit. I don't think they're strictly needed.
|
|
||||||
|
|
||||||
OPT_FLAGS='-O2 -march=armv8-a -mtune=generic'\
|
|
||||||
' -fexceptions -g -grecord-gcc-switches -pipe -Wall'\
|
|
||||||
' -Wp,-D_FORTIFY_SOURCE=2 -Wp,-D_GLIBCXX_ASSERTIONS'\
|
|
||||||
' -fstack-protector-strong -fasynchronous-unwind-tables'\
|
|
||||||
' -fstack-clash-protection'
|
|
||||||
|
|
||||||
LDFLAGS='-Wl,-z,relro -Wl,--as-needed -Wl,-z,now'
|
|
||||||
|
|
||||||
CFLAGS="$OPT_FLAGS" \
|
|
||||||
CXXFLAGS="$OPT_FLAGS" \
|
|
||||||
LDFLAGS="$LDFLAGS" \
|
|
||||||
../configure \
|
|
||||||
--prefix=/usr \
|
|
||||||
--libdir=/usr/lib64 \
|
|
||||||
--enable-languages=c,c++ \
|
|
||||||
--disable-multilib \
|
|
||||||
--disable-bootstrap \
|
|
||||||
--enable-libgomp
|
|
||||||
|
|
||||||
# only build libgomp
|
|
||||||
make -j$(nproc) all-target-libgomp
|
|
||||||
|
|
||||||
make install-target-libgomp
|
|
||||||
@ -1,10 +0,0 @@
|
|||||||
#!/bin/bash
|
|
||||||
|
|
||||||
set -ex
|
|
||||||
|
|
||||||
# Install MinGW-w64 for Windows cross-compilation
|
|
||||||
apt-get update
|
|
||||||
apt-get install -y g++-mingw-w64-x86-64-posix
|
|
||||||
|
|
||||||
echo "MinGW-w64 installed successfully"
|
|
||||||
x86_64-w64-mingw32-g++ --version
|
|
||||||
@ -19,8 +19,8 @@ pip_install \
|
|||||||
transformers==4.36.2
|
transformers==4.36.2
|
||||||
|
|
||||||
pip_install coloredlogs packaging
|
pip_install coloredlogs packaging
|
||||||
pip_install onnxruntime==1.23.1
|
pip_install onnxruntime==1.23.0
|
||||||
pip_install onnxscript==0.5.4
|
pip_install onnxscript==0.5.3
|
||||||
|
|
||||||
# Cache the transformers model to be used later by ONNX tests. We need to run the transformers
|
# Cache the transformers model to be used later by ONNX tests. We need to run the transformers
|
||||||
# package to download the model. By default, the model is cached at ~/.cache/huggingface/hub/
|
# package to download the model. By default, the model is cached at ~/.cache/huggingface/hub/
|
||||||
|
|||||||
@ -10,7 +10,6 @@ git clone https://github.com/OpenMathLib/OpenBLAS.git -b "${OPENBLAS_VERSION}" -
|
|||||||
|
|
||||||
OPENBLAS_CHECKOUT_DIR="OpenBLAS"
|
OPENBLAS_CHECKOUT_DIR="OpenBLAS"
|
||||||
OPENBLAS_BUILD_FLAGS="
|
OPENBLAS_BUILD_FLAGS="
|
||||||
CC=gcc
|
|
||||||
NUM_THREADS=128
|
NUM_THREADS=128
|
||||||
USE_OPENMP=1
|
USE_OPENMP=1
|
||||||
NO_SHARED=0
|
NO_SHARED=0
|
||||||
|
|||||||
@ -40,7 +40,11 @@ EOF
|
|||||||
|
|
||||||
# Default url values
|
# Default url values
|
||||||
rocm_baseurl="http://repo.radeon.com/rocm/apt/${ROCM_VERSION}"
|
rocm_baseurl="http://repo.radeon.com/rocm/apt/${ROCM_VERSION}"
|
||||||
|
amdgpu_baseurl="https://repo.radeon.com/amdgpu/${ROCM_VERSION}/ubuntu"
|
||||||
|
|
||||||
|
# Add amdgpu repository
|
||||||
UBUNTU_VERSION_NAME=`cat /etc/os-release | grep UBUNTU_CODENAME | awk -F= '{print $2}'`
|
UBUNTU_VERSION_NAME=`cat /etc/os-release | grep UBUNTU_CODENAME | awk -F= '{print $2}'`
|
||||||
|
echo "deb [arch=amd64] ${amdgpu_baseurl} ${UBUNTU_VERSION_NAME} main" > /etc/apt/sources.list.d/amdgpu.list
|
||||||
|
|
||||||
# Add rocm repository
|
# Add rocm repository
|
||||||
wget -qO - http://repo.radeon.com/rocm/rocm.gpg.key | apt-key add -
|
wget -qO - http://repo.radeon.com/rocm/rocm.gpg.key | apt-key add -
|
||||||
|
|||||||
@ -12,8 +12,8 @@ function do_install() {
|
|||||||
|
|
||||||
rocm_version_nodot=${rocm_version//./}
|
rocm_version_nodot=${rocm_version//./}
|
||||||
|
|
||||||
# post merge of https://github.com/icl-utk-edu/magma/pull/65
|
# https://github.com/icl-utk-edu/magma/pull/65
|
||||||
MAGMA_VERSION=c0792ae825fb36872784892ea643dd6f3456bc5f
|
MAGMA_VERSION=d6e4117bc88e73f06d26c6c2e14f064e8fc3d1ec
|
||||||
magma_archive="magma-rocm${rocm_version_nodot}-${MAGMA_VERSION}-1.tar.bz2"
|
magma_archive="magma-rocm${rocm_version_nodot}-${MAGMA_VERSION}-1.tar.bz2"
|
||||||
|
|
||||||
rocm_dir="/opt/rocm"
|
rocm_dir="/opt/rocm"
|
||||||
|
|||||||
@ -9,7 +9,7 @@ set -xe
|
|||||||
|
|
||||||
function install_ubuntu() {
|
function install_ubuntu() {
|
||||||
. /etc/os-release
|
. /etc/os-release
|
||||||
if [[ ! " jammy noble " =~ " ${VERSION_CODENAME} " ]]; then
|
if [[ ! " jammy " =~ " ${VERSION_CODENAME} " ]]; then
|
||||||
echo "Ubuntu version ${VERSION_CODENAME} not supported"
|
echo "Ubuntu version ${VERSION_CODENAME} not supported"
|
||||||
exit
|
exit
|
||||||
fi
|
fi
|
||||||
@ -35,24 +35,25 @@ function install_ubuntu() {
|
|||||||
# The xpu-smi packages
|
# The xpu-smi packages
|
||||||
apt-get install -y flex bison xpu-smi
|
apt-get install -y flex bison xpu-smi
|
||||||
|
|
||||||
# Compute and Media Runtimes
|
if [[ "${XPU_DRIVER_TYPE,,}" == "lts" ]]; then
|
||||||
if [[ " ${VERSION_CODENAME} " =~ " noble " ]]; then
|
# Compute and Media Runtimes
|
||||||
apt-get install -y \
|
apt-get install -y \
|
||||||
intel-opencl-icd libze-intel-gpu1 libze1 \
|
intel-opencl-icd intel-level-zero-gpu level-zero \
|
||||||
intel-media-va-driver-non-free libmfx-gen1 libvpl2 \
|
intel-media-va-driver-non-free libmfx1 libmfxgen1 libvpl2 \
|
||||||
libegl-mesa0 libegl1-mesa-dev libgbm1 libgl1-mesa-dev libgl1-mesa-dri \
|
libegl-mesa0 libegl1-mesa libegl1-mesa-dev libgbm1 libgl1-mesa-dev libgl1-mesa-dri \
|
||||||
libglapi-mesa libgles2-mesa-dev libglx-mesa0 libigdgmm12 libxatracker2 mesa-va-drivers \
|
libglapi-mesa libgles2-mesa-dev libglx-mesa0 libigdgmm12 libxatracker2 mesa-va-drivers \
|
||||||
mesa-vdpau-drivers mesa-vulkan-drivers va-driver-all vainfo hwinfo clinfo intel-ocloc
|
mesa-vdpau-drivers mesa-vulkan-drivers va-driver-all vainfo hwinfo clinfo
|
||||||
else # jammy
|
# Development Packages
|
||||||
|
apt-get install -y libigc-dev intel-igc-cm libigdfcl-dev libigfxcmrt-dev level-zero-dev
|
||||||
|
else # rolling driver
|
||||||
apt-get install -y \
|
apt-get install -y \
|
||||||
intel-opencl-icd libze-intel-gpu1 libze1 \
|
intel-opencl-icd libze-intel-gpu1 libze1 \
|
||||||
intel-media-va-driver-non-free libmfx-gen1 libvpl2 \
|
intel-media-va-driver-non-free libmfx-gen1 libvpl2 \
|
||||||
libegl-mesa0 libegl1-mesa libegl1-mesa-dev libgbm1 libgl1-mesa-dev libgl1-mesa-dri \
|
libegl-mesa0 libegl1-mesa libegl1-mesa-dev libgbm1 libgl1-mesa-dev libgl1-mesa-dri \
|
||||||
libglapi-mesa libglx-mesa0 libigdgmm12 libxatracker2 mesa-va-drivers \
|
libglapi-mesa libglx-mesa0 libigdgmm12 libxatracker2 mesa-va-drivers \
|
||||||
mesa-vdpau-drivers mesa-vulkan-drivers va-driver-all vainfo hwinfo clinfo intel-ocloc
|
mesa-vdpau-drivers mesa-vulkan-drivers va-driver-all vainfo hwinfo clinfo intel-ocloc
|
||||||
|
apt-get install -y libigc-dev intel-igc-cm libigdfcl-dev libigfxcmrt-dev libze-dev
|
||||||
fi
|
fi
|
||||||
# Development Packages
|
|
||||||
apt-get install -y libigc-dev intel-igc-cm libigdfcl-dev libigfxcmrt-dev libze-dev
|
|
||||||
|
|
||||||
# Install Intel Support Packages
|
# Install Intel Support Packages
|
||||||
apt-get install -y ${XPU_PACKAGES}
|
apt-get install -y ${XPU_PACKAGES}
|
||||||
@ -65,7 +66,7 @@ function install_ubuntu() {
|
|||||||
function install_rhel() {
|
function install_rhel() {
|
||||||
. /etc/os-release
|
. /etc/os-release
|
||||||
if [[ "${ID}" == "rhel" ]]; then
|
if [[ "${ID}" == "rhel" ]]; then
|
||||||
if [[ ! " 8.8 8.10 9.0 9.2 9.3 " =~ " ${VERSION_ID} " ]]; then
|
if [[ ! " 8.8 8.9 9.0 9.2 9.3 " =~ " ${VERSION_ID} " ]]; then
|
||||||
echo "RHEL version ${VERSION_ID} not supported"
|
echo "RHEL version ${VERSION_ID} not supported"
|
||||||
exit
|
exit
|
||||||
fi
|
fi
|
||||||
@ -146,7 +147,7 @@ function install_sles() {
|
|||||||
XPU_DRIVER_VERSION=""
|
XPU_DRIVER_VERSION=""
|
||||||
if [[ "${XPU_DRIVER_TYPE,,}" == "lts" ]]; then
|
if [[ "${XPU_DRIVER_TYPE,,}" == "lts" ]]; then
|
||||||
# Use GPU driver LTS releases
|
# Use GPU driver LTS releases
|
||||||
XPU_DRIVER_VERSION="/lts/2523"
|
XPU_DRIVER_VERSION="/lts/2350"
|
||||||
fi
|
fi
|
||||||
|
|
||||||
# Default use Intel® oneAPI Deep Learning Essentials 2025.1
|
# Default use Intel® oneAPI Deep Learning Essentials 2025.1
|
||||||
|
|||||||
@ -39,17 +39,17 @@ case ${DOCKER_TAG_PREFIX} in
|
|||||||
DOCKER_GPU_BUILD_ARG=""
|
DOCKER_GPU_BUILD_ARG=""
|
||||||
;;
|
;;
|
||||||
rocm*)
|
rocm*)
|
||||||
# we want the patch version of 7.0 instead
|
|
||||||
if [[ "$GPU_ARCH_VERSION" == *"7.0"* ]]; then
|
|
||||||
GPU_ARCH_VERSION="${GPU_ARCH_VERSION}.2"
|
|
||||||
fi
|
|
||||||
# we want the patch version of 6.4 instead
|
# we want the patch version of 6.4 instead
|
||||||
if [[ "$GPU_ARCH_VERSION" == *"6.4"* ]]; then
|
if [[ "$GPU_ARCH_VERSION" == *"6.4"* ]]; then
|
||||||
GPU_ARCH_VERSION="${GPU_ARCH_VERSION}.4"
|
GPU_ARCH_VERSION="${GPU_ARCH_VERSION}.2"
|
||||||
fi
|
fi
|
||||||
BASE_TARGET=rocm
|
BASE_TARGET=rocm
|
||||||
GPU_IMAGE=rocm/dev-ubuntu-22.04:${GPU_ARCH_VERSION}-complete
|
GPU_IMAGE=rocm/dev-ubuntu-22.04:${GPU_ARCH_VERSION}-complete
|
||||||
PYTORCH_ROCM_ARCH="gfx900;gfx906;gfx908;gfx90a;gfx942;gfx1030;gfx1100;gfx1101;gfx1102;gfx1200;gfx1201;gfx950;gfx1150;gfx1151"
|
PYTORCH_ROCM_ARCH="gfx900;gfx906;gfx908;gfx90a;gfx942;gfx1030;gfx1100;gfx1101;gfx1102;gfx1200;gfx1201"
|
||||||
|
# add gfx950 conditionally starting in ROCm 7.0
|
||||||
|
if [[ "$GPU_ARCH_VERSION" == *"7.0"* ]]; then
|
||||||
|
PYTORCH_ROCM_ARCH="${PYTORCH_ROCM_ARCH};gfx950"
|
||||||
|
fi
|
||||||
DOCKER_GPU_BUILD_ARG="--build-arg PYTORCH_ROCM_ARCH=${PYTORCH_ROCM_ARCH} --build-arg ROCM_VERSION=${GPU_ARCH_VERSION}"
|
DOCKER_GPU_BUILD_ARG="--build-arg PYTORCH_ROCM_ARCH=${PYTORCH_ROCM_ARCH} --build-arg ROCM_VERSION=${GPU_ARCH_VERSION}"
|
||||||
;;
|
;;
|
||||||
*)
|
*)
|
||||||
|
|||||||
@ -149,7 +149,7 @@ FROM cpu_final as rocm_final
|
|||||||
ARG ROCM_VERSION=6.0
|
ARG ROCM_VERSION=6.0
|
||||||
ARG PYTORCH_ROCM_ARCH
|
ARG PYTORCH_ROCM_ARCH
|
||||||
ENV PYTORCH_ROCM_ARCH ${PYTORCH_ROCM_ARCH}
|
ENV PYTORCH_ROCM_ARCH ${PYTORCH_ROCM_ARCH}
|
||||||
ARG DEVTOOLSET_VERSION=13
|
ARG DEVTOOLSET_VERSION=11
|
||||||
ENV LDFLAGS="-Wl,-rpath=/opt/rh/gcc-toolset-${DEVTOOLSET_VERSION}/root/usr/lib64 -Wl,-rpath=/opt/rh/gcc-toolset-${DEVTOOLSET_VERSION}/root/usr/lib"
|
ENV LDFLAGS="-Wl,-rpath=/opt/rh/gcc-toolset-${DEVTOOLSET_VERSION}/root/usr/lib64 -Wl,-rpath=/opt/rh/gcc-toolset-${DEVTOOLSET_VERSION}/root/usr/lib"
|
||||||
# Somewhere in ROCm stack, we still use non-existing /opt/rocm/hip path,
|
# Somewhere in ROCm stack, we still use non-existing /opt/rocm/hip path,
|
||||||
# below workaround helps avoid error
|
# below workaround helps avoid error
|
||||||
|
|||||||
@ -50,10 +50,6 @@ RUN rm install_ninja.sh
|
|||||||
ENV PATH=/opt/rh/gcc-toolset-${GCCTOOLSET_VERSION}/root/usr/bin:$PATH
|
ENV PATH=/opt/rh/gcc-toolset-${GCCTOOLSET_VERSION}/root/usr/bin:$PATH
|
||||||
ENV LD_LIBRARY_PATH=/opt/rh/gcc-toolset-${GCCTOOLSET_VERSION}/root/usr/lib64:/opt/rh/gcc-toolset-${GCCTOOLSET_VERSION}/root/usr/lib:$LD_LIBRARY_PATH
|
ENV LD_LIBRARY_PATH=/opt/rh/gcc-toolset-${GCCTOOLSET_VERSION}/root/usr/lib64:/opt/rh/gcc-toolset-${GCCTOOLSET_VERSION}/root/usr/lib:$LD_LIBRARY_PATH
|
||||||
|
|
||||||
# Build a newer version of libgomp than that supported in in Almalinux 8.
|
|
||||||
COPY ./common/install_libgomp.sh install_libgomp.sh
|
|
||||||
RUN bash ./install_libgomp.sh && rm install_libgomp.sh
|
|
||||||
|
|
||||||
# git236+ would refuse to run git commands in repos owned by other users
|
# git236+ would refuse to run git commands in repos owned by other users
|
||||||
# Which causes version check to fail, as pytorch repo is bind-mounted into the image
|
# Which causes version check to fail, as pytorch repo is bind-mounted into the image
|
||||||
# Override this behaviour by treating every folder as safe
|
# Override this behaviour by treating every folder as safe
|
||||||
|
|||||||
@ -115,9 +115,6 @@ RUN env GRPC_PYTHON_BUILD_SYSTEM_OPENSSL=True pip3 install grpcio
|
|||||||
# cmake-3.28.0 from pip for onnxruntime
|
# cmake-3.28.0 from pip for onnxruntime
|
||||||
RUN python3 -mpip install cmake==3.28.0
|
RUN python3 -mpip install cmake==3.28.0
|
||||||
|
|
||||||
ADD ./common/patch_libstdc.sh patch_libstdc.sh
|
|
||||||
RUN bash ./patch_libstdc.sh && rm patch_libstdc.sh
|
|
||||||
|
|
||||||
# build onnxruntime 1.21.0 from sources.
|
# build onnxruntime 1.21.0 from sources.
|
||||||
# it is not possible to build it from sources using pip,
|
# it is not possible to build it from sources using pip,
|
||||||
# so just build it from upstream repository.
|
# so just build it from upstream repository.
|
||||||
|
|||||||
@ -75,25 +75,25 @@ case ${image} in
|
|||||||
DOCKERFILE_SUFFIX="_cuda_aarch64"
|
DOCKERFILE_SUFFIX="_cuda_aarch64"
|
||||||
;;
|
;;
|
||||||
manylinux2_28-builder:rocm*)
|
manylinux2_28-builder:rocm*)
|
||||||
# we want the patch version of 7.0 instead
|
|
||||||
if [[ "$GPU_ARCH_VERSION" == *"7.0"* ]]; then
|
|
||||||
GPU_ARCH_VERSION="${GPU_ARCH_VERSION}.2"
|
|
||||||
fi
|
|
||||||
# we want the patch version of 6.4 instead
|
# we want the patch version of 6.4 instead
|
||||||
if [[ "$GPU_ARCH_VERSION" == *"6.4"* ]]; then
|
if [[ "$GPU_ARCH_VERSION" == *"6.4"* ]]; then
|
||||||
GPU_ARCH_VERSION="${GPU_ARCH_VERSION}.4"
|
GPU_ARCH_VERSION="${GPU_ARCH_VERSION}.2"
|
||||||
fi
|
fi
|
||||||
TARGET=rocm_final
|
TARGET=rocm_final
|
||||||
MANY_LINUX_VERSION="2_28"
|
MANY_LINUX_VERSION="2_28"
|
||||||
DEVTOOLSET_VERSION="11"
|
DEVTOOLSET_VERSION="11"
|
||||||
GPU_IMAGE=rocm/dev-almalinux-8:${GPU_ARCH_VERSION}-complete
|
GPU_IMAGE=rocm/dev-almalinux-8:${GPU_ARCH_VERSION}-complete
|
||||||
PYTORCH_ROCM_ARCH="gfx900;gfx906;gfx908;gfx90a;gfx942;gfx1030;gfx1100;gfx1101;gfx1102;gfx1200;gfx1201;gfx950;gfx1150;gfx1151"
|
PYTORCH_ROCM_ARCH="gfx900;gfx906;gfx908;gfx90a;gfx942;gfx1030;gfx1100;gfx1101;gfx1102;gfx1200;gfx1201"
|
||||||
|
# add gfx950 conditionally starting in ROCm 7.0
|
||||||
|
if [[ "$GPU_ARCH_VERSION" == *"7.0"* ]]; then
|
||||||
|
PYTORCH_ROCM_ARCH="${PYTORCH_ROCM_ARCH};gfx950"
|
||||||
|
fi
|
||||||
DOCKER_GPU_BUILD_ARG="--build-arg ROCM_VERSION=${GPU_ARCH_VERSION} --build-arg PYTORCH_ROCM_ARCH=${PYTORCH_ROCM_ARCH} --build-arg DEVTOOLSET_VERSION=${DEVTOOLSET_VERSION}"
|
DOCKER_GPU_BUILD_ARG="--build-arg ROCM_VERSION=${GPU_ARCH_VERSION} --build-arg PYTORCH_ROCM_ARCH=${PYTORCH_ROCM_ARCH} --build-arg DEVTOOLSET_VERSION=${DEVTOOLSET_VERSION}"
|
||||||
;;
|
;;
|
||||||
manylinux2_28-builder:xpu)
|
manylinux2_28-builder:xpu)
|
||||||
TARGET=xpu_final
|
TARGET=xpu_final
|
||||||
GPU_IMAGE=amd64/almalinux:8
|
GPU_IMAGE=amd64/almalinux:8
|
||||||
DOCKER_GPU_BUILD_ARG=" --build-arg DEVTOOLSET_VERSION=13"
|
DOCKER_GPU_BUILD_ARG=" --build-arg DEVTOOLSET_VERSION=11"
|
||||||
MANY_LINUX_VERSION="2_28"
|
MANY_LINUX_VERSION="2_28"
|
||||||
;;
|
;;
|
||||||
*)
|
*)
|
||||||
|
|||||||
@ -10,6 +10,11 @@ BAD_SSL = "https://self-signed.badssl.com"
|
|||||||
|
|
||||||
print("Testing SSL certificate checking for Python:", sys.version)
|
print("Testing SSL certificate checking for Python:", sys.version)
|
||||||
|
|
||||||
|
if sys.version_info[:2] < (2, 7) or sys.version_info[:2] < (3, 4):
|
||||||
|
print("This version never checks SSL certs; skipping tests")
|
||||||
|
sys.exit(0)
|
||||||
|
|
||||||
|
|
||||||
EXC = OSError
|
EXC = OSError
|
||||||
|
|
||||||
print(f"Connecting to {GOOD_SSL} should work")
|
print(f"Connecting to {GOOD_SSL} should work")
|
||||||
|
|||||||
@ -120,8 +120,9 @@ ninja==1.11.1.4
|
|||||||
numba==0.55.2 ; python_version == "3.10" and platform_machine != "s390x"
|
numba==0.55.2 ; python_version == "3.10" and platform_machine != "s390x"
|
||||||
numba==0.60.0 ; python_version == "3.12" and platform_machine != "s390x"
|
numba==0.60.0 ; python_version == "3.12" and platform_machine != "s390x"
|
||||||
#Description: Just-In-Time Compiler for Numerical Functions
|
#Description: Just-In-Time Compiler for Numerical Functions
|
||||||
#Pinned versions: 0.55.2, 0.60.0
|
#Pinned versions: 0.54.1, 0.49.0, <=0.49.1
|
||||||
#test that import: test_numba_integration.py
|
#test that import: test_numba_integration.py
|
||||||
|
#For numba issue see https://github.com/pytorch/pytorch/issues/51511
|
||||||
#Need release > 0.61.2 for s390x due to https://github.com/numba/numba/pull/10073
|
#Need release > 0.61.2 for s390x due to https://github.com/numba/numba/pull/10073
|
||||||
|
|
||||||
#numpy
|
#numpy
|
||||||
@ -138,12 +139,10 @@ numba==0.60.0 ; python_version == "3.12" and platform_machine != "s390x"
|
|||||||
#test_binary_ufuncs.py
|
#test_binary_ufuncs.py
|
||||||
numpy==1.22.4; python_version == "3.10"
|
numpy==1.22.4; python_version == "3.10"
|
||||||
numpy==1.26.2; python_version == "3.11" or python_version == "3.12"
|
numpy==1.26.2; python_version == "3.11" or python_version == "3.12"
|
||||||
numpy==2.1.2; python_version >= "3.13" and python_version < "3.14"
|
numpy==2.1.2; python_version >= "3.13"
|
||||||
numpy==2.3.4; python_version >= "3.14"
|
|
||||||
|
|
||||||
pandas==2.0.3; python_version < "3.13"
|
pandas==2.0.3; python_version < "3.13"
|
||||||
pandas==2.2.3; python_version >= "3.13" and python_version < "3.14"
|
pandas==2.2.3; python_version >= "3.13"
|
||||||
pandas==2.3.3; python_version >= "3.14"
|
|
||||||
|
|
||||||
#onnxruntime
|
#onnxruntime
|
||||||
#Description: scoring engine for Open Neural Network Exchange (ONNX) models
|
#Description: scoring engine for Open Neural Network Exchange (ONNX) models
|
||||||
@ -155,8 +154,7 @@ opt-einsum==3.3
|
|||||||
#Pinned versions: 3.3
|
#Pinned versions: 3.3
|
||||||
#test that import: test_linalg.py
|
#test that import: test_linalg.py
|
||||||
|
|
||||||
optree==0.13.0 ; python_version < "3.14"
|
optree==0.13.0
|
||||||
optree==0.17.0 ; python_version >= "3.14"
|
|
||||||
#Description: A library for tree manipulation
|
#Description: A library for tree manipulation
|
||||||
#Pinned versions: 0.13.0
|
#Pinned versions: 0.13.0
|
||||||
#test that import: test_vmap.py, test_aotdispatch.py, test_dynamic_shapes.py,
|
#test that import: test_vmap.py, test_aotdispatch.py, test_dynamic_shapes.py,
|
||||||
@ -244,9 +242,10 @@ pygments==2.15.0
|
|||||||
#Pinned versions: 14.1.0
|
#Pinned versions: 14.1.0
|
||||||
#test that import:
|
#test that import:
|
||||||
|
|
||||||
scikit-image==0.22.0
|
scikit-image==0.19.3 ; python_version < "3.10"
|
||||||
|
scikit-image==0.22.0 ; python_version >= "3.10"
|
||||||
#Description: image processing routines
|
#Description: image processing routines
|
||||||
#Pinned versions: 0.22.0
|
#Pinned versions:
|
||||||
#test that import: test_nn.py
|
#test that import: test_nn.py
|
||||||
|
|
||||||
#scikit-learn
|
#scikit-learn
|
||||||
@ -255,8 +254,7 @@ scikit-image==0.22.0
|
|||||||
#test that import:
|
#test that import:
|
||||||
|
|
||||||
scipy==1.10.1 ; python_version <= "3.11"
|
scipy==1.10.1 ; python_version <= "3.11"
|
||||||
scipy==1.14.1 ; python_version > "3.11" and python_version < "3.14"
|
scipy==1.14.1 ; python_version >= "3.12"
|
||||||
scipy==1.16.2 ; python_version >= "3.14"
|
|
||||||
# Pin SciPy because of failing distribution tests (see #60347)
|
# Pin SciPy because of failing distribution tests (see #60347)
|
||||||
#Description: scientific python
|
#Description: scientific python
|
||||||
#Pinned versions: 1.10.1
|
#Pinned versions: 1.10.1
|
||||||
@ -328,8 +326,7 @@ pywavelets==1.7.0 ; python_version >= "3.12"
|
|||||||
#Pinned versions: 1.4.1
|
#Pinned versions: 1.4.1
|
||||||
#test that import:
|
#test that import:
|
||||||
|
|
||||||
lxml==5.3.0 ; python_version < "3.14"
|
lxml==5.3.0
|
||||||
lxml==6.0.2 ; python_version >= "3.14"
|
|
||||||
#Description: This is a requirement of unittest-xml-reporting
|
#Description: This is a requirement of unittest-xml-reporting
|
||||||
|
|
||||||
PyGithub==2.3.0
|
PyGithub==2.3.0
|
||||||
@ -339,14 +336,12 @@ sympy==1.13.3
|
|||||||
#Pinned versions:
|
#Pinned versions:
|
||||||
#test that import:
|
#test that import:
|
||||||
|
|
||||||
onnx==1.19.1 ; python_version < "3.14"
|
onnx==1.18.0
|
||||||
# Unpin once Python 3.14 is supported. See onnxruntime issue 26309.
|
|
||||||
onnx==1.18.0 ; python_version == "3.14"
|
|
||||||
#Description: Required by onnx tests, and mypy and test_public_bindings.py when checking torch.onnx._internal
|
#Description: Required by onnx tests, and mypy and test_public_bindings.py when checking torch.onnx._internal
|
||||||
#Pinned versions:
|
#Pinned versions:
|
||||||
#test that import:
|
#test that import:
|
||||||
|
|
||||||
onnxscript==0.5.4
|
onnxscript==0.5.3
|
||||||
#Description: Required by mypy and test_public_bindings.py when checking torch.onnx._internal
|
#Description: Required by mypy and test_public_bindings.py when checking torch.onnx._internal
|
||||||
#Pinned versions:
|
#Pinned versions:
|
||||||
#test that import:
|
#test that import:
|
||||||
@ -366,7 +361,7 @@ pwlf==2.2.1
|
|||||||
#test that import: test_sac_estimator.py
|
#test that import: test_sac_estimator.py
|
||||||
|
|
||||||
# To build PyTorch itself
|
# To build PyTorch itself
|
||||||
pyyaml==6.0.3
|
pyyaml==6.0.2
|
||||||
pyzstd
|
pyzstd
|
||||||
setuptools==78.1.1
|
setuptools==78.1.1
|
||||||
packaging==23.1
|
packaging==23.1
|
||||||
|
|||||||
@ -1,11 +1,15 @@
|
|||||||
sphinx==7.2.6
|
sphinx==5.3.0
|
||||||
#Description: This is used to generate PyTorch docs
|
#Description: This is used to generate PyTorch docs
|
||||||
#Pinned versions: 7.2.6
|
#Pinned versions: 5.3.0
|
||||||
|
|
||||||
pytorch_sphinx_theme2==0.2.0
|
standard-imghdr==3.13.0; python_version >= "3.13"
|
||||||
#Description: This is needed to generate PyTorch docs
|
#Description: This is needed by Sphinx, so it needs to be added here.
|
||||||
#Pinned versions: 0.2.0
|
# The reasons are as follows:
|
||||||
|
# 1) This module has been removed from the Python standard library since Python 3.13(https://peps.python.org/pep-0594/#imghdr);
|
||||||
|
# 2) The current version of Sphinx (5.3.0) is not compatible with Python 3.13.
|
||||||
|
# Once Sphinx is upgraded to a version compatible with Python 3.13 or later, we can remove this dependency.
|
||||||
|
|
||||||
|
-e git+https://github.com/pytorch/pytorch_sphinx_theme.git@71e55749be14ceb56e7f8211a9fb649866b87ad4#egg=pytorch_sphinx_theme2
|
||||||
# TODO: sphinxcontrib.katex 0.9.0 adds a local KaTeX server to speed up pre-rendering
|
# TODO: sphinxcontrib.katex 0.9.0 adds a local KaTeX server to speed up pre-rendering
|
||||||
# but it doesn't seem to work and hangs around idly. The initial thought that it is probably
|
# but it doesn't seem to work and hangs around idly. The initial thought that it is probably
|
||||||
# something related to Docker setup. We can investigate this later.
|
# something related to Docker setup. We can investigate this later.
|
||||||
@ -32,17 +36,17 @@ tensorboard==2.18.0 ; python_version >= "3.13"
|
|||||||
#Description: This is used to generate PyTorch docs
|
#Description: This is used to generate PyTorch docs
|
||||||
#Pinned versions: 2.13.0
|
#Pinned versions: 2.13.0
|
||||||
|
|
||||||
breathe==4.36.0
|
breathe==4.34.0
|
||||||
#Description: This is used to generate PyTorch C++ docs
|
#Description: This is used to generate PyTorch C++ docs
|
||||||
#Pinned versions: 4.36.0
|
#Pinned versions: 4.34.0
|
||||||
|
|
||||||
exhale==0.3.7
|
exhale==0.2.3
|
||||||
#Description: This is used to generate PyTorch C++ docs
|
#Description: This is used to generate PyTorch C++ docs
|
||||||
#Pinned versions: 0.3.7
|
#Pinned versions: 0.2.3
|
||||||
|
|
||||||
docutils==0.20
|
docutils==0.16
|
||||||
#Description: This is used to generate PyTorch C++ docs
|
#Description: This is used to generate PyTorch C++ docs
|
||||||
#Pinned versions: 0.20
|
#Pinned versions: 0.16
|
||||||
|
|
||||||
bs4==0.0.1
|
bs4==0.0.1
|
||||||
#Description: This is used to generate PyTorch C++ docs
|
#Description: This is used to generate PyTorch C++ docs
|
||||||
@ -52,13 +56,13 @@ IPython==8.12.0
|
|||||||
#Description: This is used to generate PyTorch functorch docs
|
#Description: This is used to generate PyTorch functorch docs
|
||||||
#Pinned versions: 8.12.0
|
#Pinned versions: 8.12.0
|
||||||
|
|
||||||
myst-nb==1.3.0
|
myst-nb==0.17.2
|
||||||
#Description: This is used to generate PyTorch functorch and torch.compile docs.
|
#Description: This is used to generate PyTorch functorch and torch.compile docs.
|
||||||
#Pinned versions: 1.3.0
|
#Pinned versions: 0.17.2
|
||||||
|
|
||||||
# The following are required to build torch.distributed.elastic.rendezvous.etcd* docs
|
# The following are required to build torch.distributed.elastic.rendezvous.etcd* docs
|
||||||
python-etcd==0.4.5
|
python-etcd==0.4.5
|
||||||
sphinx-copybutton==0.5.0
|
sphinx-copybutton==0.5.0
|
||||||
sphinx-design==0.6.1
|
sphinx-design==0.4.0
|
||||||
sphinxcontrib-mermaid==1.0.0
|
sphinxcontrib-mermaid==1.0.0
|
||||||
myst-parser==4.0.1
|
myst-parser==0.18.1
|
||||||
|
|||||||
@ -1 +1 @@
|
|||||||
3.5.1
|
3.5.0
|
||||||
|
|||||||
@ -54,15 +54,12 @@ ENV OPENSSL_DIR /opt/openssl
|
|||||||
RUN rm install_openssl.sh
|
RUN rm install_openssl.sh
|
||||||
|
|
||||||
ARG INDUCTOR_BENCHMARKS
|
ARG INDUCTOR_BENCHMARKS
|
||||||
ARG ANACONDA_PYTHON_VERSION
|
|
||||||
ENV ANACONDA_PYTHON_VERSION=$ANACONDA_PYTHON_VERSION
|
|
||||||
COPY ./common/install_inductor_benchmark_deps.sh install_inductor_benchmark_deps.sh
|
COPY ./common/install_inductor_benchmark_deps.sh install_inductor_benchmark_deps.sh
|
||||||
COPY ./common/common_utils.sh common_utils.sh
|
COPY ./common/common_utils.sh common_utils.sh
|
||||||
COPY ci_commit_pins/huggingface-requirements.txt huggingface-requirements.txt
|
COPY ci_commit_pins/huggingface-requirements.txt huggingface-requirements.txt
|
||||||
COPY ci_commit_pins/timm.txt timm.txt
|
COPY ci_commit_pins/timm.txt timm.txt
|
||||||
COPY ci_commit_pins/torchbench.txt torchbench.txt
|
|
||||||
RUN if [ -n "${INDUCTOR_BENCHMARKS}" ]; then bash ./install_inductor_benchmark_deps.sh; fi
|
RUN if [ -n "${INDUCTOR_BENCHMARKS}" ]; then bash ./install_inductor_benchmark_deps.sh; fi
|
||||||
RUN rm install_inductor_benchmark_deps.sh common_utils.sh timm.txt huggingface-requirements.txt torchbench.txt
|
RUN rm install_inductor_benchmark_deps.sh common_utils.sh timm.txt huggingface-requirements.txt
|
||||||
|
|
||||||
# Install XPU Dependencies
|
# Install XPU Dependencies
|
||||||
ARG XPU_VERSION
|
ARG XPU_VERSION
|
||||||
|
|||||||
@ -100,16 +100,9 @@ COPY ./common/common_utils.sh common_utils.sh
|
|||||||
COPY ci_commit_pins/huggingface-requirements.txt huggingface-requirements.txt
|
COPY ci_commit_pins/huggingface-requirements.txt huggingface-requirements.txt
|
||||||
COPY ci_commit_pins/timm.txt timm.txt
|
COPY ci_commit_pins/timm.txt timm.txt
|
||||||
COPY ci_commit_pins/torchbench.txt torchbench.txt
|
COPY ci_commit_pins/torchbench.txt torchbench.txt
|
||||||
# Only build aoti cpp tests when INDUCTOR_BENCHMARKS is set to True
|
|
||||||
ENV BUILD_AOT_INDUCTOR_TEST ${INDUCTOR_BENCHMARKS}
|
|
||||||
RUN if [ -n "${INDUCTOR_BENCHMARKS}" ]; then bash ./install_inductor_benchmark_deps.sh; fi
|
RUN if [ -n "${INDUCTOR_BENCHMARKS}" ]; then bash ./install_inductor_benchmark_deps.sh; fi
|
||||||
RUN rm install_inductor_benchmark_deps.sh common_utils.sh timm.txt huggingface-requirements.txt torchbench.txt
|
RUN rm install_inductor_benchmark_deps.sh common_utils.sh timm.txt huggingface-requirements.txt torchbench.txt
|
||||||
|
|
||||||
ARG INSTALL_MINGW
|
|
||||||
COPY ./common/install_mingw.sh install_mingw.sh
|
|
||||||
RUN if [ -n "${INSTALL_MINGW}" ]; then bash ./install_mingw.sh; fi
|
|
||||||
RUN rm install_mingw.sh
|
|
||||||
|
|
||||||
ARG TRITON
|
ARG TRITON
|
||||||
ARG TRITON_CPU
|
ARG TRITON_CPU
|
||||||
|
|
||||||
@ -143,15 +136,6 @@ COPY ci_commit_pins/halide.txt halide.txt
|
|||||||
RUN if [ -n "${HALIDE}" ]; then bash ./install_halide.sh; fi
|
RUN if [ -n "${HALIDE}" ]; then bash ./install_halide.sh; fi
|
||||||
RUN rm install_halide.sh common_utils.sh halide.txt
|
RUN rm install_halide.sh common_utils.sh halide.txt
|
||||||
|
|
||||||
ARG PALLAS
|
|
||||||
ARG CUDA_VERSION
|
|
||||||
# Install JAX with CUDA support (for Pallas)
|
|
||||||
COPY ./common/install_jax.sh install_jax.sh
|
|
||||||
COPY ./common/common_utils.sh common_utils.sh
|
|
||||||
COPY ./ci_commit_pins/jax.txt /ci_commit_pins/jax.txt
|
|
||||||
RUN if [ -n "${PALLAS}" ]; then bash ./install_jax.sh ${CUDA_VERSION}; fi
|
|
||||||
RUN rm -f install_jax.sh common_utils.sh /ci_commit_pins/jax.txt
|
|
||||||
|
|
||||||
ARG ONNX
|
ARG ONNX
|
||||||
# Install ONNX dependencies
|
# Install ONNX dependencies
|
||||||
COPY ./common/install_onnx.sh ./common/common_utils.sh ./
|
COPY ./common/install_onnx.sh ./common/common_utils.sh ./
|
||||||
|
|||||||
@ -8,11 +8,9 @@ from abc import ABC, abstractmethod
|
|||||||
|
|
||||||
|
|
||||||
try:
|
try:
|
||||||
from collections.abc import Callable # Python 3.11+
|
from typing import Any, Callable, Required, TypedDict # Python 3.11+
|
||||||
from typing import Any, Required, TypedDict
|
|
||||||
except ImportError:
|
except ImportError:
|
||||||
from collections.abc import Callable
|
from typing import Any, Callable, TypedDict
|
||||||
from typing import Any, TypedDict
|
|
||||||
|
|
||||||
from typing_extensions import Required # Fallback for Python <3.11
|
from typing_extensions import Required # Fallback for Python <3.11
|
||||||
|
|
||||||
|
|||||||
@ -57,8 +57,8 @@ def clone_external_repo(target: str, repo: str, dst: str = "", update_submodules
|
|||||||
logger.info("Successfully cloned %s", target)
|
logger.info("Successfully cloned %s", target)
|
||||||
return r, commit
|
return r, commit
|
||||||
|
|
||||||
except GitCommandError:
|
except GitCommandError as e:
|
||||||
logger.exception("Git operation failed")
|
logger.error("Git operation failed: %s", e)
|
||||||
raise
|
raise
|
||||||
|
|
||||||
|
|
||||||
|
|||||||
@ -143,7 +143,7 @@ def sample_vllm_test_library():
|
|||||||
"pytest -v -s compile/test_decorator.py",
|
"pytest -v -s compile/test_decorator.py",
|
||||||
],
|
],
|
||||||
},
|
},
|
||||||
"vllm_language_model_test_extended_generation_28_failure_test": {
|
"vllm_languagde_model_test_extended_generation_28_failure_test": {
|
||||||
"title": "Language Models Test (Extended Generation) 2.8 release failure",
|
"title": "Language Models Test (Extended Generation) 2.8 release failure",
|
||||||
"id": "vllm_languagde_model_test_extended_generation_28_failure_test",
|
"id": "vllm_languagde_model_test_extended_generation_28_failure_test",
|
||||||
"package_install": [
|
"package_install": [
|
||||||
|
|||||||
@ -63,7 +63,7 @@ class VllmBuildParameters:
|
|||||||
# DOCKERFILE_PATH: path to Dockerfile used when use_local_dockerfile is True"
|
# DOCKERFILE_PATH: path to Dockerfile used when use_local_dockerfile is True"
|
||||||
use_local_dockerfile: bool = env_bool_field("USE_LOCAL_DOCKERFILE", True)
|
use_local_dockerfile: bool = env_bool_field("USE_LOCAL_DOCKERFILE", True)
|
||||||
dockerfile_path: Path = env_path_field(
|
dockerfile_path: Path = env_path_field(
|
||||||
"DOCKERFILE_PATH", ".github/ci_configs/vllm/Dockerfile"
|
"DOCKERFILE_PATH", ".github/ci_configs/vllm/Dockerfile.tmp_vllm"
|
||||||
)
|
)
|
||||||
|
|
||||||
# the cleaning script to remove torch dependencies from pip
|
# the cleaning script to remove torch dependencies from pip
|
||||||
|
|||||||
@ -6,7 +6,7 @@ dependencies = [
|
|||||||
"GitPython==3.1.45",
|
"GitPython==3.1.45",
|
||||||
"docker==7.1.0",
|
"docker==7.1.0",
|
||||||
"pytest==7.3.2",
|
"pytest==7.3.2",
|
||||||
"uv==0.9.6"
|
"uv==0.8.6"
|
||||||
]
|
]
|
||||||
|
|
||||||
[tool.setuptools]
|
[tool.setuptools]
|
||||||
|
|||||||
@ -1,11 +1,11 @@
|
|||||||
SHELL=/usr/bin/env bash
|
SHELL=/usr/bin/env bash
|
||||||
|
|
||||||
DOCKER_CMD ?= docker
|
DOCKER_CMD ?= docker
|
||||||
DESIRED_ROCM ?= 7.1
|
DESIRED_ROCM ?= 7.0
|
||||||
DESIRED_ROCM_SHORT = $(subst .,,$(DESIRED_ROCM))
|
DESIRED_ROCM_SHORT = $(subst .,,$(DESIRED_ROCM))
|
||||||
PACKAGE_NAME = magma-rocm
|
PACKAGE_NAME = magma-rocm
|
||||||
# inherit this from underlying docker image, do not pass this env var to docker
|
# inherit this from underlying docker image, do not pass this env var to docker
|
||||||
#PYTORCH_ROCM_ARCH ?= gfx900;gfx906;gfx908;gfx90a;gfx942;gfx950;gfx1030;gfx1100;gfx1101;gfx1102;gfx1150;gfx1151;gfx1200;gfx1201
|
#PYTORCH_ROCM_ARCH ?= gfx900;gfx906;gfx908;gfx90a;gfx942;gfx950;gfx1030;gfx1100;gfx1101;gfx1102;gfx1200;gfx1201
|
||||||
|
|
||||||
DOCKER_RUN = set -eou pipefail; ${DOCKER_CMD} run --rm -i \
|
DOCKER_RUN = set -eou pipefail; ${DOCKER_CMD} run --rm -i \
|
||||||
-v $(shell git rev-parse --show-toplevel)/.ci:/builder \
|
-v $(shell git rev-parse --show-toplevel)/.ci:/builder \
|
||||||
@ -16,20 +16,15 @@ DOCKER_RUN = set -eou pipefail; ${DOCKER_CMD} run --rm -i \
|
|||||||
magma-rocm/build_magma.sh
|
magma-rocm/build_magma.sh
|
||||||
|
|
||||||
.PHONY: all
|
.PHONY: all
|
||||||
all: magma-rocm71
|
|
||||||
all: magma-rocm70
|
all: magma-rocm70
|
||||||
all: magma-rocm64
|
all: magma-rocm64
|
||||||
|
all: magma-rocm63
|
||||||
|
|
||||||
.PHONY:
|
.PHONY:
|
||||||
clean:
|
clean:
|
||||||
$(RM) -r magma-*
|
$(RM) -r magma-*
|
||||||
$(RM) -r output
|
$(RM) -r output
|
||||||
|
|
||||||
.PHONY: magma-rocm71
|
|
||||||
magma-rocm71: DESIRED_ROCM := 7.1
|
|
||||||
magma-rocm71:
|
|
||||||
$(DOCKER_RUN)
|
|
||||||
|
|
||||||
.PHONY: magma-rocm70
|
.PHONY: magma-rocm70
|
||||||
magma-rocm70: DESIRED_ROCM := 7.0
|
magma-rocm70: DESIRED_ROCM := 7.0
|
||||||
magma-rocm70:
|
magma-rocm70:
|
||||||
@ -39,3 +34,8 @@ magma-rocm70:
|
|||||||
magma-rocm64: DESIRED_ROCM := 6.4
|
magma-rocm64: DESIRED_ROCM := 6.4
|
||||||
magma-rocm64:
|
magma-rocm64:
|
||||||
$(DOCKER_RUN)
|
$(DOCKER_RUN)
|
||||||
|
|
||||||
|
.PHONY: magma-rocm63
|
||||||
|
magma-rocm63: DESIRED_ROCM := 6.3
|
||||||
|
magma-rocm63:
|
||||||
|
$(DOCKER_RUN)
|
||||||
|
|||||||
@ -187,22 +187,19 @@ if [[ $CUDA_VERSION == 12* || $CUDA_VERSION == 13* ]]; then
|
|||||||
export USE_CUFILE=0
|
export USE_CUFILE=0
|
||||||
else
|
else
|
||||||
DEPS_LIST+=(
|
DEPS_LIST+=(
|
||||||
|
"/usr/local/cuda/lib64/libnvToolsExt.so.1"
|
||||||
"/usr/local/cuda/lib64/libcublas.so.12"
|
"/usr/local/cuda/lib64/libcublas.so.12"
|
||||||
"/usr/local/cuda/lib64/libcublasLt.so.12"
|
"/usr/local/cuda/lib64/libcublasLt.so.12"
|
||||||
"/usr/local/cuda/lib64/libcudart.so.12"
|
"/usr/local/cuda/lib64/libcudart.so.12"
|
||||||
"/usr/local/cuda/lib64/libnvrtc.so.12"
|
"/usr/local/cuda/lib64/libnvrtc.so.12"
|
||||||
"/usr/local/cuda/extras/CUPTI/lib64/libcupti.so.12")
|
"/usr/local/cuda/extras/CUPTI/lib64/libcupti.so.12")
|
||||||
DEPS_SONAME+=(
|
DEPS_SONAME+=(
|
||||||
|
"libnvToolsExt.so.1"
|
||||||
"libcublas.so.12"
|
"libcublas.so.12"
|
||||||
"libcublasLt.so.12"
|
"libcublasLt.so.12"
|
||||||
"libcudart.so.12"
|
"libcudart.so.12"
|
||||||
"libnvrtc.so.12"
|
"libnvrtc.so.12"
|
||||||
"libcupti.so.12")
|
"libcupti.so.12")
|
||||||
|
|
||||||
if [[ $CUDA_VERSION != 12.9* ]]; then
|
|
||||||
DEPS_LIST+=("/usr/local/cuda/lib64/libnvToolsExt.so.1")
|
|
||||||
DEPS_SONAME+=("libnvToolsExt.so.1")
|
|
||||||
fi
|
|
||||||
fi
|
fi
|
||||||
else
|
else
|
||||||
echo "Using nvidia libs from pypi."
|
echo "Using nvidia libs from pypi."
|
||||||
|
|||||||
@ -168,16 +168,14 @@ if [[ "$BUILD_ENVIRONMENT" == *xpu* ]]; then
|
|||||||
# shellcheck disable=SC1091
|
# shellcheck disable=SC1091
|
||||||
source /opt/intel/oneapi/compiler/latest/env/vars.sh
|
source /opt/intel/oneapi/compiler/latest/env/vars.sh
|
||||||
# shellcheck disable=SC1091
|
# shellcheck disable=SC1091
|
||||||
source /opt/intel/oneapi/umf/latest/env/vars.sh
|
|
||||||
# shellcheck disable=SC1091
|
|
||||||
source /opt/intel/oneapi/ccl/latest/env/vars.sh
|
source /opt/intel/oneapi/ccl/latest/env/vars.sh
|
||||||
# shellcheck disable=SC1091
|
# shellcheck disable=SC1091
|
||||||
source /opt/intel/oneapi/mpi/latest/env/vars.sh
|
source /opt/intel/oneapi/mpi/latest/env/vars.sh
|
||||||
# shellcheck disable=SC1091
|
|
||||||
source /opt/intel/oneapi/pti/latest/env/vars.sh
|
|
||||||
# Enable XCCL build
|
# Enable XCCL build
|
||||||
export USE_XCCL=1
|
export USE_XCCL=1
|
||||||
export USE_MPI=0
|
export USE_MPI=0
|
||||||
|
# XPU kineto feature dependencies are not fully ready, disable kineto build as temp WA
|
||||||
|
export USE_KINETO=0
|
||||||
export TORCH_XPU_ARCH_LIST=pvc
|
export TORCH_XPU_ARCH_LIST=pvc
|
||||||
fi
|
fi
|
||||||
|
|
||||||
@ -235,9 +233,7 @@ if [[ "${BUILD_ENVIRONMENT}" != *cuda* ]]; then
|
|||||||
export BUILD_STATIC_RUNTIME_BENCHMARK=ON
|
export BUILD_STATIC_RUNTIME_BENCHMARK=ON
|
||||||
fi
|
fi
|
||||||
|
|
||||||
if [[ "$BUILD_ENVIRONMENT" == *-full-debug* ]]; then
|
if [[ "$BUILD_ENVIRONMENT" == *-debug* ]]; then
|
||||||
export CMAKE_BUILD_TYPE=Debug
|
|
||||||
elif [[ "$BUILD_ENVIRONMENT" == *-debug* ]]; then
|
|
||||||
export CMAKE_BUILD_TYPE=RelWithAssert
|
export CMAKE_BUILD_TYPE=RelWithAssert
|
||||||
fi
|
fi
|
||||||
|
|
||||||
@ -303,11 +299,6 @@ else
|
|||||||
python -m build --wheel --no-isolation
|
python -m build --wheel --no-isolation
|
||||||
fi
|
fi
|
||||||
pip_install_whl "$(echo dist/*.whl)"
|
pip_install_whl "$(echo dist/*.whl)"
|
||||||
if [[ "$BUILD_ENVIRONMENT" == *full-debug* ]]; then
|
|
||||||
# Regression test for https://github.com/pytorch/pytorch/issues/164297
|
|
||||||
# Torch should be importable and that's about it
|
|
||||||
pushd /; python -c "import torch;print(torch.__config__.show(), torch.randn(5) + 1.7)"; popd
|
|
||||||
fi
|
|
||||||
|
|
||||||
if [[ "${BUILD_ADDITIONAL_PACKAGES:-}" == *vision* ]]; then
|
if [[ "${BUILD_ADDITIONAL_PACKAGES:-}" == *vision* ]]; then
|
||||||
install_torchvision
|
install_torchvision
|
||||||
@ -428,7 +419,7 @@ fi
|
|||||||
if [[ "$BUILD_ENVIRONMENT" != *libtorch* && "$BUILD_ENVIRONMENT" != *bazel* ]]; then
|
if [[ "$BUILD_ENVIRONMENT" != *libtorch* && "$BUILD_ENVIRONMENT" != *bazel* ]]; then
|
||||||
# export test times so that potential sharded tests that'll branch off this build will use consistent data
|
# export test times so that potential sharded tests that'll branch off this build will use consistent data
|
||||||
# don't do this for libtorch as libtorch is C++ only and thus won't have python tests run on its build
|
# don't do this for libtorch as libtorch is C++ only and thus won't have python tests run on its build
|
||||||
PYTHONPATH=. python tools/stats/export_test_times.py
|
python tools/stats/export_test_times.py
|
||||||
fi
|
fi
|
||||||
# don't do this for bazel or s390x or riscv64 as they don't use sccache
|
# don't do this for bazel or s390x or riscv64 as they don't use sccache
|
||||||
if [[ "$BUILD_ENVIRONMENT" != *s390x* && "$BUILD_ENVIRONMENT" != *riscv64* && "$BUILD_ENVIRONMENT" != *-bazel-* ]]; then
|
if [[ "$BUILD_ENVIRONMENT" != *s390x* && "$BUILD_ENVIRONMENT" != *riscv64* && "$BUILD_ENVIRONMENT" != *-bazel-* ]]; then
|
||||||
|
|||||||
@ -67,7 +67,7 @@ fi
|
|||||||
# wheels with cxx11-abi
|
# wheels with cxx11-abi
|
||||||
|
|
||||||
echo "Checking that the gcc ABI is what we expect"
|
echo "Checking that the gcc ABI is what we expect"
|
||||||
if [[ "$(uname)" != 'Darwin' ]]; then
|
if [[ "$(uname)" != 'Darwin' && "$(uname -m)" != "s390x" ]]; then
|
||||||
# We also check that there are cxx11 symbols in libtorch
|
# We also check that there are cxx11 symbols in libtorch
|
||||||
#
|
#
|
||||||
echo "Checking that symbols in libtorch.so have the right gcc abi"
|
echo "Checking that symbols in libtorch.so have the right gcc abi"
|
||||||
|
|||||||
@ -256,7 +256,7 @@ test_torchbench_smoketest() {
|
|||||||
local device=mps
|
local device=mps
|
||||||
local dtypes=(undefined float16 bfloat16 notset)
|
local dtypes=(undefined float16 bfloat16 notset)
|
||||||
local dtype=${dtypes[$1]}
|
local dtype=${dtypes[$1]}
|
||||||
local models=(llama BERT_pytorch dcgan yolov3 resnet152 sam sam_fast pytorch_unet stable_diffusion_text_encoder speech_transformer Super_SloMo doctr_det_predictor doctr_reco_predictor vgg16)
|
local models=(hf_T5 llama BERT_pytorch dcgan hf_GPT2 yolov3 resnet152 sam sam_fast pytorch_unet stable_diffusion_text_encoder speech_transformer Super_SloMo doctr_det_predictor doctr_reco_predictor timm_resnet timm_vovnet vgg16)
|
||||||
|
|
||||||
for backend in eager inductor; do
|
for backend in eager inductor; do
|
||||||
|
|
||||||
@ -319,7 +319,7 @@ test_aoti_torchbench_smoketest() {
|
|||||||
local device=mps
|
local device=mps
|
||||||
local dtypes=(undefined float16 bfloat16 notset)
|
local dtypes=(undefined float16 bfloat16 notset)
|
||||||
local dtype=${dtypes[$1]}
|
local dtype=${dtypes[$1]}
|
||||||
local models=(llama BERT_pytorch dcgan yolov3 resnet152 sam sam_fast pytorch_unet stable_diffusion_text_encoder speech_transformer Super_SloMo doctr_det_predictor doctr_reco_predictor vgg16)
|
local models=(hf_T5 llama BERT_pytorch dcgan hf_GPT2 yolov3 resnet152 sam sam_fast pytorch_unet stable_diffusion_text_encoder speech_transformer Super_SloMo doctr_det_predictor doctr_reco_predictor timm_resnet timm_vovnet vgg16)
|
||||||
|
|
||||||
echo "Launching torchbench inference performance run for AOT Inductor and dtype ${dtype}"
|
echo "Launching torchbench inference performance run for AOT Inductor and dtype ${dtype}"
|
||||||
local dtype_arg="--${dtype}"
|
local dtype_arg="--${dtype}"
|
||||||
|
|||||||
@ -89,41 +89,23 @@ if [ "$is_main_doc" = true ]; then
|
|||||||
|
|
||||||
make coverage
|
make coverage
|
||||||
# Now we have the coverage report, we need to make sure it is empty.
|
# Now we have the coverage report, we need to make sure it is empty.
|
||||||
# Sphinx 7.2.6+ format: python.txt contains a statistics table with a TOTAL row
|
# Count the number of lines in the file and turn that number into a variable
|
||||||
# showing the undocumented count in the third column.
|
# $lines. The `cut -f1 ...` is to only parse the number, not the filename
|
||||||
# Example: | TOTAL | 99.83% | 2 |
|
# Skip the report header by subtracting 2: the header will be output even if
|
||||||
|
# there are no undocumented items.
|
||||||
#
|
#
|
||||||
# Also: see docs/source/conf.py for "coverage_ignore*" items, which should
|
# Also: see docs/source/conf.py for "coverage_ignore*" items, which should
|
||||||
# be documented then removed from there.
|
# be documented then removed from there.
|
||||||
|
lines=$(wc -l build/coverage/python.txt 2>/dev/null |cut -f1 -d' ')
|
||||||
# Extract undocumented count from TOTAL row in Sphinx 7.2.6 statistics table
|
undocumented=$((lines - 2))
|
||||||
# The table format is: | Module | Coverage | Undocumented |
|
if [ $undocumented -lt 0 ]; then
|
||||||
# Extract the third column (undocumented count) from the TOTAL row
|
|
||||||
undocumented=$(grep "| TOTAL" build/coverage/python.txt | awk -F'|' '{print $4}' | tr -d ' ')
|
|
||||||
|
|
||||||
if [ -z "$undocumented" ] || ! [[ "$undocumented" =~ ^[0-9]+$ ]]; then
|
|
||||||
echo coverage output not found
|
echo coverage output not found
|
||||||
exit 1
|
exit 1
|
||||||
elif [ "$undocumented" -gt 0 ]; then
|
elif [ $undocumented -gt 0 ]; then
|
||||||
set +x # Disable command echoing for cleaner output
|
echo undocumented objects found:
|
||||||
echo ""
|
cat build/coverage/python.txt
|
||||||
echo "====================="
|
|
||||||
echo "UNDOCUMENTED OBJECTS:"
|
|
||||||
echo "====================="
|
|
||||||
echo ""
|
|
||||||
# Find the line number of the TOTAL row and print only what comes after it
|
|
||||||
total_line=$(grep -n "| TOTAL" build/coverage/python.txt | cut -d: -f1)
|
|
||||||
if [ -n "$total_line" ]; then
|
|
||||||
# Print only the detailed list (skip the statistics table)
|
|
||||||
tail -n +$((total_line + 2)) build/coverage/python.txt
|
|
||||||
else
|
|
||||||
# Fallback to showing entire file if TOTAL line not found
|
|
||||||
cat build/coverage/python.txt
|
|
||||||
fi
|
|
||||||
echo ""
|
|
||||||
echo "Make sure you've updated relevant .rsts in docs/source!"
|
echo "Make sure you've updated relevant .rsts in docs/source!"
|
||||||
echo "You can reproduce locally by running 'cd docs && make coverage && tail -n +\$((grep -n \"| TOTAL\" build/coverage/python.txt | cut -d: -f1) + 2)) build/coverage/python.txt'"
|
echo "You can reproduce locally by running 'cd docs && make coverage && cat build/coverage/python.txt'"
|
||||||
set -x # Re-enable command echoing
|
|
||||||
exit 1
|
exit 1
|
||||||
fi
|
fi
|
||||||
else
|
else
|
||||||
|
|||||||
@ -208,8 +208,6 @@ if [[ "$BUILD_ENVIRONMENT" == *xpu* ]]; then
|
|||||||
source /opt/intel/oneapi/ccl/latest/env/vars.sh
|
source /opt/intel/oneapi/ccl/latest/env/vars.sh
|
||||||
# shellcheck disable=SC1091
|
# shellcheck disable=SC1091
|
||||||
source /opt/intel/oneapi/mpi/latest/env/vars.sh
|
source /opt/intel/oneapi/mpi/latest/env/vars.sh
|
||||||
# shellcheck disable=SC1091
|
|
||||||
source /opt/intel/oneapi/pti/latest/env/vars.sh
|
|
||||||
# Check XPU status before testing
|
# Check XPU status before testing
|
||||||
timeout 30 xpu-smi discovery || true
|
timeout 30 xpu-smi discovery || true
|
||||||
fi
|
fi
|
||||||
@ -339,13 +337,13 @@ test_python() {
|
|||||||
|
|
||||||
test_python_smoke() {
|
test_python_smoke() {
|
||||||
# Smoke tests for H100/B200
|
# Smoke tests for H100/B200
|
||||||
time python test/run_test.py --include test_matmul_cuda test_scaled_matmul_cuda inductor/test_fp8 inductor/test_max_autotune inductor/test_cutedsl_grouped_mm $PYTHON_TEST_EXTRA_OPTION --upload-artifacts-while-running
|
time python test/run_test.py --include test_matmul_cuda inductor/test_fp8 inductor/test_max_autotune $PYTHON_TEST_EXTRA_OPTION --upload-artifacts-while-running
|
||||||
assert_git_not_dirty
|
assert_git_not_dirty
|
||||||
}
|
}
|
||||||
|
|
||||||
test_python_smoke_b200() {
|
test_python_smoke_b200() {
|
||||||
# Targeted smoke tests for B200 - staged approach to avoid too many failures
|
# Targeted smoke tests for B200 - staged approach to avoid too many failures
|
||||||
time python test/run_test.py --include test_matmul_cuda test_scaled_matmul_cuda inductor/test_fp8 $PYTHON_TEST_EXTRA_OPTION --upload-artifacts-while-running
|
time python test/run_test.py --include test_matmul_cuda inductor/test_fp8 $PYTHON_TEST_EXTRA_OPTION --upload-artifacts-while-running
|
||||||
assert_git_not_dirty
|
assert_git_not_dirty
|
||||||
}
|
}
|
||||||
|
|
||||||
@ -462,37 +460,31 @@ test_inductor_shard() {
|
|||||||
--verbose
|
--verbose
|
||||||
}
|
}
|
||||||
|
|
||||||
test_inductor_aoti_cpp() {
|
test_inductor_aoti() {
|
||||||
|
# docker build uses bdist_wheel which does not work with test_aot_inductor
|
||||||
|
# TODO: need a faster way to build
|
||||||
if [[ "$BUILD_ENVIRONMENT" == *rocm* ]]; then
|
if [[ "$BUILD_ENVIRONMENT" == *rocm* ]]; then
|
||||||
# We need to hipify before building again
|
# We need to hipify before building again
|
||||||
python3 tools/amd_build/build_amd.py
|
python3 tools/amd_build/build_amd.py
|
||||||
fi
|
fi
|
||||||
if [[ "$BUILD_ENVIRONMENT" == *sm86* ]]; then
|
if [[ "$BUILD_ENVIRONMENT" == *sm86* ]]; then
|
||||||
|
BUILD_COMMAND=(TORCH_CUDA_ARCH_LIST=8.6 USE_FLASH_ATTENTION=OFF python -m pip install --no-build-isolation -v -e .)
|
||||||
# TODO: Replace me completely, as one should not use conda libstdc++, nor need special path to TORCH_LIB
|
# TODO: Replace me completely, as one should not use conda libstdc++, nor need special path to TORCH_LIB
|
||||||
TEST_ENVS=(CPP_TESTS_DIR="${BUILD_BIN_DIR}" LD_LIBRARY_PATH="/opt/conda/envs/py_3.10/lib:${TORCH_LIB_DIR}:${LD_LIBRARY_PATH}")
|
TEST_ENVS=(CPP_TESTS_DIR="${BUILD_BIN_DIR}" LD_LIBRARY_PATH="/opt/conda/envs/py_3.10/lib:${TORCH_LIB_DIR}:${LD_LIBRARY_PATH}")
|
||||||
else
|
else
|
||||||
|
BUILD_COMMAND=(python -m pip install --no-build-isolation -v -e .)
|
||||||
TEST_ENVS=(CPP_TESTS_DIR="${BUILD_BIN_DIR}" LD_LIBRARY_PATH="${TORCH_LIB_DIR}")
|
TEST_ENVS=(CPP_TESTS_DIR="${BUILD_BIN_DIR}" LD_LIBRARY_PATH="${TORCH_LIB_DIR}")
|
||||||
fi
|
fi
|
||||||
|
|
||||||
|
# aoti cmake custom command requires `torch` to be installed
|
||||||
|
# initialize the cmake build cache and install torch
|
||||||
|
/usr/bin/env "${BUILD_COMMAND[@]}"
|
||||||
|
# rebuild with the build cache with `BUILD_AOT_INDUCTOR_TEST` enabled
|
||||||
|
/usr/bin/env CMAKE_FRESH=1 BUILD_AOT_INDUCTOR_TEST=1 "${BUILD_COMMAND[@]}"
|
||||||
|
|
||||||
/usr/bin/env "${TEST_ENVS[@]}" python test/run_test.py --cpp --verbose -i cpp/test_aoti_abi_check cpp/test_aoti_inference cpp/test_vec_half_AVX2 -dist=loadfile
|
/usr/bin/env "${TEST_ENVS[@]}" python test/run_test.py --cpp --verbose -i cpp/test_aoti_abi_check cpp/test_aoti_inference cpp/test_vec_half_AVX2 -dist=loadfile
|
||||||
}
|
}
|
||||||
|
|
||||||
test_inductor_aoti_cross_compile_for_windows() {
|
|
||||||
|
|
||||||
TEST_REPORTS_DIR=$(pwd)/test/test-reports
|
|
||||||
mkdir -p "$TEST_REPORTS_DIR"
|
|
||||||
|
|
||||||
# Set WINDOWS_CUDA_HOME environment variable
|
|
||||||
WINDOWS_CUDA_HOME="$(pwd)/win-torch-wheel-extracted"
|
|
||||||
export WINDOWS_CUDA_HOME
|
|
||||||
|
|
||||||
echo "WINDOWS_CUDA_HOME is set to: $WINDOWS_CUDA_HOME"
|
|
||||||
echo "Contents:"
|
|
||||||
ls -lah "$(pwd)/win-torch-wheel-extracted/lib/x64/" || true
|
|
||||||
|
|
||||||
python test/inductor/test_aoti_cross_compile_windows.py -k compile --package-dir "$TEST_REPORTS_DIR" --win-torch-lib-dir "$(pwd)/win-torch-wheel-extracted/torch/lib"
|
|
||||||
}
|
|
||||||
|
|
||||||
test_inductor_cpp_wrapper_shard() {
|
test_inductor_cpp_wrapper_shard() {
|
||||||
if [[ -z "$NUM_TEST_SHARDS" ]]; then
|
if [[ -z "$NUM_TEST_SHARDS" ]]; then
|
||||||
echo "NUM_TEST_SHARDS must be defined to run a Python test shard"
|
echo "NUM_TEST_SHARDS must be defined to run a Python test shard"
|
||||||
@ -574,8 +566,6 @@ fi
|
|||||||
|
|
||||||
if [[ "${TEST_CONFIG}" == *cpu* ]]; then
|
if [[ "${TEST_CONFIG}" == *cpu* ]]; then
|
||||||
DYNAMO_BENCHMARK_FLAGS+=(--device cpu)
|
DYNAMO_BENCHMARK_FLAGS+=(--device cpu)
|
||||||
elif [[ "${TEST_CONFIG}" == *xpu* ]]; then
|
|
||||||
DYNAMO_BENCHMARK_FLAGS+=(--device xpu)
|
|
||||||
else
|
else
|
||||||
DYNAMO_BENCHMARK_FLAGS+=(--device cuda)
|
DYNAMO_BENCHMARK_FLAGS+=(--device cuda)
|
||||||
fi
|
fi
|
||||||
@ -669,8 +659,6 @@ test_perf_for_dashboard() {
|
|||||||
device=cuda_b200
|
device=cuda_b200
|
||||||
elif [[ "${TEST_CONFIG}" == *rocm* ]]; then
|
elif [[ "${TEST_CONFIG}" == *rocm* ]]; then
|
||||||
device=rocm
|
device=rocm
|
||||||
elif [[ "${TEST_CONFIG}" == *xpu* ]]; then
|
|
||||||
device=xpu
|
|
||||||
fi
|
fi
|
||||||
|
|
||||||
for mode in "${modes[@]}"; do
|
for mode in "${modes[@]}"; do
|
||||||
@ -826,11 +814,6 @@ test_inductor_halide() {
|
|||||||
assert_git_not_dirty
|
assert_git_not_dirty
|
||||||
}
|
}
|
||||||
|
|
||||||
test_inductor_pallas() {
|
|
||||||
python test/run_test.py --include inductor/test_pallas.py --verbose
|
|
||||||
assert_git_not_dirty
|
|
||||||
}
|
|
||||||
|
|
||||||
test_inductor_triton_cpu() {
|
test_inductor_triton_cpu() {
|
||||||
python test/run_test.py --include inductor/test_triton_cpu_backend.py inductor/test_torchinductor_strided_blocks.py --verbose
|
python test/run_test.py --include inductor/test_triton_cpu_backend.py inductor/test_torchinductor_strided_blocks.py --verbose
|
||||||
assert_git_not_dirty
|
assert_git_not_dirty
|
||||||
@ -855,7 +838,7 @@ test_dynamo_benchmark() {
|
|||||||
elif [[ "${suite}" == "timm_models" ]]; then
|
elif [[ "${suite}" == "timm_models" ]]; then
|
||||||
export TORCHBENCH_ONLY_MODELS="inception_v3"
|
export TORCHBENCH_ONLY_MODELS="inception_v3"
|
||||||
elif [[ "${suite}" == "torchbench" ]]; then
|
elif [[ "${suite}" == "torchbench" ]]; then
|
||||||
export TORCHBENCH_ONLY_MODELS="BERT_pytorch"
|
export TORCHBENCH_ONLY_MODELS="hf_Bert"
|
||||||
fi
|
fi
|
||||||
fi
|
fi
|
||||||
test_single_dynamo_benchmark "dashboard" "$suite" "$shard_id" "$@"
|
test_single_dynamo_benchmark "dashboard" "$suite" "$shard_id" "$@"
|
||||||
@ -886,13 +869,13 @@ test_inductor_torchbench_smoketest_perf() {
|
|||||||
mkdir -p "$TEST_REPORTS_DIR"
|
mkdir -p "$TEST_REPORTS_DIR"
|
||||||
|
|
||||||
python benchmarks/dynamo/torchbench.py --device cuda --performance --backend inductor --float16 --training \
|
python benchmarks/dynamo/torchbench.py --device cuda --performance --backend inductor --float16 --training \
|
||||||
--batch-size-file "$(realpath benchmarks/dynamo/torchbench_models_list.txt)" --only BERT_pytorch \
|
--batch-size-file "$(realpath benchmarks/dynamo/torchbench_models_list.txt)" --only hf_Bert \
|
||||||
--output "$TEST_REPORTS_DIR/inductor_training_smoketest.csv"
|
--output "$TEST_REPORTS_DIR/inductor_training_smoketest.csv"
|
||||||
# The threshold value needs to be actively maintained to make this check useful
|
# The threshold value needs to be actively maintained to make this check useful
|
||||||
python benchmarks/dynamo/check_perf_csv.py -f "$TEST_REPORTS_DIR/inductor_training_smoketest.csv" -t 1.4
|
python benchmarks/dynamo/check_perf_csv.py -f "$TEST_REPORTS_DIR/inductor_training_smoketest.csv" -t 1.4
|
||||||
|
|
||||||
# Check memory compression ratio for a few models
|
# Check memory compression ratio for a few models
|
||||||
for test in BERT_pytorch yolov3; do
|
for test in hf_Albert timm_vision_transformer; do
|
||||||
python benchmarks/dynamo/torchbench.py --device cuda --performance --backend inductor --amp --training \
|
python benchmarks/dynamo/torchbench.py --device cuda --performance --backend inductor --amp --training \
|
||||||
--disable-cudagraphs --batch-size-file "$(realpath benchmarks/dynamo/torchbench_models_list.txt)" \
|
--disable-cudagraphs --batch-size-file "$(realpath benchmarks/dynamo/torchbench_models_list.txt)" \
|
||||||
--only $test --output "$TEST_REPORTS_DIR/inductor_training_smoketest_$test.csv"
|
--only $test --output "$TEST_REPORTS_DIR/inductor_training_smoketest_$test.csv"
|
||||||
@ -903,7 +886,7 @@ test_inductor_torchbench_smoketest_perf() {
|
|||||||
done
|
done
|
||||||
|
|
||||||
# Perform some "warm-start" runs for a few huggingface models.
|
# Perform some "warm-start" runs for a few huggingface models.
|
||||||
for test in AllenaiLongformerBase DistilBertForMaskedLM DistillGPT2 GoogleFnet YituTechConvBert; do
|
for test in AlbertForQuestionAnswering AllenaiLongformerBase DistilBertForMaskedLM DistillGPT2 GoogleFnet YituTechConvBert; do
|
||||||
python benchmarks/dynamo/huggingface.py --accuracy --training --amp --inductor --device cuda --warm-start-latency \
|
python benchmarks/dynamo/huggingface.py --accuracy --training --amp --inductor --device cuda --warm-start-latency \
|
||||||
--only $test --output "$TEST_REPORTS_DIR/inductor_warm_start_smoketest_$test.csv"
|
--only $test --output "$TEST_REPORTS_DIR/inductor_warm_start_smoketest_$test.csv"
|
||||||
python benchmarks/dynamo/check_accuracy.py \
|
python benchmarks/dynamo/check_accuracy.py \
|
||||||
@ -917,7 +900,7 @@ test_inductor_set_cpu_affinity(){
|
|||||||
export LD_PRELOAD="$JEMALLOC_LIB":"$LD_PRELOAD"
|
export LD_PRELOAD="$JEMALLOC_LIB":"$LD_PRELOAD"
|
||||||
export MALLOC_CONF="oversize_threshold:1,background_thread:true,metadata_thp:auto,dirty_decay_ms:-1,muzzy_decay_ms:-1"
|
export MALLOC_CONF="oversize_threshold:1,background_thread:true,metadata_thp:auto,dirty_decay_ms:-1,muzzy_decay_ms:-1"
|
||||||
|
|
||||||
if [[ "$(uname -m)" != "aarch64" ]]; then
|
if [[ "${TEST_CONFIG}" != *aarch64* ]]; then
|
||||||
# Use Intel OpenMP for x86
|
# Use Intel OpenMP for x86
|
||||||
IOMP_LIB="$(dirname "$(which python)")/../lib/libiomp5.so"
|
IOMP_LIB="$(dirname "$(which python)")/../lib/libiomp5.so"
|
||||||
export LD_PRELOAD="$IOMP_LIB":"$LD_PRELOAD"
|
export LD_PRELOAD="$IOMP_LIB":"$LD_PRELOAD"
|
||||||
@ -931,7 +914,7 @@ test_inductor_set_cpu_affinity(){
|
|||||||
cores=$((cpus / thread_per_core))
|
cores=$((cpus / thread_per_core))
|
||||||
|
|
||||||
# Set number of cores to 16 on aarch64 for performance runs
|
# Set number of cores to 16 on aarch64 for performance runs
|
||||||
if [[ "$(uname -m)" == "aarch64" && $cores -gt 16 ]]; then
|
if [[ "${TEST_CONFIG}" == *aarch64* && $cores -gt 16 ]]; then
|
||||||
cores=16
|
cores=16
|
||||||
fi
|
fi
|
||||||
export OMP_NUM_THREADS=$cores
|
export OMP_NUM_THREADS=$cores
|
||||||
@ -1632,7 +1615,6 @@ test_operator_benchmark() {
|
|||||||
TEST_REPORTS_DIR=$(pwd)/test/test-reports
|
TEST_REPORTS_DIR=$(pwd)/test/test-reports
|
||||||
mkdir -p "$TEST_REPORTS_DIR"
|
mkdir -p "$TEST_REPORTS_DIR"
|
||||||
TEST_DIR=$(pwd)
|
TEST_DIR=$(pwd)
|
||||||
ARCH=$(uname -m)
|
|
||||||
|
|
||||||
test_inductor_set_cpu_affinity
|
test_inductor_set_cpu_affinity
|
||||||
|
|
||||||
@ -1647,7 +1629,7 @@ test_operator_benchmark() {
|
|||||||
pip_install pandas
|
pip_install pandas
|
||||||
python check_perf_csv.py \
|
python check_perf_csv.py \
|
||||||
--actual "${TEST_REPORTS_DIR}/operator_benchmark_eager_float32_cpu.csv" \
|
--actual "${TEST_REPORTS_DIR}/operator_benchmark_eager_float32_cpu.csv" \
|
||||||
--expected "${ARCH}_expected_ci_operator_benchmark_eager_float32_cpu.csv"
|
--expected "expected_ci_operator_benchmark_eager_float32_cpu.csv"
|
||||||
}
|
}
|
||||||
|
|
||||||
test_operator_microbenchmark() {
|
test_operator_microbenchmark() {
|
||||||
@ -1660,7 +1642,7 @@ test_operator_microbenchmark() {
|
|||||||
|
|
||||||
cd "${TEST_DIR}"/benchmarks/operator_benchmark
|
cd "${TEST_DIR}"/benchmarks/operator_benchmark
|
||||||
|
|
||||||
for OP_BENCHMARK_TESTS in matmul mm addmm bmm conv; do
|
for OP_BENCHMARK_TESTS in matmul mm addmm bmm; do
|
||||||
$TASKSET python -m pt.${OP_BENCHMARK_TESTS}_test --tag-filter long \
|
$TASKSET python -m pt.${OP_BENCHMARK_TESTS}_test --tag-filter long \
|
||||||
--output-json-for-dashboard "${TEST_REPORTS_DIR}/operator_microbenchmark_${OP_BENCHMARK_TESTS}_compile.json" \
|
--output-json-for-dashboard "${TEST_REPORTS_DIR}/operator_microbenchmark_${OP_BENCHMARK_TESTS}_compile.json" \
|
||||||
--benchmark-name "PyTorch operator microbenchmark" --use-compile
|
--benchmark-name "PyTorch operator microbenchmark" --use-compile
|
||||||
@ -1684,7 +1666,7 @@ if [[ "${TEST_CONFIG}" == *numpy_2* ]]; then
|
|||||||
python -m pip install --pre numpy==2.0.2 scipy==1.13.1 numba==0.60.0
|
python -m pip install --pre numpy==2.0.2 scipy==1.13.1 numba==0.60.0
|
||||||
fi
|
fi
|
||||||
python test/run_test.py --include dynamo/test_functions.py dynamo/test_unspec.py test_binary_ufuncs.py test_fake_tensor.py test_linalg.py test_numpy_interop.py test_tensor_creation_ops.py test_torch.py torch_np/test_basic.py
|
python test/run_test.py --include dynamo/test_functions.py dynamo/test_unspec.py test_binary_ufuncs.py test_fake_tensor.py test_linalg.py test_numpy_interop.py test_tensor_creation_ops.py test_torch.py torch_np/test_basic.py
|
||||||
elif [[ "${BUILD_ENVIRONMENT}" == *aarch64* && "${TEST_CONFIG}" == 'default' ]]; then
|
elif [[ "${BUILD_ENVIRONMENT}" == *aarch64* && "${TEST_CONFIG}" != *perf_cpu_aarch64* ]]; then
|
||||||
test_linux_aarch64
|
test_linux_aarch64
|
||||||
elif [[ "${TEST_CONFIG}" == *backward* ]]; then
|
elif [[ "${TEST_CONFIG}" == *backward* ]]; then
|
||||||
test_forward_backward_compatibility
|
test_forward_backward_compatibility
|
||||||
@ -1731,14 +1713,10 @@ elif [[ "${TEST_CONFIG}" == *inductor_distributed* ]]; then
|
|||||||
test_inductor_distributed
|
test_inductor_distributed
|
||||||
elif [[ "${TEST_CONFIG}" == *inductor-halide* ]]; then
|
elif [[ "${TEST_CONFIG}" == *inductor-halide* ]]; then
|
||||||
test_inductor_halide
|
test_inductor_halide
|
||||||
elif [[ "${TEST_CONFIG}" == *inductor-pallas* ]]; then
|
|
||||||
test_inductor_pallas
|
|
||||||
elif [[ "${TEST_CONFIG}" == *inductor-triton-cpu* ]]; then
|
elif [[ "${TEST_CONFIG}" == *inductor-triton-cpu* ]]; then
|
||||||
test_inductor_triton_cpu
|
test_inductor_triton_cpu
|
||||||
elif [[ "${TEST_CONFIG}" == *inductor-micro-benchmark* ]]; then
|
elif [[ "${TEST_CONFIG}" == *inductor-micro-benchmark* ]]; then
|
||||||
test_inductor_micro_benchmark
|
test_inductor_micro_benchmark
|
||||||
elif [[ "${TEST_CONFIG}" == *aoti_cross_compile_for_windows* ]]; then
|
|
||||||
test_inductor_aoti_cross_compile_for_windows
|
|
||||||
elif [[ "${TEST_CONFIG}" == *huggingface* ]]; then
|
elif [[ "${TEST_CONFIG}" == *huggingface* ]]; then
|
||||||
install_torchvision
|
install_torchvision
|
||||||
id=$((SHARD_NUMBER-1))
|
id=$((SHARD_NUMBER-1))
|
||||||
@ -1770,7 +1748,7 @@ elif [[ "${TEST_CONFIG}" == *torchbench* ]]; then
|
|||||||
else
|
else
|
||||||
# Do this after checkout_install_torchbench to ensure we clobber any
|
# Do this after checkout_install_torchbench to ensure we clobber any
|
||||||
# nightlies that torchbench may pull in
|
# nightlies that torchbench may pull in
|
||||||
if [[ "${TEST_CONFIG}" != *cpu* && "${TEST_CONFIG}" != *xpu* ]]; then
|
if [[ "${TEST_CONFIG}" != *cpu* ]]; then
|
||||||
install_torchrec_and_fbgemm
|
install_torchrec_and_fbgemm
|
||||||
fi
|
fi
|
||||||
PYTHONPATH=/torchbench test_dynamo_benchmark torchbench "$id"
|
PYTHONPATH=/torchbench test_dynamo_benchmark torchbench "$id"
|
||||||
@ -1779,7 +1757,7 @@ elif [[ "${TEST_CONFIG}" == *inductor_cpp_wrapper* ]]; then
|
|||||||
install_torchvision
|
install_torchvision
|
||||||
PYTHONPATH=/torchbench test_inductor_cpp_wrapper_shard "$SHARD_NUMBER"
|
PYTHONPATH=/torchbench test_inductor_cpp_wrapper_shard "$SHARD_NUMBER"
|
||||||
if [[ "$SHARD_NUMBER" -eq "1" ]]; then
|
if [[ "$SHARD_NUMBER" -eq "1" ]]; then
|
||||||
test_inductor_aoti_cpp
|
test_inductor_aoti
|
||||||
fi
|
fi
|
||||||
elif [[ "${TEST_CONFIG}" == *inductor* ]]; then
|
elif [[ "${TEST_CONFIG}" == *inductor* ]]; then
|
||||||
install_torchvision
|
install_torchvision
|
||||||
|
|||||||
@ -70,7 +70,7 @@ sccache --zero-stats
|
|||||||
sccache --show-stats
|
sccache --show-stats
|
||||||
|
|
||||||
# Build the wheel
|
# Build the wheel
|
||||||
python -m build --wheel --no-isolation
|
python -m build --wheel --no-build-isolation
|
||||||
if ($LASTEXITCODE -ne 0) { exit 1 }
|
if ($LASTEXITCODE -ne 0) { exit 1 }
|
||||||
|
|
||||||
# Install the wheel locally
|
# Install the wheel locally
|
||||||
|
|||||||
@ -15,35 +15,37 @@ if errorlevel 1 exit /b 1
|
|||||||
if not errorlevel 0 exit /b 1
|
if not errorlevel 0 exit /b 1
|
||||||
|
|
||||||
cd %TMP_DIR_WIN%\build\torch\test
|
cd %TMP_DIR_WIN%\build\torch\test
|
||||||
|
|
||||||
:: Enable delayed variable expansion to make the list
|
|
||||||
setlocal enabledelayedexpansion
|
|
||||||
set EXE_LIST=
|
|
||||||
for /r "." %%a in (*.exe) do (
|
for /r "." %%a in (*.exe) do (
|
||||||
if "%%~na" == "c10_intrusive_ptr_benchmark" (
|
call :libtorch_check "%%~na" "%%~fa"
|
||||||
@REM NB: This is not a gtest executable file, thus couldn't be handled by
|
|
||||||
@REM pytest-cpp and is excluded from test discovery by run_test
|
|
||||||
call "%%~fa"
|
|
||||||
if errorlevel 1 goto fail
|
if errorlevel 1 goto fail
|
||||||
if not errorlevel 0 goto fail
|
|
||||||
) else (
|
|
||||||
if "%%~na" == "verify_api_visibility" (
|
|
||||||
@REM Skip verify_api_visibility as it is a compile-level test
|
|
||||||
) else (
|
|
||||||
set EXE_LIST=!EXE_LIST! cpp/%%~na
|
|
||||||
)
|
|
||||||
)
|
|
||||||
)
|
)
|
||||||
|
|
||||||
|
goto :eof
|
||||||
|
|
||||||
|
:libtorch_check
|
||||||
|
|
||||||
cd %CWD%
|
cd %CWD%
|
||||||
set CPP_TESTS_DIR=%TMP_DIR_WIN%\build\torch\test
|
set CPP_TESTS_DIR=%TMP_DIR_WIN%\build\torch\test
|
||||||
|
|
||||||
:: Run python test\run_test.py on the list
|
:: Skip verify_api_visibility as it a compile level test
|
||||||
set NO_TD=True && python test\run_test.py --cpp --verbose -i !EXE_LIST!
|
if "%~1" == "verify_api_visibility" goto :eof
|
||||||
if errorlevel 1 goto fail
|
|
||||||
if not errorlevel 0 goto fail
|
|
||||||
|
|
||||||
goto :eof
|
echo Running "%~2"
|
||||||
|
if "%~1" == "c10_intrusive_ptr_benchmark" (
|
||||||
|
:: NB: This is not a gtest executable file, thus couldn't be handled by pytest-cpp
|
||||||
|
call "%~2"
|
||||||
|
goto :eof
|
||||||
|
)
|
||||||
|
|
||||||
|
python test\run_test.py --cpp --verbose -i "cpp/%~1"
|
||||||
|
if errorlevel 1 (
|
||||||
|
echo %1 failed with exit code %errorlevel%
|
||||||
|
goto fail
|
||||||
|
)
|
||||||
|
if not errorlevel 0 (
|
||||||
|
echo %1 failed with exit code %errorlevel%
|
||||||
|
goto fail
|
||||||
|
)
|
||||||
|
|
||||||
:eof
|
:eof
|
||||||
exit /b 0
|
exit /b 0
|
||||||
|
|||||||
@ -38,7 +38,7 @@ if [[ "$BUILD_ENVIRONMENT" == *cuda* ]]; then
|
|||||||
fi
|
fi
|
||||||
|
|
||||||
# TODO: Move this to .ci/docker/requirements-ci.txt
|
# TODO: Move this to .ci/docker/requirements-ci.txt
|
||||||
python -m pip install "psutil==5.9.1" nvidia-ml-py "pytest-shard==0.1.2"
|
python -m pip install "psutil==5.9.1" "pynvml==11.4.1" "pytest-shard==0.1.2"
|
||||||
|
|
||||||
run_tests() {
|
run_tests() {
|
||||||
# Run nvidia-smi if available
|
# Run nvidia-smi if available
|
||||||
|
|||||||
@ -7,9 +7,12 @@ if "%DESIRED_PYTHON%" == "3.13t" (
|
|||||||
set "PYTHON_INSTALLER_URL=https://www.python.org/ftp/python/3.13.0/python-3.13.0-amd64.exe"
|
set "PYTHON_INSTALLER_URL=https://www.python.org/ftp/python/3.13.0/python-3.13.0-amd64.exe"
|
||||||
set ADDITIONAL_OPTIONS="Include_freethreaded=1"
|
set ADDITIONAL_OPTIONS="Include_freethreaded=1"
|
||||||
set PYTHON_EXEC="python3.13t"
|
set PYTHON_EXEC="python3.13t"
|
||||||
|
) else if "%DESIRED_PYTHON%"=="3.14" (
|
||||||
|
echo Python version is set to 3.14 or 3.14t
|
||||||
|
set "PYTHON_INSTALLER_URL=https://www.python.org/ftp/python/3.14.0/python-3.14.0rc1-amd64.exe"
|
||||||
) else if "%DESIRED_PYTHON%"=="3.14t" (
|
) else if "%DESIRED_PYTHON%"=="3.14t" (
|
||||||
echo Python version is set to 3.14 or 3.14t
|
echo Python version is set to 3.14 or 3.14t
|
||||||
set "PYTHON_INSTALLER_URL=https://www.python.org/ftp/python/3.14.0/python-3.14.0-amd64.exe"
|
set "PYTHON_INSTALLER_URL=https://www.python.org/ftp/python/3.14.0/python-3.14.0rc1-amd64.exe"
|
||||||
set ADDITIONAL_OPTIONS="Include_freethreaded=1"
|
set ADDITIONAL_OPTIONS="Include_freethreaded=1"
|
||||||
set PYTHON_EXEC="python3.14t"
|
set PYTHON_EXEC="python3.14t"
|
||||||
) else (
|
) else (
|
||||||
|
|||||||
@ -71,7 +71,14 @@ export PYTORCH_BUILD_NUMBER=1
|
|||||||
|
|
||||||
# Set triton version as part of PYTORCH_EXTRA_INSTALL_REQUIREMENTS
|
# Set triton version as part of PYTORCH_EXTRA_INSTALL_REQUIREMENTS
|
||||||
TRITON_VERSION=$(cat $PYTORCH_ROOT/.ci/docker/triton_version.txt)
|
TRITON_VERSION=$(cat $PYTORCH_ROOT/.ci/docker/triton_version.txt)
|
||||||
TRITON_CONSTRAINT="platform_system == 'Linux'"
|
|
||||||
|
# Here PYTORCH_EXTRA_INSTALL_REQUIREMENTS is already set for the all the wheel builds hence append TRITON_CONSTRAINT
|
||||||
|
TRITON_CONSTRAINT="platform_system == 'Linux' and platform_machine == 'x86_64'"
|
||||||
|
|
||||||
|
# CUDA 12.9/13.0 builds have triton for Linux and Linux aarch64 binaries.
|
||||||
|
if [[ "$DESIRED_CUDA" == "cu129" ]] || [[ "$DESIRED_CUDA" == "cu130" ]]; then
|
||||||
|
TRITON_CONSTRAINT="platform_system == 'Linux'"
|
||||||
|
fi
|
||||||
|
|
||||||
if [[ "$PACKAGE_TYPE" =~ .*wheel.* && -n "${PYTORCH_EXTRA_INSTALL_REQUIREMENTS:-}" && ! "$PYTORCH_BUILD_VERSION" =~ .*xpu.* ]]; then
|
if [[ "$PACKAGE_TYPE" =~ .*wheel.* && -n "${PYTORCH_EXTRA_INSTALL_REQUIREMENTS:-}" && ! "$PYTORCH_BUILD_VERSION" =~ .*xpu.* ]]; then
|
||||||
TRITON_REQUIREMENT="triton==${TRITON_VERSION}; ${TRITON_CONSTRAINT}"
|
TRITON_REQUIREMENT="triton==${TRITON_VERSION}; ${TRITON_CONSTRAINT}"
|
||||||
@ -163,13 +170,8 @@ if [[ "$(uname)" != Darwin ]]; then
|
|||||||
MEMORY_LIMIT_MAX_JOBS=12
|
MEMORY_LIMIT_MAX_JOBS=12
|
||||||
NUM_CPUS=$(( $(nproc) - 2 ))
|
NUM_CPUS=$(( $(nproc) - 2 ))
|
||||||
|
|
||||||
if [[ "$(uname)" == Linux ]]; then
|
# Defaults here for **binary** linux builds so they can be changed in one place
|
||||||
# Defaults here for **binary** linux builds so they can be changed in one place
|
export MAX_JOBS=${MAX_JOBS:-$(( ${NUM_CPUS} > ${MEMORY_LIMIT_MAX_JOBS} ? ${MEMORY_LIMIT_MAX_JOBS} : ${NUM_CPUS} ))}
|
||||||
export MAX_JOBS=${MAX_JOBS:-$(( ${NUM_CPUS} > ${MEMORY_LIMIT_MAX_JOBS} ? ${MEMORY_LIMIT_MAX_JOBS} : ${NUM_CPUS} ))}
|
|
||||||
else
|
|
||||||
# For other builds
|
|
||||||
export MAX_JOBS=${NUM_CPUS}
|
|
||||||
fi
|
|
||||||
|
|
||||||
cat >>"$envfile" <<EOL
|
cat >>"$envfile" <<EOL
|
||||||
export MAX_JOBS="${MAX_JOBS}"
|
export MAX_JOBS="${MAX_JOBS}"
|
||||||
|
|||||||
@ -60,11 +60,9 @@ performance-*,
|
|||||||
readability-container-size-empty,
|
readability-container-size-empty,
|
||||||
readability-delete-null-pointer,
|
readability-delete-null-pointer,
|
||||||
readability-duplicate-include,
|
readability-duplicate-include,
|
||||||
readability-named-parameter,
|
|
||||||
readability-misplaced-array-index,
|
readability-misplaced-array-index,
|
||||||
readability-redundant*,
|
readability-redundant*,
|
||||||
readability-simplify-subscript-expr,
|
readability-simplify-subscript-expr,
|
||||||
readability-static-definition-in-anonymous-namespace
|
|
||||||
readability-string-compare,
|
readability-string-compare,
|
||||||
-readability-redundant-access-specifiers,
|
-readability-redundant-access-specifiers,
|
||||||
-readability-redundant-control-flow,
|
-readability-redundant-control-flow,
|
||||||
|
|||||||
@ -1,319 +0,0 @@
|
|||||||
---
|
|
||||||
name: add-uint-support
|
|
||||||
description: Add unsigned integer (uint) type support to PyTorch operators by updating AT_DISPATCH macros. Use when adding support for uint16, uint32, uint64 types to operators, kernels, or when user mentions enabling unsigned types, barebones unsigned types, or uint support.
|
|
||||||
---
|
|
||||||
|
|
||||||
# Add Unsigned Integer (uint) Support to Operators
|
|
||||||
|
|
||||||
This skill helps add support for unsigned integer types (uint16, uint32, uint64) to PyTorch operators by updating their AT_DISPATCH macros.
|
|
||||||
|
|
||||||
## When to use this skill
|
|
||||||
|
|
||||||
Use this skill when:
|
|
||||||
- Adding uint16, uint32, or uint64 support to an operator
|
|
||||||
- User mentions "unsigned types", "uint support", "barebones unsigned types"
|
|
||||||
- Enabling support for kUInt16, kUInt32, kUInt64 in kernels
|
|
||||||
- Working with operator implementations that need expanded type coverage
|
|
||||||
|
|
||||||
## Quick reference
|
|
||||||
|
|
||||||
**Add unsigned types to existing dispatch:**
|
|
||||||
```cpp
|
|
||||||
// Before
|
|
||||||
AT_DISPATCH_V2(dtype, "op", AT_WRAP([&]() {
|
|
||||||
kernel<scalar_t>();
|
|
||||||
}), AT_EXPAND(AT_ALL_TYPES));
|
|
||||||
|
|
||||||
// After (method 1: add unsigned types explicitly)
|
|
||||||
AT_DISPATCH_V2(dtype, "op", AT_WRAP([&]() {
|
|
||||||
kernel<scalar_t>();
|
|
||||||
}), AT_EXPAND(AT_ALL_TYPES), AT_EXPAND(AT_BAREBONES_UNSIGNED_TYPES));
|
|
||||||
|
|
||||||
// After (method 2: use V2 integral types if AT_INTEGRAL_TYPES present)
|
|
||||||
AT_DISPATCH_V2(dtype, "op", AT_WRAP([&]() {
|
|
||||||
kernel<scalar_t>();
|
|
||||||
}), AT_EXPAND(AT_INTEGRAL_TYPES_V2), AT_EXPAND(AT_FLOATING_TYPES));
|
|
||||||
```
|
|
||||||
|
|
||||||
## Type group reference
|
|
||||||
|
|
||||||
**Unsigned type groups:**
|
|
||||||
- `AT_BAREBONES_UNSIGNED_TYPES`: kUInt16, kUInt32, kUInt64
|
|
||||||
- `AT_INTEGRAL_TYPES_V2`: AT_INTEGRAL_TYPES + AT_BAREBONES_UNSIGNED_TYPES
|
|
||||||
|
|
||||||
**Relationship:**
|
|
||||||
```cpp
|
|
||||||
AT_INTEGRAL_TYPES // kByte, kChar, kInt, kLong, kShort
|
|
||||||
AT_BAREBONES_UNSIGNED_TYPES // kUInt16, kUInt32, kUInt64
|
|
||||||
AT_INTEGRAL_TYPES_V2 // INTEGRAL_TYPES + BAREBONES_UNSIGNED_TYPES
|
|
||||||
```
|
|
||||||
|
|
||||||
## Instructions
|
|
||||||
|
|
||||||
### Step 1: Determine if conversion to V2 is needed
|
|
||||||
|
|
||||||
Check if the file uses AT_DISPATCH_V2:
|
|
||||||
|
|
||||||
**If using old AT_DISPATCH:**
|
|
||||||
- First convert to AT_DISPATCH_V2 using the at-dispatch-v2 skill
|
|
||||||
- Then proceed with adding uint support
|
|
||||||
|
|
||||||
**If already using AT_DISPATCH_V2:**
|
|
||||||
- Proceed directly to Step 2
|
|
||||||
|
|
||||||
### Step 2: Analyze the current dispatch macro
|
|
||||||
|
|
||||||
Identify what type groups are currently in use:
|
|
||||||
|
|
||||||
```cpp
|
|
||||||
AT_DISPATCH_V2(dtype, "op", AT_WRAP([&]() {
|
|
||||||
// body
|
|
||||||
}), AT_EXPAND(AT_ALL_TYPES), kHalf, kBFloat16);
|
|
||||||
^^^^^^^^^^^^^^^^^^^^^^^^^
|
|
||||||
Current type coverage
|
|
||||||
```
|
|
||||||
|
|
||||||
Common patterns:
|
|
||||||
- `AT_EXPAND(AT_ALL_TYPES)` → includes AT_INTEGRAL_TYPES + AT_FLOATING_TYPES
|
|
||||||
- `AT_EXPAND(AT_INTEGRAL_TYPES)` → signed integers only
|
|
||||||
- `AT_EXPAND(AT_FLOATING_TYPES)` → floating point types
|
|
||||||
|
|
||||||
### Step 3: Choose the uint addition method
|
|
||||||
|
|
||||||
Two approaches:
|
|
||||||
|
|
||||||
**Method 1: Add AT_BAREBONES_UNSIGNED_TYPES explicitly**
|
|
||||||
- Use when: You want to be explicit about adding uint support
|
|
||||||
- Add `AT_EXPAND(AT_BAREBONES_UNSIGNED_TYPES)` to the type list
|
|
||||||
|
|
||||||
**Method 2: Substitute AT_INTEGRAL_TYPES with AT_INTEGRAL_TYPES_V2**
|
|
||||||
- Use when: The dispatch already uses `AT_EXPAND(AT_INTEGRAL_TYPES)`
|
|
||||||
- More concise: replaces one type group with its superset
|
|
||||||
- Only applicable if AT_INTEGRAL_TYPES is present
|
|
||||||
|
|
||||||
### Step 4: Apply the transformation
|
|
||||||
|
|
||||||
**Method 1 example:**
|
|
||||||
```cpp
|
|
||||||
// Before
|
|
||||||
AT_DISPATCH_V2(
|
|
||||||
dtype,
|
|
||||||
"min_values_cuda",
|
|
||||||
AT_WRAP([&]() {
|
|
||||||
kernel_impl<scalar_t>(iter);
|
|
||||||
}),
|
|
||||||
AT_EXPAND(AT_ALL_TYPES),
|
|
||||||
kBFloat16, kHalf, kBool
|
|
||||||
);
|
|
||||||
|
|
||||||
// After (add unsigned types)
|
|
||||||
AT_DISPATCH_V2(
|
|
||||||
dtype,
|
|
||||||
"min_values_cuda",
|
|
||||||
AT_WRAP([&]() {
|
|
||||||
kernel_impl<scalar_t>(iter);
|
|
||||||
}),
|
|
||||||
AT_EXPAND(AT_ALL_TYPES),
|
|
||||||
AT_EXPAND(AT_BAREBONES_UNSIGNED_TYPES),
|
|
||||||
kBFloat16, kHalf, kBool
|
|
||||||
);
|
|
||||||
```
|
|
||||||
|
|
||||||
**Method 2 example:**
|
|
||||||
```cpp
|
|
||||||
// Before
|
|
||||||
AT_DISPATCH_V2(
|
|
||||||
dtype,
|
|
||||||
"integral_op",
|
|
||||||
AT_WRAP([&]() {
|
|
||||||
kernel<scalar_t>();
|
|
||||||
}),
|
|
||||||
AT_EXPAND(AT_INTEGRAL_TYPES)
|
|
||||||
);
|
|
||||||
|
|
||||||
// After (substitute with V2)
|
|
||||||
AT_DISPATCH_V2(
|
|
||||||
dtype,
|
|
||||||
"integral_op",
|
|
||||||
AT_WRAP([&]() {
|
|
||||||
kernel<scalar_t>();
|
|
||||||
}),
|
|
||||||
AT_EXPAND(AT_INTEGRAL_TYPES_V2)
|
|
||||||
);
|
|
||||||
```
|
|
||||||
|
|
||||||
### Step 5: Handle AT_ALL_TYPES vs individual type groups
|
|
||||||
|
|
||||||
If the dispatch uses `AT_EXPAND(AT_ALL_TYPES)`:
|
|
||||||
- `AT_ALL_TYPES` = `AT_INTEGRAL_TYPES` + `AT_FLOATING_TYPES`
|
|
||||||
- To add uint: add `AT_EXPAND(AT_BAREBONES_UNSIGNED_TYPES)` to the list
|
|
||||||
|
|
||||||
If the dispatch separately lists INTEGRAL and FLOATING:
|
|
||||||
```cpp
|
|
||||||
// Before
|
|
||||||
AT_EXPAND(AT_INTEGRAL_TYPES), AT_EXPAND(AT_FLOATING_TYPES)
|
|
||||||
|
|
||||||
// After (Method 2 preferred)
|
|
||||||
AT_EXPAND(AT_INTEGRAL_TYPES_V2), AT_EXPAND(AT_FLOATING_TYPES)
|
|
||||||
```
|
|
||||||
|
|
||||||
### Step 6: Verify all dispatch sites
|
|
||||||
|
|
||||||
Check the file for ALL dispatch macros that need uint support:
|
|
||||||
- Some operators have multiple dispatch sites (CPU, CUDA, different functions)
|
|
||||||
- Apply the transformation consistently across all sites
|
|
||||||
- Ensure each gets the same type coverage updates
|
|
||||||
|
|
||||||
### Step 7: Validate the changes
|
|
||||||
|
|
||||||
Check that:
|
|
||||||
- [ ] AT_DISPATCH_V2 format is used (not old AT_DISPATCH)
|
|
||||||
- [ ] Unsigned types are added via one of the two methods
|
|
||||||
- [ ] All relevant dispatch sites in the file are updated
|
|
||||||
- [ ] Type groups use `AT_EXPAND()`
|
|
||||||
- [ ] Arguments are properly formatted and comma-separated
|
|
||||||
|
|
||||||
## Common patterns
|
|
||||||
|
|
||||||
### Pattern 1: AT_ALL_TYPES + extras
|
|
||||||
|
|
||||||
```cpp
|
|
||||||
// Before
|
|
||||||
AT_DISPATCH_V2(dtype, "op", AT_WRAP([&]() {
|
|
||||||
kernel<scalar_t>();
|
|
||||||
}), AT_EXPAND(AT_ALL_TYPES), kHalf, kBFloat16);
|
|
||||||
|
|
||||||
// After
|
|
||||||
AT_DISPATCH_V2(dtype, "op", AT_WRAP([&]() {
|
|
||||||
kernel<scalar_t>();
|
|
||||||
}), AT_EXPAND(AT_ALL_TYPES), AT_EXPAND(AT_BAREBONES_UNSIGNED_TYPES), kHalf, kBFloat16);
|
|
||||||
```
|
|
||||||
|
|
||||||
### Pattern 2: Separate INTEGRAL + FLOATING
|
|
||||||
|
|
||||||
```cpp
|
|
||||||
// Before
|
|
||||||
AT_DISPATCH_V2(dtype, "op", AT_WRAP([&]() {
|
|
||||||
kernel<scalar_t>();
|
|
||||||
}), AT_EXPAND(AT_INTEGRAL_TYPES), AT_EXPAND(AT_FLOATING_TYPES));
|
|
||||||
|
|
||||||
// After
|
|
||||||
AT_DISPATCH_V2(dtype, "op", AT_WRAP([&]() {
|
|
||||||
kernel<scalar_t>();
|
|
||||||
}), AT_EXPAND(AT_INTEGRAL_TYPES_V2), AT_EXPAND(AT_FLOATING_TYPES));
|
|
||||||
```
|
|
||||||
|
|
||||||
### Pattern 3: Old dispatch needs conversion first
|
|
||||||
|
|
||||||
```cpp
|
|
||||||
// Before (needs v2 conversion first)
|
|
||||||
AT_DISPATCH_ALL_TYPES_AND2(kHalf, kBFloat16, dtype, "op", [&]() {
|
|
||||||
kernel<scalar_t>();
|
|
||||||
});
|
|
||||||
|
|
||||||
// After v2 conversion
|
|
||||||
AT_DISPATCH_V2(dtype, "op", AT_WRAP([&]() {
|
|
||||||
kernel<scalar_t>();
|
|
||||||
}), AT_EXPAND(AT_ALL_TYPES), kHalf, kBFloat16);
|
|
||||||
|
|
||||||
// After adding uint support
|
|
||||||
AT_DISPATCH_V2(dtype, "op", AT_WRAP([&]() {
|
|
||||||
kernel<scalar_t>();
|
|
||||||
}), AT_EXPAND(AT_ALL_TYPES), AT_EXPAND(AT_BAREBONES_UNSIGNED_TYPES), kHalf, kBFloat16);
|
|
||||||
```
|
|
||||||
|
|
||||||
## Multiple dispatch sites example
|
|
||||||
|
|
||||||
For a file with multiple functions:
|
|
||||||
|
|
||||||
```cpp
|
|
||||||
void min_values_kernel_cuda(TensorIterator& iter) {
|
|
||||||
AT_DISPATCH_V2(iter.dtype(), "min_values_cuda", AT_WRAP([&]() {
|
|
||||||
impl<scalar_t>(iter);
|
|
||||||
}), AT_EXPAND(AT_ALL_TYPES), AT_EXPAND(AT_BAREBONES_UNSIGNED_TYPES), kBFloat16, kHalf);
|
|
||||||
// ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
|
|
||||||
// Added uint support
|
|
||||||
}
|
|
||||||
|
|
||||||
void min_launch_kernel(TensorIterator &iter) {
|
|
||||||
AT_DISPATCH_V2(iter.input_dtype(), "min_cuda", AT_WRAP([&]() {
|
|
||||||
gpu_reduce_kernel<scalar_t>(iter);
|
|
||||||
}), AT_EXPAND(AT_ALL_TYPES), AT_EXPAND(AT_BAREBONES_UNSIGNED_TYPES), kBFloat16, kHalf);
|
|
||||||
// ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
|
|
||||||
// Added uint support here too
|
|
||||||
}
|
|
||||||
```
|
|
||||||
|
|
||||||
## Decision tree
|
|
||||||
|
|
||||||
Use this decision tree to determine the approach:
|
|
||||||
|
|
||||||
```
|
|
||||||
Is the file using AT_DISPATCH_V2?
|
|
||||||
├─ No → Use at-dispatch-v2 skill first, then continue
|
|
||||||
└─ Yes
|
|
||||||
└─ Does it use AT_EXPAND(AT_INTEGRAL_TYPES)?
|
|
||||||
├─ Yes → Replace with AT_EXPAND(AT_INTEGRAL_TYPES_V2)
|
|
||||||
└─ No → Add AT_EXPAND(AT_BAREBONES_UNSIGNED_TYPES) to type list
|
|
||||||
```
|
|
||||||
|
|
||||||
## Edge cases
|
|
||||||
|
|
||||||
### Case 1: Dispatch with only floating types
|
|
||||||
|
|
||||||
If the operator only supports floating-point types, don't add uint support:
|
|
||||||
|
|
||||||
```cpp
|
|
||||||
// Leave as-is - floating point only operator
|
|
||||||
AT_DISPATCH_V2(dtype, "float_op", AT_WRAP([&]() {
|
|
||||||
kernel<scalar_t>();
|
|
||||||
}), AT_EXPAND(AT_FLOATING_TYPES), kHalf);
|
|
||||||
```
|
|
||||||
|
|
||||||
### Case 2: Complex types present
|
|
||||||
|
|
||||||
Unsigned types work alongside complex types:
|
|
||||||
|
|
||||||
```cpp
|
|
||||||
AT_DISPATCH_V2(dtype, "op", AT_WRAP([&]() {
|
|
||||||
kernel<scalar_t>();
|
|
||||||
}), AT_EXPAND(AT_ALL_TYPES),
|
|
||||||
AT_EXPAND(AT_BAREBONES_UNSIGNED_TYPES),
|
|
||||||
AT_EXPAND(AT_COMPLEX_TYPES),
|
|
||||||
kHalf, kBFloat16);
|
|
||||||
```
|
|
||||||
|
|
||||||
### Case 3: Already has uint support
|
|
||||||
|
|
||||||
Check if uint types are already present:
|
|
||||||
- If `AT_INTEGRAL_TYPES_V2` is used → already has uint support
|
|
||||||
- If `AT_BAREBONES_UNSIGNED_TYPES` is already in list → already has uint support
|
|
||||||
- Skip the file if uint support is already present
|
|
||||||
|
|
||||||
## Workflow
|
|
||||||
|
|
||||||
When asked to add uint support:
|
|
||||||
|
|
||||||
1. Read the target file
|
|
||||||
2. Check if using AT_DISPATCH_V2:
|
|
||||||
- If not → use at-dispatch-v2 skill first
|
|
||||||
3. Identify all dispatch macro sites
|
|
||||||
4. For each dispatch:
|
|
||||||
- Analyze current type groups
|
|
||||||
- Choose method (add BAREBONES_UNSIGNED or upgrade to V2)
|
|
||||||
- Apply transformation with Edit tool
|
|
||||||
5. Show the user the changes
|
|
||||||
6. Explain what was modified
|
|
||||||
|
|
||||||
## Important notes
|
|
||||||
|
|
||||||
- Always check if v2 conversion is needed first
|
|
||||||
- Apply changes consistently across all dispatch sites in the file
|
|
||||||
- Method 2 (AT_INTEGRAL_TYPES_V2) is cleaner when applicable
|
|
||||||
- Method 1 (explicit AT_BAREBONES_UNSIGNED_TYPES) is more explicit
|
|
||||||
- Unsigned types are: kUInt16, kUInt32, kUInt64 (not kByte, which is uint8)
|
|
||||||
- Some operators may not semantically support unsigned types - use judgment
|
|
||||||
|
|
||||||
## Testing
|
|
||||||
|
|
||||||
After adding uint support, the operator should accept uint16, uint32, and uint64 tensors. The user is responsible for functional testing.
|
|
||||||
@ -1,305 +0,0 @@
|
|||||||
---
|
|
||||||
name: at-dispatch-v2
|
|
||||||
description: Convert PyTorch AT_DISPATCH macros to AT_DISPATCH_V2 format in ATen C++ code. Use when porting AT_DISPATCH_ALL_TYPES_AND*, AT_DISPATCH_FLOATING_TYPES*, or other dispatch macros to the new v2 API. For ATen kernel files, CUDA kernels, and native operator implementations.
|
|
||||||
---
|
|
||||||
|
|
||||||
# AT_DISPATCH to AT_DISPATCH_V2 Converter
|
|
||||||
|
|
||||||
This skill helps convert PyTorch's legacy AT_DISPATCH macros to the new AT_DISPATCH_V2 format, as defined in `aten/src/ATen/Dispatch_v2.h`.
|
|
||||||
|
|
||||||
## When to use this skill
|
|
||||||
|
|
||||||
Use this skill when:
|
|
||||||
- Converting AT_DISPATCH_* macros to AT_DISPATCH_V2
|
|
||||||
- Porting ATen kernels to use the new dispatch API
|
|
||||||
- Working with files in `aten/src/ATen/native/` that use dispatch macros
|
|
||||||
- User mentions "AT_DISPATCH", "dispatch v2", "Dispatch_v2.h", or macro conversion
|
|
||||||
|
|
||||||
## Quick reference
|
|
||||||
|
|
||||||
**Old format:**
|
|
||||||
```cpp
|
|
||||||
AT_DISPATCH_ALL_TYPES_AND3(kBFloat16, kHalf, kBool, dtype, "kernel_name", [&]() {
|
|
||||||
// lambda body
|
|
||||||
});
|
|
||||||
```
|
|
||||||
|
|
||||||
**New format:**
|
|
||||||
```cpp
|
|
||||||
AT_DISPATCH_V2(dtype, "kernel_name", AT_WRAP([&]() {
|
|
||||||
// lambda body
|
|
||||||
}), AT_EXPAND(AT_ALL_TYPES), kBFloat16, kHalf, kBool);
|
|
||||||
```
|
|
||||||
|
|
||||||
## Key transformations
|
|
||||||
|
|
||||||
1. **Reorder arguments**: `scalar_type` and `name` come first, then lambda, then types
|
|
||||||
2. **Wrap the lambda**: Use `AT_WRAP(lambda)` to handle internal commas
|
|
||||||
3. **Expand type groups**: Use `AT_EXPAND(AT_ALL_TYPES)` instead of implicit expansion
|
|
||||||
4. **List individual types**: Add extra types (kHalf, kBFloat16, etc.) after expanded groups
|
|
||||||
5. **Add include**: `#include <ATen/Dispatch_v2.h>` near other Dispatch includes
|
|
||||||
|
|
||||||
## Instructions
|
|
||||||
|
|
||||||
### Step 1: Add the Dispatch_v2.h include
|
|
||||||
|
|
||||||
Add the v2 header near the existing `#include <ATen/Dispatch.h>`:
|
|
||||||
|
|
||||||
```cpp
|
|
||||||
#include <ATen/Dispatch.h>
|
|
||||||
#include <ATen/Dispatch_v2.h>
|
|
||||||
```
|
|
||||||
|
|
||||||
Keep the old Dispatch.h include for now (other code may still need it).
|
|
||||||
|
|
||||||
### Step 2: Identify the old dispatch pattern
|
|
||||||
|
|
||||||
Common patterns to convert:
|
|
||||||
|
|
||||||
- `AT_DISPATCH_ALL_TYPES_AND{2,3,4}(type1, type2, ..., scalar_type, name, lambda)`
|
|
||||||
- `AT_DISPATCH_FLOATING_TYPES_AND{2,3}(type1, type2, ..., scalar_type, name, lambda)`
|
|
||||||
- `AT_DISPATCH_ALL_TYPES_AND_COMPLEX_AND{2,3}(type1, ..., scalar_type, name, lambda)`
|
|
||||||
- `AT_DISPATCH_FLOATING_AND_COMPLEX_TYPES_AND{2,3}(type1, ..., scalar_type, name, lambda)`
|
|
||||||
|
|
||||||
### Step 3: Map the old macro to type groups
|
|
||||||
|
|
||||||
Identify which type group macro corresponds to the base types:
|
|
||||||
|
|
||||||
| Old macro base | AT_DISPATCH_V2 type group |
|
|
||||||
|----------------|---------------------------|
|
|
||||||
| `ALL_TYPES` | `AT_EXPAND(AT_ALL_TYPES)` |
|
|
||||||
| `FLOATING_TYPES` | `AT_EXPAND(AT_FLOATING_TYPES)` |
|
|
||||||
| `INTEGRAL_TYPES` | `AT_EXPAND(AT_INTEGRAL_TYPES)` |
|
|
||||||
| `COMPLEX_TYPES` | `AT_EXPAND(AT_COMPLEX_TYPES)` |
|
|
||||||
| `ALL_TYPES_AND_COMPLEX` | `AT_EXPAND(AT_ALL_TYPES_AND_COMPLEX)` |
|
|
||||||
|
|
||||||
For combined patterns, use multiple `AT_EXPAND()` entries:
|
|
||||||
```cpp
|
|
||||||
// Old: AT_DISPATCH_ALL_TYPES_AND_COMPLEX_AND2(...)
|
|
||||||
// New: AT_EXPAND(AT_ALL_TYPES), AT_EXPAND(AT_COMPLEX_TYPES), type1, type2
|
|
||||||
```
|
|
||||||
|
|
||||||
### Step 4: Extract the individual types
|
|
||||||
|
|
||||||
From `AT_DISPATCH_*_AND2(type1, type2, ...)` or `AT_DISPATCH_*_AND3(type1, type2, type3, ...)`, extract the individual types (type1, type2, etc.).
|
|
||||||
|
|
||||||
These become the trailing arguments after the type group:
|
|
||||||
```cpp
|
|
||||||
AT_DISPATCH_V2(..., AT_EXPAND(AT_ALL_TYPES), kBFloat16, kHalf, kBool)
|
|
||||||
^^^^^^^^^^^^^^^^^^^^^^^^
|
|
||||||
Individual types from AND3
|
|
||||||
```
|
|
||||||
|
|
||||||
### Step 5: Transform to AT_DISPATCH_V2
|
|
||||||
|
|
||||||
Apply the transformation:
|
|
||||||
|
|
||||||
**Pattern:**
|
|
||||||
```cpp
|
|
||||||
AT_DISPATCH_V2(
|
|
||||||
scalar_type, // 1st: The dtype expression
|
|
||||||
"name", // 2nd: The debug string
|
|
||||||
AT_WRAP(lambda), // 3rd: The lambda wrapped in AT_WRAP
|
|
||||||
type_groups, // 4th+: Type groups with AT_EXPAND()
|
|
||||||
individual_types // Last: Individual types
|
|
||||||
)
|
|
||||||
```
|
|
||||||
|
|
||||||
**Example transformation:**
|
|
||||||
```cpp
|
|
||||||
// BEFORE
|
|
||||||
AT_DISPATCH_ALL_TYPES_AND3(
|
|
||||||
kBFloat16, kHalf, kBool,
|
|
||||||
iter.dtype(),
|
|
||||||
"min_values_cuda",
|
|
||||||
[&]() {
|
|
||||||
min_values_kernel_cuda_impl<scalar_t>(iter);
|
|
||||||
}
|
|
||||||
);
|
|
||||||
|
|
||||||
// AFTER
|
|
||||||
AT_DISPATCH_V2(
|
|
||||||
iter.dtype(),
|
|
||||||
"min_values_cuda",
|
|
||||||
AT_WRAP([&]() {
|
|
||||||
min_values_kernel_cuda_impl<scalar_t>(iter);
|
|
||||||
}),
|
|
||||||
AT_EXPAND(AT_ALL_TYPES),
|
|
||||||
kBFloat16, kHalf, kBool
|
|
||||||
);
|
|
||||||
```
|
|
||||||
|
|
||||||
### Step 6: Handle multi-line lambdas
|
|
||||||
|
|
||||||
For lambdas with internal commas or complex expressions, AT_WRAP is essential:
|
|
||||||
|
|
||||||
```cpp
|
|
||||||
AT_DISPATCH_V2(
|
|
||||||
dtype,
|
|
||||||
"complex_kernel",
|
|
||||||
AT_WRAP([&]() {
|
|
||||||
gpu_reduce_kernel<scalar_t, scalar_t>(
|
|
||||||
iter,
|
|
||||||
MinOps<scalar_t>{},
|
|
||||||
thrust::pair<scalar_t, int64_t>(upper_bound(), 0) // Commas inside!
|
|
||||||
);
|
|
||||||
}),
|
|
||||||
AT_EXPAND(AT_ALL_TYPES)
|
|
||||||
);
|
|
||||||
```
|
|
||||||
|
|
||||||
### Step 7: Verify the conversion
|
|
||||||
|
|
||||||
Check that:
|
|
||||||
- [ ] `AT_WRAP()` wraps the entire lambda
|
|
||||||
- [ ] Type groups use `AT_EXPAND()`
|
|
||||||
- [ ] Individual types don't have `AT_EXPAND()` (just `kBFloat16`, not `AT_EXPAND(kBFloat16)`)
|
|
||||||
- [ ] Argument order is: scalar_type, name, lambda, types
|
|
||||||
- [ ] Include added: `#include <ATen/Dispatch_v2.h>`
|
|
||||||
|
|
||||||
## Type group reference
|
|
||||||
|
|
||||||
Available type group macros (use with `AT_EXPAND()`):
|
|
||||||
|
|
||||||
```cpp
|
|
||||||
AT_INTEGRAL_TYPES // kByte, kChar, kInt, kLong, kShort
|
|
||||||
AT_FLOATING_TYPES // kDouble, kFloat
|
|
||||||
AT_COMPLEX_TYPES // kComplexDouble, kComplexFloat
|
|
||||||
AT_QINT_TYPES // kQInt8, kQUInt8, kQInt32
|
|
||||||
AT_ALL_TYPES // INTEGRAL_TYPES + FLOATING_TYPES
|
|
||||||
AT_ALL_TYPES_AND_COMPLEX // ALL_TYPES + COMPLEX_TYPES
|
|
||||||
AT_INTEGRAL_TYPES_V2 // INTEGRAL_TYPES + unsigned types
|
|
||||||
AT_BAREBONES_UNSIGNED_TYPES // kUInt16, kUInt32, kUInt64
|
|
||||||
AT_FLOAT8_TYPES // Float8 variants
|
|
||||||
```
|
|
||||||
|
|
||||||
## Common patterns
|
|
||||||
|
|
||||||
### Pattern: AT_DISPATCH_ALL_TYPES_AND2
|
|
||||||
|
|
||||||
```cpp
|
|
||||||
// Before
|
|
||||||
AT_DISPATCH_ALL_TYPES_AND2(kHalf, kBFloat16, dtype, "op", [&]() {
|
|
||||||
kernel<scalar_t>(data);
|
|
||||||
});
|
|
||||||
|
|
||||||
// After
|
|
||||||
AT_DISPATCH_V2(dtype, "op", AT_WRAP([&]() {
|
|
||||||
kernel<scalar_t>(data);
|
|
||||||
}), AT_EXPAND(AT_ALL_TYPES), kHalf, kBFloat16);
|
|
||||||
```
|
|
||||||
|
|
||||||
### Pattern: AT_DISPATCH_FLOATING_TYPES_AND3
|
|
||||||
|
|
||||||
```cpp
|
|
||||||
// Before
|
|
||||||
AT_DISPATCH_FLOATING_TYPES_AND3(kHalf, kBFloat16, kFloat8_e4m3fn,
|
|
||||||
tensor.scalar_type(), "float_op", [&] {
|
|
||||||
process<scalar_t>(tensor);
|
|
||||||
});
|
|
||||||
|
|
||||||
// After
|
|
||||||
AT_DISPATCH_V2(tensor.scalar_type(), "float_op", AT_WRAP([&] {
|
|
||||||
process<scalar_t>(tensor);
|
|
||||||
}), AT_EXPAND(AT_FLOATING_TYPES), kHalf, kBFloat16, kFloat8_e4m3fn);
|
|
||||||
```
|
|
||||||
|
|
||||||
### Pattern: AT_DISPATCH_ALL_TYPES_AND_COMPLEX_AND2
|
|
||||||
|
|
||||||
```cpp
|
|
||||||
// Before
|
|
||||||
AT_DISPATCH_ALL_TYPES_AND_COMPLEX_AND2(
|
|
||||||
kComplexHalf, kHalf,
|
|
||||||
self.scalar_type(),
|
|
||||||
"complex_op",
|
|
||||||
[&] {
|
|
||||||
result = compute<scalar_t>(self);
|
|
||||||
}
|
|
||||||
);
|
|
||||||
|
|
||||||
// After
|
|
||||||
AT_DISPATCH_V2(
|
|
||||||
self.scalar_type(),
|
|
||||||
"complex_op",
|
|
||||||
AT_WRAP([&] {
|
|
||||||
result = compute<scalar_t>(self);
|
|
||||||
}),
|
|
||||||
AT_EXPAND(AT_ALL_TYPES),
|
|
||||||
AT_EXPAND(AT_COMPLEX_TYPES),
|
|
||||||
kComplexHalf,
|
|
||||||
kHalf
|
|
||||||
);
|
|
||||||
```
|
|
||||||
|
|
||||||
## Edge cases
|
|
||||||
|
|
||||||
### Case 1: No extra types (rare)
|
|
||||||
|
|
||||||
```cpp
|
|
||||||
// Before
|
|
||||||
AT_DISPATCH_ALL_TYPES(dtype, "op", [&]() { kernel<scalar_t>(); });
|
|
||||||
|
|
||||||
// After
|
|
||||||
AT_DISPATCH_V2(dtype, "op", AT_WRAP([&]() {
|
|
||||||
kernel<scalar_t>();
|
|
||||||
}), AT_EXPAND(AT_ALL_TYPES));
|
|
||||||
```
|
|
||||||
|
|
||||||
### Case 2: Many individual types (AND4, AND5, etc.)
|
|
||||||
|
|
||||||
```cpp
|
|
||||||
// Before
|
|
||||||
AT_DISPATCH_FLOATING_TYPES_AND4(kHalf, kBFloat16, kFloat8_e4m3fn, kFloat8_e5m2,
|
|
||||||
dtype, "float8_op", [&]() { kernel<scalar_t>(); });
|
|
||||||
|
|
||||||
// After
|
|
||||||
AT_DISPATCH_V2(dtype, "float8_op", AT_WRAP([&]() {
|
|
||||||
kernel<scalar_t>();
|
|
||||||
}), AT_EXPAND(AT_FLOATING_TYPES), kHalf, kBFloat16, kFloat8_e4m3fn, kFloat8_e5m2);
|
|
||||||
```
|
|
||||||
|
|
||||||
### Case 3: Lambda with no captures
|
|
||||||
|
|
||||||
```cpp
|
|
||||||
// Before
|
|
||||||
AT_DISPATCH_ALL_TYPES_AND2(kHalf, kBool, dtype, "op", []() {
|
|
||||||
static_kernel<scalar_t>();
|
|
||||||
});
|
|
||||||
|
|
||||||
// After
|
|
||||||
AT_DISPATCH_V2(dtype, "op", AT_WRAP([]() {
|
|
||||||
static_kernel<scalar_t>();
|
|
||||||
}), AT_EXPAND(AT_ALL_TYPES), kHalf, kBool);
|
|
||||||
```
|
|
||||||
|
|
||||||
## Benefits of AT_DISPATCH_V2
|
|
||||||
|
|
||||||
1. **No arity in macro name**: Don't need different macros for AND2, AND3, AND4
|
|
||||||
2. **Composable type sets**: Mix and match type groups with `AT_EXPAND()`
|
|
||||||
3. **Extensible**: Easy to add more types without hitting macro limits
|
|
||||||
4. **Clearer**: Type groups are explicit, not implicit in macro name
|
|
||||||
|
|
||||||
## Important notes
|
|
||||||
|
|
||||||
- Keep `#include <ATen/Dispatch.h>` - other code may need it
|
|
||||||
- The `AT_WRAP()` is mandatory - prevents comma parsing issues in the lambda
|
|
||||||
- Type groups need `AT_EXPAND()`, individual types don't
|
|
||||||
- The v2 API is in `aten/src/ATen/Dispatch_v2.h` - refer to it for full docs
|
|
||||||
- See the header file for the Python script to regenerate the macro implementation
|
|
||||||
|
|
||||||
## Workflow
|
|
||||||
|
|
||||||
When asked to convert AT_DISPATCH macros:
|
|
||||||
|
|
||||||
1. Read the file to identify all AT_DISPATCH uses
|
|
||||||
2. Add `#include <ATen/Dispatch_v2.h>` if not present
|
|
||||||
3. For each dispatch macro:
|
|
||||||
- Identify the pattern and extract components
|
|
||||||
- Map the base type group
|
|
||||||
- Extract individual types
|
|
||||||
- Construct the AT_DISPATCH_V2 call
|
|
||||||
- Apply with Edit tool
|
|
||||||
4. Show the user the complete converted file
|
|
||||||
5. Explain what was changed
|
|
||||||
|
|
||||||
Do NOT compile or test the code - focus on accurate conversion only.
|
|
||||||
@ -1,359 +0,0 @@
|
|||||||
---
|
|
||||||
name: docstring
|
|
||||||
description: Write docstrings for PyTorch functions and methods following PyTorch conventions. Use when writing or updating docstrings in PyTorch code.
|
|
||||||
---
|
|
||||||
|
|
||||||
# PyTorch Docstring Writing Guide
|
|
||||||
|
|
||||||
This skill describes how to write docstrings for functions and methods in the PyTorch project, following the conventions in `torch/_tensor_docs.py` and `torch/nn/functional.py`.
|
|
||||||
|
|
||||||
## General Principles
|
|
||||||
|
|
||||||
- Use **raw strings** (`r"""..."""`) for all docstrings to avoid issues with LaTeX/math backslashes
|
|
||||||
- Follow **Sphinx/reStructuredText** (reST) format for documentation
|
|
||||||
- Be **concise but complete** - include all essential information
|
|
||||||
- Always include **examples** when possible
|
|
||||||
- Use **cross-references** to related functions/classes
|
|
||||||
|
|
||||||
## Docstring Structure
|
|
||||||
|
|
||||||
### 1. Function Signature (First Line)
|
|
||||||
|
|
||||||
Start with the function signature showing all parameters:
|
|
||||||
|
|
||||||
```python
|
|
||||||
r"""function_name(param1, param2, *, kwarg1=default1, kwarg2=default2) -> ReturnType
|
|
||||||
```
|
|
||||||
|
|
||||||
**Notes:**
|
|
||||||
- Include the function name
|
|
||||||
- Show positional and keyword-only arguments (use `*` separator)
|
|
||||||
- Include default values
|
|
||||||
- Show return type annotation
|
|
||||||
- This line should NOT end with a period
|
|
||||||
|
|
||||||
### 2. Brief Description
|
|
||||||
|
|
||||||
Provide a one-line description of what the function does:
|
|
||||||
|
|
||||||
```python
|
|
||||||
r"""conv2d(input, weight, bias=None, stride=1, padding=0, dilation=1, groups=1) -> Tensor
|
|
||||||
|
|
||||||
Applies a 2D convolution over an input image composed of several input
|
|
||||||
planes.
|
|
||||||
```
|
|
||||||
|
|
||||||
### 3. Mathematical Formulas (if applicable)
|
|
||||||
|
|
||||||
Use Sphinx math directives for mathematical expressions:
|
|
||||||
|
|
||||||
```python
|
|
||||||
.. math::
|
|
||||||
\text{Softmax}(x_{i}) = \frac{\exp(x_i)}{\sum_j \exp(x_j)}
|
|
||||||
```
|
|
||||||
|
|
||||||
Or inline math: `:math:\`x^2\``
|
|
||||||
|
|
||||||
### 4. Cross-References
|
|
||||||
|
|
||||||
Link to related classes and functions using Sphinx roles:
|
|
||||||
|
|
||||||
- `:class:\`~torch.nn.ModuleName\`` - Link to a class
|
|
||||||
- `:func:\`torch.function_name\`` - Link to a function
|
|
||||||
- `:meth:\`~Tensor.method_name\`` - Link to a method
|
|
||||||
- `:attr:\`attribute_name\`` - Reference an attribute
|
|
||||||
- The `~` prefix shows only the last component (e.g., `Conv2d` instead of `torch.nn.Conv2d`)
|
|
||||||
|
|
||||||
**Example:**
|
|
||||||
```python
|
|
||||||
See :class:`~torch.nn.Conv2d` for details and output shape.
|
|
||||||
```
|
|
||||||
|
|
||||||
### 5. Notes and Warnings
|
|
||||||
|
|
||||||
Use admonitions for important information:
|
|
||||||
|
|
||||||
```python
|
|
||||||
.. note::
|
|
||||||
This function doesn't work directly with NLLLoss,
|
|
||||||
which expects the Log to be computed between the Softmax and itself.
|
|
||||||
Use log_softmax instead (it's faster and has better numerical properties).
|
|
||||||
|
|
||||||
.. warning::
|
|
||||||
:func:`new_tensor` always copies :attr:`data`. If you have a Tensor
|
|
||||||
``data`` and want to avoid a copy, use :func:`torch.Tensor.requires_grad_`
|
|
||||||
or :func:`torch.Tensor.detach`.
|
|
||||||
```
|
|
||||||
|
|
||||||
### 6. Args Section
|
|
||||||
|
|
||||||
Document all parameters with type annotations and descriptions:
|
|
||||||
|
|
||||||
```python
|
|
||||||
Args:
|
|
||||||
input (Tensor): input tensor of shape :math:`(\text{minibatch} , \text{in\_channels} , iH , iW)`
|
|
||||||
weight (Tensor): filters of shape :math:`(\text{out\_channels} , kH , kW)`
|
|
||||||
bias (Tensor, optional): optional bias tensor of shape :math:`(\text{out\_channels})`. Default: ``None``
|
|
||||||
stride (int or tuple): the stride of the convolving kernel. Can be a single number or a
|
|
||||||
tuple `(sH, sW)`. Default: 1
|
|
||||||
```
|
|
||||||
|
|
||||||
**Formatting rules:**
|
|
||||||
- Parameter name in **lowercase**
|
|
||||||
- Type in parentheses: `(Type)`, `(Type, optional)` for optional parameters
|
|
||||||
- Description follows the type
|
|
||||||
- For optional parameters, include "Default: ``value``" at the end
|
|
||||||
- Use double backticks for inline code: ``` ``None`` ```
|
|
||||||
- Indent continuation lines by 2 spaces
|
|
||||||
|
|
||||||
### 7. Keyword Args Section (if applicable)
|
|
||||||
|
|
||||||
Sometimes keyword arguments are documented separately:
|
|
||||||
|
|
||||||
```python
|
|
||||||
Keyword args:
|
|
||||||
dtype (:class:`torch.dtype`, optional): the desired type of returned tensor.
|
|
||||||
Default: if None, same :class:`torch.dtype` as this tensor.
|
|
||||||
device (:class:`torch.device`, optional): the desired device of returned tensor.
|
|
||||||
Default: if None, same :class:`torch.device` as this tensor.
|
|
||||||
requires_grad (bool, optional): If autograd should record operations on the
|
|
||||||
returned tensor. Default: ``False``.
|
|
||||||
```
|
|
||||||
|
|
||||||
### 8. Returns Section (if needed)
|
|
||||||
|
|
||||||
Document the return value:
|
|
||||||
|
|
||||||
```python
|
|
||||||
Returns:
|
|
||||||
Tensor: Sampled tensor of same shape as `logits` from the Gumbel-Softmax distribution.
|
|
||||||
If ``hard=True``, the returned samples will be one-hot, otherwise they will
|
|
||||||
be probability distributions that sum to 1 across `dim`.
|
|
||||||
```
|
|
||||||
|
|
||||||
Or simply include it in the function signature line if obvious from context.
|
|
||||||
|
|
||||||
### 9. Examples Section
|
|
||||||
|
|
||||||
Always include examples when possible:
|
|
||||||
|
|
||||||
```python
|
|
||||||
Examples::
|
|
||||||
|
|
||||||
>>> inputs = torch.randn(33, 16, 30)
|
|
||||||
>>> filters = torch.randn(20, 16, 5)
|
|
||||||
>>> F.conv1d(inputs, filters)
|
|
||||||
|
|
||||||
>>> # With square kernels and equal stride
|
|
||||||
>>> filters = torch.randn(8, 4, 3, 3)
|
|
||||||
>>> inputs = torch.randn(1, 4, 5, 5)
|
|
||||||
>>> F.conv2d(inputs, filters, padding=1)
|
|
||||||
```
|
|
||||||
|
|
||||||
**Formatting rules:**
|
|
||||||
- Use `Examples::` with double colon
|
|
||||||
- Use `>>>` prompt for Python code
|
|
||||||
- Include comments with `#` when helpful
|
|
||||||
- Show actual output when it helps understanding (indent without `>>>`)
|
|
||||||
|
|
||||||
### 10. External References
|
|
||||||
|
|
||||||
Link to papers or external documentation:
|
|
||||||
|
|
||||||
```python
|
|
||||||
.. _Link Name:
|
|
||||||
https://arxiv.org/abs/1611.00712
|
|
||||||
```
|
|
||||||
|
|
||||||
Reference them in text: ```See `Link Name`_```
|
|
||||||
|
|
||||||
## Method Types
|
|
||||||
|
|
||||||
### Native Python Functions
|
|
||||||
|
|
||||||
For regular Python functions, use a standard docstring:
|
|
||||||
|
|
||||||
```python
|
|
||||||
def relu(input: Tensor, inplace: bool = False) -> Tensor:
|
|
||||||
r"""relu(input, inplace=False) -> Tensor
|
|
||||||
|
|
||||||
Applies the rectified linear unit function element-wise. See
|
|
||||||
:class:`~torch.nn.ReLU` for more details.
|
|
||||||
"""
|
|
||||||
# implementation
|
|
||||||
```
|
|
||||||
|
|
||||||
### C-Bound Functions (using add_docstr)
|
|
||||||
|
|
||||||
For C-bound functions, use `_add_docstr`:
|
|
||||||
|
|
||||||
```python
|
|
||||||
conv1d = _add_docstr(
|
|
||||||
torch.conv1d,
|
|
||||||
r"""
|
|
||||||
conv1d(input, weight, bias=None, stride=1, padding=0, dilation=1, groups=1) -> Tensor
|
|
||||||
|
|
||||||
Applies a 1D convolution over an input signal composed of several input
|
|
||||||
planes.
|
|
||||||
|
|
||||||
See :class:`~torch.nn.Conv1d` for details and output shape.
|
|
||||||
|
|
||||||
Args:
|
|
||||||
input: input tensor of shape :math:`(\text{minibatch} , \text{in\_channels} , iW)`
|
|
||||||
weight: filters of shape :math:`(\text{out\_channels} , kW)`
|
|
||||||
...
|
|
||||||
""",
|
|
||||||
)
|
|
||||||
```
|
|
||||||
|
|
||||||
### In-Place Variants
|
|
||||||
|
|
||||||
For in-place operations (ending with `_`), reference the original:
|
|
||||||
|
|
||||||
```python
|
|
||||||
add_docstr_all(
|
|
||||||
"abs_",
|
|
||||||
r"""
|
|
||||||
abs_() -> Tensor
|
|
||||||
|
|
||||||
In-place version of :meth:`~Tensor.abs`
|
|
||||||
""",
|
|
||||||
)
|
|
||||||
```
|
|
||||||
|
|
||||||
### Alias Functions
|
|
||||||
|
|
||||||
For aliases, simply reference the original:
|
|
||||||
|
|
||||||
```python
|
|
||||||
add_docstr_all(
|
|
||||||
"absolute",
|
|
||||||
r"""
|
|
||||||
absolute() -> Tensor
|
|
||||||
|
|
||||||
Alias for :func:`abs`
|
|
||||||
""",
|
|
||||||
)
|
|
||||||
```
|
|
||||||
|
|
||||||
## Common Patterns
|
|
||||||
|
|
||||||
### Shape Documentation
|
|
||||||
|
|
||||||
Use LaTeX math notation for tensor shapes:
|
|
||||||
|
|
||||||
```python
|
|
||||||
:math:`(\text{minibatch} , \text{in\_channels} , iH , iW)`
|
|
||||||
```
|
|
||||||
|
|
||||||
### Reusable Argument Definitions
|
|
||||||
|
|
||||||
For commonly used arguments, define them once and reuse:
|
|
||||||
|
|
||||||
```python
|
|
||||||
common_args = parse_kwargs(
|
|
||||||
"""
|
|
||||||
dtype (:class:`torch.dtype`, optional): the desired type of returned tensor.
|
|
||||||
Default: if None, same as this tensor.
|
|
||||||
"""
|
|
||||||
)
|
|
||||||
|
|
||||||
# Then use with .format():
|
|
||||||
r"""
|
|
||||||
...
|
|
||||||
|
|
||||||
Keyword args:
|
|
||||||
{dtype}
|
|
||||||
{device}
|
|
||||||
""".format(**common_args)
|
|
||||||
```
|
|
||||||
|
|
||||||
### Template Insertion
|
|
||||||
|
|
||||||
Insert reproducibility notes or other common text:
|
|
||||||
|
|
||||||
```python
|
|
||||||
r"""
|
|
||||||
{tf32_note}
|
|
||||||
|
|
||||||
{cudnn_reproducibility_note}
|
|
||||||
""".format(**reproducibility_notes, **tf32_notes)
|
|
||||||
```
|
|
||||||
|
|
||||||
## Complete Example
|
|
||||||
|
|
||||||
Here's a complete example showing all elements:
|
|
||||||
|
|
||||||
```python
|
|
||||||
def gumbel_softmax(
|
|
||||||
logits: Tensor,
|
|
||||||
tau: float = 1,
|
|
||||||
hard: bool = False,
|
|
||||||
eps: float = 1e-10,
|
|
||||||
dim: int = -1,
|
|
||||||
) -> Tensor:
|
|
||||||
r"""
|
|
||||||
Sample from the Gumbel-Softmax distribution and optionally discretize.
|
|
||||||
|
|
||||||
Args:
|
|
||||||
logits (Tensor): `[..., num_features]` unnormalized log probabilities
|
|
||||||
tau (float): non-negative scalar temperature
|
|
||||||
hard (bool): if ``True``, the returned samples will be discretized as one-hot vectors,
|
|
||||||
but will be differentiated as if it is the soft sample in autograd. Default: ``False``
|
|
||||||
dim (int): A dimension along which softmax will be computed. Default: -1
|
|
||||||
|
|
||||||
Returns:
|
|
||||||
Tensor: Sampled tensor of same shape as `logits` from the Gumbel-Softmax distribution.
|
|
||||||
If ``hard=True``, the returned samples will be one-hot, otherwise they will
|
|
||||||
be probability distributions that sum to 1 across `dim`.
|
|
||||||
|
|
||||||
.. note::
|
|
||||||
This function is here for legacy reasons, may be removed from nn.Functional in the future.
|
|
||||||
|
|
||||||
Examples::
|
|
||||||
>>> logits = torch.randn(20, 32)
|
|
||||||
>>> # Sample soft categorical using reparametrization trick:
|
|
||||||
>>> F.gumbel_softmax(logits, tau=1, hard=False)
|
|
||||||
>>> # Sample hard categorical using "Straight-through" trick:
|
|
||||||
>>> F.gumbel_softmax(logits, tau=1, hard=True)
|
|
||||||
|
|
||||||
.. _Link 1:
|
|
||||||
https://arxiv.org/abs/1611.00712
|
|
||||||
"""
|
|
||||||
# implementation
|
|
||||||
```
|
|
||||||
|
|
||||||
## Quick Checklist
|
|
||||||
|
|
||||||
When writing a PyTorch docstring, ensure:
|
|
||||||
|
|
||||||
- [ ] Use raw string (`r"""`)
|
|
||||||
- [ ] Include function signature on first line
|
|
||||||
- [ ] Provide brief description
|
|
||||||
- [ ] Document all parameters in Args section with types
|
|
||||||
- [ ] Include default values for optional parameters
|
|
||||||
- [ ] Use Sphinx cross-references (`:func:`, `:class:`, `:meth:`)
|
|
||||||
- [ ] Add mathematical formulas if applicable
|
|
||||||
- [ ] Include at least one example in Examples section
|
|
||||||
- [ ] Add warnings/notes for important caveats
|
|
||||||
- [ ] Link to related module class with `:class:`
|
|
||||||
- [ ] Use proper math notation for tensor shapes
|
|
||||||
- [ ] Follow consistent formatting and indentation
|
|
||||||
|
|
||||||
## Common Sphinx Roles Reference
|
|
||||||
|
|
||||||
- `:class:\`~torch.nn.Module\`` - Class reference
|
|
||||||
- `:func:\`torch.function\`` - Function reference
|
|
||||||
- `:meth:\`~Tensor.method\`` - Method reference
|
|
||||||
- `:attr:\`attribute\`` - Attribute reference
|
|
||||||
- `:math:\`equation\`` - Inline math
|
|
||||||
- `:ref:\`label\`` - Internal reference
|
|
||||||
- ``` ``code`` ``` - Inline code (use double backticks)
|
|
||||||
|
|
||||||
## Additional Notes
|
|
||||||
|
|
||||||
- **Indentation**: Use 4 spaces for code, 2 spaces for continuation of parameter descriptions
|
|
||||||
- **Line length**: Try to keep lines under 100 characters when possible
|
|
||||||
- **Periods**: End sentences with periods, but not the signature line
|
|
||||||
- **Backticks**: Use double backticks for code: ``` ``True`` ``None`` ``False`` ```
|
|
||||||
- **Types**: Common types are `Tensor`, `int`, `float`, `bool`, `str`, `tuple`, `list`, etc.
|
|
||||||
@ -1,385 +0,0 @@
|
|||||||
---
|
|
||||||
name: skill-writer
|
|
||||||
description: Guide users through creating Agent Skills for Claude Code. Use when the user wants to create, write, author, or design a new Skill, or needs help with SKILL.md files, frontmatter, or skill structure.
|
|
||||||
---
|
|
||||||
|
|
||||||
# Skill Writer
|
|
||||||
|
|
||||||
This Skill helps you create well-structured Agent Skills for Claude Code that follow best practices and validation requirements.
|
|
||||||
|
|
||||||
## When to use this Skill
|
|
||||||
|
|
||||||
Use this Skill when:
|
|
||||||
- Creating a new Agent Skill
|
|
||||||
- Writing or updating SKILL.md files
|
|
||||||
- Designing skill structure and frontmatter
|
|
||||||
- Troubleshooting skill discovery issues
|
|
||||||
- Converting existing prompts or workflows into Skills
|
|
||||||
|
|
||||||
## Instructions
|
|
||||||
|
|
||||||
### Step 1: Determine Skill scope
|
|
||||||
|
|
||||||
First, understand what the Skill should do:
|
|
||||||
|
|
||||||
1. **Ask clarifying questions**:
|
|
||||||
- What specific capability should this Skill provide?
|
|
||||||
- When should Claude use this Skill?
|
|
||||||
- What tools or resources does it need?
|
|
||||||
- Is this for personal use or team sharing?
|
|
||||||
|
|
||||||
2. **Keep it focused**: One Skill = one capability
|
|
||||||
- Good: "PDF form filling", "Excel data analysis"
|
|
||||||
- Too broad: "Document processing", "Data tools"
|
|
||||||
|
|
||||||
### Step 2: Choose Skill location
|
|
||||||
|
|
||||||
Determine where to create the Skill:
|
|
||||||
|
|
||||||
**Personal Skills** (`~/.claude/skills/`):
|
|
||||||
- Individual workflows and preferences
|
|
||||||
- Experimental Skills
|
|
||||||
- Personal productivity tools
|
|
||||||
|
|
||||||
**Project Skills** (`.claude/skills/`):
|
|
||||||
- Team workflows and conventions
|
|
||||||
- Project-specific expertise
|
|
||||||
- Shared utilities (committed to git)
|
|
||||||
|
|
||||||
### Step 3: Create Skill structure
|
|
||||||
|
|
||||||
Create the directory and files:
|
|
||||||
|
|
||||||
```bash
|
|
||||||
# Personal
|
|
||||||
mkdir -p ~/.claude/skills/skill-name
|
|
||||||
|
|
||||||
# Project
|
|
||||||
mkdir -p .claude/skills/skill-name
|
|
||||||
```
|
|
||||||
|
|
||||||
For multi-file Skills:
|
|
||||||
```
|
|
||||||
skill-name/
|
|
||||||
├── SKILL.md (required)
|
|
||||||
├── reference.md (optional)
|
|
||||||
├── examples.md (optional)
|
|
||||||
├── scripts/
|
|
||||||
│ └── helper.py (optional)
|
|
||||||
└── templates/
|
|
||||||
└── template.txt (optional)
|
|
||||||
```
|
|
||||||
|
|
||||||
### Step 4: Write SKILL.md frontmatter
|
|
||||||
|
|
||||||
Create YAML frontmatter with required fields:
|
|
||||||
|
|
||||||
```yaml
|
|
||||||
---
|
|
||||||
name: skill-name
|
|
||||||
description: Brief description of what this does and when to use it
|
|
||||||
---
|
|
||||||
```
|
|
||||||
|
|
||||||
**Field requirements**:
|
|
||||||
|
|
||||||
- **name**:
|
|
||||||
- Lowercase letters, numbers, hyphens only
|
|
||||||
- Max 64 characters
|
|
||||||
- Must match directory name
|
|
||||||
- Good: `pdf-processor`, `git-commit-helper`
|
|
||||||
- Bad: `PDF_Processor`, `Git Commits!`
|
|
||||||
|
|
||||||
- **description**:
|
|
||||||
- Max 1024 characters
|
|
||||||
- Include BOTH what it does AND when to use it
|
|
||||||
- Use specific trigger words users would say
|
|
||||||
- Mention file types, operations, and context
|
|
||||||
|
|
||||||
**Optional frontmatter fields**:
|
|
||||||
|
|
||||||
- **allowed-tools**: Restrict tool access (comma-separated list)
|
|
||||||
```yaml
|
|
||||||
allowed-tools: Read, Grep, Glob
|
|
||||||
```
|
|
||||||
Use for:
|
|
||||||
- Read-only Skills
|
|
||||||
- Security-sensitive workflows
|
|
||||||
- Limited-scope operations
|
|
||||||
|
|
||||||
### Step 5: Write effective descriptions
|
|
||||||
|
|
||||||
The description is critical for Claude to discover your Skill.
|
|
||||||
|
|
||||||
**Formula**: `[What it does] + [When to use it] + [Key triggers]`
|
|
||||||
|
|
||||||
**Examples**:
|
|
||||||
|
|
||||||
✅ **Good**:
|
|
||||||
```yaml
|
|
||||||
description: Extract text and tables from PDF files, fill forms, merge documents. Use when working with PDF files or when the user mentions PDFs, forms, or document extraction.
|
|
||||||
```
|
|
||||||
|
|
||||||
✅ **Good**:
|
|
||||||
```yaml
|
|
||||||
description: Analyze Excel spreadsheets, create pivot tables, and generate charts. Use when working with Excel files, spreadsheets, or analyzing tabular data in .xlsx format.
|
|
||||||
```
|
|
||||||
|
|
||||||
❌ **Too vague**:
|
|
||||||
```yaml
|
|
||||||
description: Helps with documents
|
|
||||||
description: For data analysis
|
|
||||||
```
|
|
||||||
|
|
||||||
**Tips**:
|
|
||||||
- Include specific file extensions (.pdf, .xlsx, .json)
|
|
||||||
- Mention common user phrases ("analyze", "extract", "generate")
|
|
||||||
- List concrete operations (not generic verbs)
|
|
||||||
- Add context clues ("Use when...", "For...")
|
|
||||||
|
|
||||||
### Step 6: Structure the Skill content
|
|
||||||
|
|
||||||
Use clear Markdown sections:
|
|
||||||
|
|
||||||
```markdown
|
|
||||||
# Skill Name
|
|
||||||
|
|
||||||
Brief overview of what this Skill does.
|
|
||||||
|
|
||||||
## Quick start
|
|
||||||
|
|
||||||
Provide a simple example to get started immediately.
|
|
||||||
|
|
||||||
## Instructions
|
|
||||||
|
|
||||||
Step-by-step guidance for Claude:
|
|
||||||
1. First step with clear action
|
|
||||||
2. Second step with expected outcome
|
|
||||||
3. Handle edge cases
|
|
||||||
|
|
||||||
## Examples
|
|
||||||
|
|
||||||
Show concrete usage examples with code or commands.
|
|
||||||
|
|
||||||
## Best practices
|
|
||||||
|
|
||||||
- Key conventions to follow
|
|
||||||
- Common pitfalls to avoid
|
|
||||||
- When to use vs. not use
|
|
||||||
|
|
||||||
## Requirements
|
|
||||||
|
|
||||||
List any dependencies or prerequisites:
|
|
||||||
```bash
|
|
||||||
pip install package-name
|
|
||||||
```
|
|
||||||
|
|
||||||
## Advanced usage
|
|
||||||
|
|
||||||
For complex scenarios, see [reference.md](reference.md).
|
|
||||||
```
|
|
||||||
|
|
||||||
### Step 7: Add supporting files (optional)
|
|
||||||
|
|
||||||
Create additional files for progressive disclosure:
|
|
||||||
|
|
||||||
**reference.md**: Detailed API docs, advanced options
|
|
||||||
**examples.md**: Extended examples and use cases
|
|
||||||
**scripts/**: Helper scripts and utilities
|
|
||||||
**templates/**: File templates or boilerplate
|
|
||||||
|
|
||||||
Reference them from SKILL.md:
|
|
||||||
```markdown
|
|
||||||
For advanced usage, see [reference.md](reference.md).
|
|
||||||
|
|
||||||
Run the helper script:
|
|
||||||
\`\`\`bash
|
|
||||||
python scripts/helper.py input.txt
|
|
||||||
\`\`\`
|
|
||||||
```
|
|
||||||
|
|
||||||
### Step 8: Validate the Skill
|
|
||||||
|
|
||||||
Check these requirements:
|
|
||||||
|
|
||||||
✅ **File structure**:
|
|
||||||
- [ ] SKILL.md exists in correct location
|
|
||||||
- [ ] Directory name matches frontmatter `name`
|
|
||||||
|
|
||||||
✅ **YAML frontmatter**:
|
|
||||||
- [ ] Opening `---` on line 1
|
|
||||||
- [ ] Closing `---` before content
|
|
||||||
- [ ] Valid YAML (no tabs, correct indentation)
|
|
||||||
- [ ] `name` follows naming rules
|
|
||||||
- [ ] `description` is specific and < 1024 chars
|
|
||||||
|
|
||||||
✅ **Content quality**:
|
|
||||||
- [ ] Clear instructions for Claude
|
|
||||||
- [ ] Concrete examples provided
|
|
||||||
- [ ] Edge cases handled
|
|
||||||
- [ ] Dependencies listed (if any)
|
|
||||||
|
|
||||||
✅ **Testing**:
|
|
||||||
- [ ] Description matches user questions
|
|
||||||
- [ ] Skill activates on relevant queries
|
|
||||||
- [ ] Instructions are clear and actionable
|
|
||||||
|
|
||||||
### Step 9: Test the Skill
|
|
||||||
|
|
||||||
1. **Restart Claude Code** (if running) to load the Skill
|
|
||||||
|
|
||||||
2. **Ask relevant questions** that match the description:
|
|
||||||
```
|
|
||||||
Can you help me extract text from this PDF?
|
|
||||||
```
|
|
||||||
|
|
||||||
3. **Verify activation**: Claude should use the Skill automatically
|
|
||||||
|
|
||||||
4. **Check behavior**: Confirm Claude follows the instructions correctly
|
|
||||||
|
|
||||||
### Step 10: Debug if needed
|
|
||||||
|
|
||||||
If Claude doesn't use the Skill:
|
|
||||||
|
|
||||||
1. **Make description more specific**:
|
|
||||||
- Add trigger words
|
|
||||||
- Include file types
|
|
||||||
- Mention common user phrases
|
|
||||||
|
|
||||||
2. **Check file location**:
|
|
||||||
```bash
|
|
||||||
ls ~/.claude/skills/skill-name/SKILL.md
|
|
||||||
ls .claude/skills/skill-name/SKILL.md
|
|
||||||
```
|
|
||||||
|
|
||||||
3. **Validate YAML**:
|
|
||||||
```bash
|
|
||||||
cat SKILL.md | head -n 10
|
|
||||||
```
|
|
||||||
|
|
||||||
4. **Run debug mode**:
|
|
||||||
```bash
|
|
||||||
claude --debug
|
|
||||||
```
|
|
||||||
|
|
||||||
## Common patterns
|
|
||||||
|
|
||||||
### Read-only Skill
|
|
||||||
|
|
||||||
```yaml
|
|
||||||
---
|
|
||||||
name: code-reader
|
|
||||||
description: Read and analyze code without making changes. Use for code review, understanding codebases, or documentation.
|
|
||||||
allowed-tools: Read, Grep, Glob
|
|
||||||
---
|
|
||||||
```
|
|
||||||
|
|
||||||
### Script-based Skill
|
|
||||||
|
|
||||||
```yaml
|
|
||||||
---
|
|
||||||
name: data-processor
|
|
||||||
description: Process CSV and JSON data files with Python scripts. Use when analyzing data files or transforming datasets.
|
|
||||||
---
|
|
||||||
|
|
||||||
# Data Processor
|
|
||||||
|
|
||||||
## Instructions
|
|
||||||
|
|
||||||
1. Use the processing script:
|
|
||||||
\`\`\`bash
|
|
||||||
python scripts/process.py input.csv --output results.json
|
|
||||||
\`\`\`
|
|
||||||
|
|
||||||
2. Validate output with:
|
|
||||||
\`\`\`bash
|
|
||||||
python scripts/validate.py results.json
|
|
||||||
\`\`\`
|
|
||||||
```
|
|
||||||
|
|
||||||
### Multi-file Skill with progressive disclosure
|
|
||||||
|
|
||||||
```yaml
|
|
||||||
---
|
|
||||||
name: api-designer
|
|
||||||
description: Design REST APIs following best practices. Use when creating API endpoints, designing routes, or planning API architecture.
|
|
||||||
---
|
|
||||||
|
|
||||||
# API Designer
|
|
||||||
|
|
||||||
Quick start: See [examples.md](examples.md)
|
|
||||||
|
|
||||||
Detailed reference: See [reference.md](reference.md)
|
|
||||||
|
|
||||||
## Instructions
|
|
||||||
|
|
||||||
1. Gather requirements
|
|
||||||
2. Design endpoints (see examples.md)
|
|
||||||
3. Document with OpenAPI spec
|
|
||||||
4. Review against best practices (see reference.md)
|
|
||||||
```
|
|
||||||
|
|
||||||
## Best practices for Skill authors
|
|
||||||
|
|
||||||
1. **One Skill, one purpose**: Don't create mega-Skills
|
|
||||||
2. **Specific descriptions**: Include trigger words users will say
|
|
||||||
3. **Clear instructions**: Write for Claude, not humans
|
|
||||||
4. **Concrete examples**: Show real code, not pseudocode
|
|
||||||
5. **List dependencies**: Mention required packages in description
|
|
||||||
6. **Test with teammates**: Verify activation and clarity
|
|
||||||
7. **Version your Skills**: Document changes in content
|
|
||||||
8. **Use progressive disclosure**: Put advanced details in separate files
|
|
||||||
|
|
||||||
## Validation checklist
|
|
||||||
|
|
||||||
Before finalizing a Skill, verify:
|
|
||||||
|
|
||||||
- [ ] Name is lowercase, hyphens only, max 64 chars
|
|
||||||
- [ ] Description is specific and < 1024 chars
|
|
||||||
- [ ] Description includes "what" and "when"
|
|
||||||
- [ ] YAML frontmatter is valid
|
|
||||||
- [ ] Instructions are step-by-step
|
|
||||||
- [ ] Examples are concrete and realistic
|
|
||||||
- [ ] Dependencies are documented
|
|
||||||
- [ ] File paths use forward slashes
|
|
||||||
- [ ] Skill activates on relevant queries
|
|
||||||
- [ ] Claude follows instructions correctly
|
|
||||||
|
|
||||||
## Troubleshooting
|
|
||||||
|
|
||||||
**Skill doesn't activate**:
|
|
||||||
- Make description more specific with trigger words
|
|
||||||
- Include file types and operations in description
|
|
||||||
- Add "Use when..." clause with user phrases
|
|
||||||
|
|
||||||
**Multiple Skills conflict**:
|
|
||||||
- Make descriptions more distinct
|
|
||||||
- Use different trigger words
|
|
||||||
- Narrow the scope of each Skill
|
|
||||||
|
|
||||||
**Skill has errors**:
|
|
||||||
- Check YAML syntax (no tabs, proper indentation)
|
|
||||||
- Verify file paths (use forward slashes)
|
|
||||||
- Ensure scripts have execute permissions
|
|
||||||
- List all dependencies
|
|
||||||
|
|
||||||
## Examples
|
|
||||||
|
|
||||||
See the documentation for complete examples:
|
|
||||||
- Simple single-file Skill (commit-helper)
|
|
||||||
- Skill with tool permissions (code-reviewer)
|
|
||||||
- Multi-file Skill (pdf-processing)
|
|
||||||
|
|
||||||
## Output format
|
|
||||||
|
|
||||||
When creating a Skill, I will:
|
|
||||||
|
|
||||||
1. Ask clarifying questions about scope and requirements
|
|
||||||
2. Suggest a Skill name and location
|
|
||||||
3. Create the SKILL.md file with proper frontmatter
|
|
||||||
4. Include clear instructions and examples
|
|
||||||
5. Add supporting files if needed
|
|
||||||
6. Provide testing instructions
|
|
||||||
7. Validate against all requirements
|
|
||||||
|
|
||||||
The result will be a complete, working Skill that follows all best practices and validation rules.
|
|
||||||
8
.flake8
8
.flake8
@ -7,12 +7,16 @@ max-line-length = 120
|
|||||||
# C408 ignored because we like the dict keyword argument syntax
|
# C408 ignored because we like the dict keyword argument syntax
|
||||||
# E501 is not flexible enough, we're using B950 instead
|
# E501 is not flexible enough, we're using B950 instead
|
||||||
ignore =
|
ignore =
|
||||||
E203,E305,E402,E501,E704,E741,F405,F841,F999,W503,W504,C408,E302,W291,E303,F824,
|
E203,E305,E402,E501,E704,E721,E741,F405,F841,F999,W503,W504,C408,E302,W291,E303,F824,
|
||||||
# shebang has extra meaning in fbcode lints, so I think it's not worth trying
|
# shebang has extra meaning in fbcode lints, so I think it's not worth trying
|
||||||
# to line this up with executable bit
|
# to line this up with executable bit
|
||||||
EXE001,
|
EXE001,
|
||||||
# these ignores are from flake8-bugbear; please fix!
|
# these ignores are from flake8-bugbear; please fix!
|
||||||
B007,B008,B017,B019,B023,B028,B903,B905,B906,B907,B908,B910
|
B007,B008,B017,B019,B023,B028,B903,B904,B905,B906,B907,B908,B910
|
||||||
|
# these ignores are from flake8-comprehensions; please fix!
|
||||||
|
C407,
|
||||||
|
# these ignores are from flake8-logging-format; please fix!
|
||||||
|
G100,G101,G200
|
||||||
# these ignores are from flake8-simplify. please fix or ignore with commented reason
|
# these ignores are from flake8-simplify. please fix or ignore with commented reason
|
||||||
SIM105,SIM108,SIM110,SIM111,SIM113,SIM114,SIM115,SIM116,SIM117,SIM118,SIM119,SIM12,
|
SIM105,SIM108,SIM110,SIM111,SIM113,SIM114,SIM115,SIM116,SIM117,SIM118,SIM119,SIM12,
|
||||||
# SIM104 is already covered by pyupgrade ruff
|
# SIM104 is already covered by pyupgrade ruff
|
||||||
|
|||||||
1
.github/ISSUE_TEMPLATE/ci-sev.md
vendored
1
.github/ISSUE_TEMPLATE/ci-sev.md
vendored
@ -8,7 +8,6 @@ assignees: ''
|
|||||||
---
|
---
|
||||||
|
|
||||||
> NOTE: Remember to label this issue with "`ci: sev`"
|
> NOTE: Remember to label this issue with "`ci: sev`"
|
||||||
> If you want autorevert to be disabled, keep the ci: disable-autorevert label
|
|
||||||
|
|
||||||
<!-- Add the `merge blocking` label to this PR to prevent PRs from being merged while this issue is open -->
|
<!-- Add the `merge blocking` label to this PR to prevent PRs from being merged while this issue is open -->
|
||||||
|
|
||||||
|
|||||||
4
.github/ISSUE_TEMPLATE/disable-autorevert.md
vendored
4
.github/ISSUE_TEMPLATE/disable-autorevert.md
vendored
@ -1,7 +1,7 @@
|
|||||||
---
|
---
|
||||||
name: "D❌\U0001F519 ISABLE AUTOREVERT"
|
name: DISABLE AUTOREVERT
|
||||||
about: Disables autorevert when open
|
about: Disables autorevert when open
|
||||||
title: "[DISABLE AUTOREVERT]"
|
title: "❌\U0001F519 [DISABLE AUTOREVERT]"
|
||||||
labels: 'ci: disable-autorevert'
|
labels: 'ci: disable-autorevert'
|
||||||
assignees: ''
|
assignees: ''
|
||||||
|
|
||||||
|
|||||||
@ -1,11 +1,11 @@
|
|||||||
name: 🚀 New Feature for Release
|
name: 🚀 Release highlight for proposed Feature
|
||||||
description: Submit a Release highlight for proposed Feature
|
description: Submit a Release highlight for proposed Feature
|
||||||
labels: ["release-feature-request"]
|
labels: ["release-feature-request"]
|
||||||
|
|
||||||
body:
|
body:
|
||||||
- type: textarea
|
- type: textarea
|
||||||
attributes:
|
attributes:
|
||||||
label: New Feature for Release
|
label: Release highlight for proposed Feature
|
||||||
description: >
|
description: >
|
||||||
Example: “A torch.special module, analogous to SciPy's special module.”
|
Example: “A torch.special module, analogous to SciPy's special module.”
|
||||||
- type: input
|
- type: input
|
||||||
|
|||||||
@ -65,7 +65,7 @@ runs:
|
|||||||
cd .ci/lumen_cli
|
cd .ci/lumen_cli
|
||||||
python3 -m pip install -e .
|
python3 -m pip install -e .
|
||||||
)
|
)
|
||||||
MAX_JOBS="$(nproc --ignore=10)"
|
MAX_JOBS="$(nproc --ignore=6)"
|
||||||
export MAX_JOBS
|
export MAX_JOBS
|
||||||
|
|
||||||
# Split the comma-separated list and build each target
|
# Split the comma-separated list and build each target
|
||||||
|
|||||||
4
.github/actions/diskspace-cleanup/action.yml
vendored
4
.github/actions/diskspace-cleanup/action.yml
vendored
@ -27,9 +27,7 @@ runs:
|
|||||||
docker system prune -af
|
docker system prune -af
|
||||||
diskspace_new=$(df -H --output=pcent ${docker_root_dir} | sed -n 2p | sed 's/%//' | sed 's/ //')
|
diskspace_new=$(df -H --output=pcent ${docker_root_dir} | sed -n 2p | sed 's/%//' | sed 's/ //')
|
||||||
if [[ "$diskspace_new" -gt "$diskspace_cutoff" ]] ; then
|
if [[ "$diskspace_new" -gt "$diskspace_cutoff" ]] ; then
|
||||||
diskspace_cutoff_int=$((diskspace_cutoff + 0))
|
echo "Error: Available diskspace is less than $diskspace_cutoff percent. Not enough diskspace."
|
||||||
difference=$((100 - diskspace_cutoff_int))
|
|
||||||
echo "Error: Available diskspace is less than $difference percent. Not enough diskspace."
|
|
||||||
echo "$msg"
|
echo "$msg"
|
||||||
exit 1
|
exit 1
|
||||||
else
|
else
|
||||||
|
|||||||
2
.github/actions/linux-test/action.yml
vendored
2
.github/actions/linux-test/action.yml
vendored
@ -274,6 +274,8 @@ runs:
|
|||||||
-w /var/lib/jenkins/workspace \
|
-w /var/lib/jenkins/workspace \
|
||||||
"${DOCKER_IMAGE}"
|
"${DOCKER_IMAGE}"
|
||||||
)
|
)
|
||||||
|
# Propagate download.pytorch.org IP to container
|
||||||
|
grep download.pytorch.org /etc/hosts | docker exec -i "${container_name}" sudo bash -c "/bin/cat >> /etc/hosts"
|
||||||
echo "DOCKER_CONTAINER_ID=${container_name}" >> "${GITHUB_ENV}"
|
echo "DOCKER_CONTAINER_ID=${container_name}" >> "${GITHUB_ENV}"
|
||||||
docker exec -t "${container_name}" sh -c "pip install $(echo dist/*.whl)[opt-einsum] && ${TEST_COMMAND}"
|
docker exec -t "${container_name}" sh -c "pip install $(echo dist/*.whl)[opt-einsum] && ${TEST_COMMAND}"
|
||||||
|
|
||||||
|
|||||||
12
.github/actions/pytest-cache-download/action.yml
vendored
12
.github/actions/pytest-cache-download/action.yml
vendored
@ -38,9 +38,9 @@ runs:
|
|||||||
run: |
|
run: |
|
||||||
python3 .github/scripts/pytest_cache.py \
|
python3 .github/scripts/pytest_cache.py \
|
||||||
--download \
|
--download \
|
||||||
--cache_dir "$GITHUB_WORKSPACE/$CACHE_DIR" \
|
--cache_dir $GITHUB_WORKSPACE/$CACHE_DIR \
|
||||||
--pr_identifier "$GITHUB_REF" \
|
--pr_identifier $GITHUB_REF \
|
||||||
--job_identifier "$JOB_IDENTIFIER" \
|
--job_identifier $JOB_IDENTIFIER \
|
||||||
--temp_dir "$RUNNER_TEMP" \
|
--temp_dir $RUNNER_TEMP \
|
||||||
--repo "$REPO" \
|
--repo $REPO \
|
||||||
--bucket "$BUCKET" \
|
--bucket $BUCKET \
|
||||||
|
|||||||
16
.github/actions/pytest-cache-upload/action.yml
vendored
16
.github/actions/pytest-cache-upload/action.yml
vendored
@ -47,11 +47,11 @@ runs:
|
|||||||
run: |
|
run: |
|
||||||
python3 .github/scripts/pytest_cache.py \
|
python3 .github/scripts/pytest_cache.py \
|
||||||
--upload \
|
--upload \
|
||||||
--cache_dir "$GITHUB_WORKSPACE/$CACHE_DIR" \
|
--cache_dir $GITHUB_WORKSPACE/$CACHE_DIR \
|
||||||
--pr_identifier "$GITHUB_REF" \
|
--pr_identifier $GITHUB_REF \
|
||||||
--job_identifier "$JOB_IDENTIFIER" \
|
--job_identifier $JOB_IDENTIFIER \
|
||||||
--sha "$SHA" \
|
--sha $SHA \
|
||||||
--test_config "$TEST_CONFIG" \
|
--test_config $TEST_CONFIG \
|
||||||
--shard "$SHARD" \
|
--shard $SHARD \
|
||||||
--repo "$REPO" \
|
--repo $REPO \
|
||||||
--temp_dir "$RUNNER_TEMP" \
|
--temp_dir $RUNNER_TEMP \
|
||||||
|
|||||||
35
.github/actions/setup-linux/action.yml
vendored
35
.github/actions/setup-linux/action.yml
vendored
@ -28,10 +28,6 @@ runs:
|
|||||||
echo "instance-type: $(get_ec2_metadata instance-type)"
|
echo "instance-type: $(get_ec2_metadata instance-type)"
|
||||||
echo "system info $(uname -a)"
|
echo "system info $(uname -a)"
|
||||||
|
|
||||||
- name: Print GPU info (if present)
|
|
||||||
shell: bash
|
|
||||||
run: if [ -f /usr/bin/nvidia-smi ]; then nvidia-smi; fi
|
|
||||||
|
|
||||||
- name: Check if in a container runner
|
- name: Check if in a container runner
|
||||||
shell: bash
|
shell: bash
|
||||||
id: check_container_runner
|
id: check_container_runner
|
||||||
@ -86,6 +82,37 @@ runs:
|
|||||||
# Prune all of the docker images
|
# Prune all of the docker images
|
||||||
docker system prune -af
|
docker system prune -af
|
||||||
|
|
||||||
|
- name: Manually resolve download.pytorch.org
|
||||||
|
shell: bash
|
||||||
|
continue-on-error: true
|
||||||
|
run: |
|
||||||
|
set +e
|
||||||
|
set -x
|
||||||
|
|
||||||
|
PT_DOMAIN=download.pytorch.org
|
||||||
|
# TODO: Flaky access to download.pytorch.org https://github.com/pytorch/pytorch/issues/100400,
|
||||||
|
# cleaning this up once the issue is fixed. There are more than one resolved IP here, the last
|
||||||
|
# one is returned at random
|
||||||
|
RESOLVED_IP=$(dig -4 +short "${PT_DOMAIN}" | tail -n1)
|
||||||
|
|
||||||
|
if [ -z "${RESOLVED_IP}" ]; then
|
||||||
|
echo "Couldn't resolve ${PT_DOMAIN}, retrying with Google DNS..."
|
||||||
|
RESOLVED_IP=$(dig -4 +short "${PT_DOMAIN}" @8.8.8.8 | tail -n1)
|
||||||
|
|
||||||
|
if [ -z "${RESOLVED_IP}" ]; then
|
||||||
|
echo "Couldn't resolve ${PT_DOMAIN}, exiting..."
|
||||||
|
exit 1
|
||||||
|
fi
|
||||||
|
fi
|
||||||
|
|
||||||
|
if grep -r "${PT_DOMAIN}" /etc/hosts; then
|
||||||
|
# Clean up any old records first
|
||||||
|
sudo sed -i "/${PT_DOMAIN}/d" /etc/hosts
|
||||||
|
fi
|
||||||
|
|
||||||
|
echo "${RESOLVED_IP} ${PT_DOMAIN}" | sudo tee -a /etc/hosts
|
||||||
|
cat /etc/hosts
|
||||||
|
|
||||||
- name: Check that the docker daemon is running
|
- name: Check that the docker daemon is running
|
||||||
shell: bash
|
shell: bash
|
||||||
continue-on-error: true
|
continue-on-error: true
|
||||||
|
|||||||
20
.github/actions/setup-rocm/action.yml
vendored
20
.github/actions/setup-rocm/action.yml
vendored
@ -111,23 +111,3 @@ runs:
|
|||||||
# This video group ID maps to subgid 1 inside the docker image due to the /etc/subgid entries.
|
# This video group ID maps to subgid 1 inside the docker image due to the /etc/subgid entries.
|
||||||
# The group name corresponding to group ID 1 can change depending on the OS, so both are necessary.
|
# The group name corresponding to group ID 1 can change depending on the OS, so both are necessary.
|
||||||
echo "GPU_FLAG=--device=/dev/mem --device=/dev/kfd $DEVICE_FLAG --group-add video --group-add $render_gid --group-add daemon --group-add bin --cap-add=SYS_PTRACE --security-opt seccomp=unconfined --network=host" >> "${GITHUB_ENV}"
|
echo "GPU_FLAG=--device=/dev/mem --device=/dev/kfd $DEVICE_FLAG --group-add video --group-add $render_gid --group-add daemon --group-add bin --cap-add=SYS_PTRACE --security-opt seccomp=unconfined --network=host" >> "${GITHUB_ENV}"
|
||||||
|
|
||||||
- name: configure aws credentials
|
|
||||||
id: aws_creds
|
|
||||||
uses: aws-actions/configure-aws-credentials@ececac1a45f3b08a01d2dd070d28d111c5fe6722 # v4.1.0
|
|
||||||
with:
|
|
||||||
role-to-assume: arn:aws:iam::308535385114:role/gha_workflow_s3_and_ecr_read_only
|
|
||||||
aws-region: us-east-1
|
|
||||||
role-duration-seconds: 18000
|
|
||||||
|
|
||||||
- name: Login to Amazon ECR
|
|
||||||
id: login-ecr
|
|
||||||
continue-on-error: true
|
|
||||||
uses: aws-actions/amazon-ecr-login@062b18b96a7aff071d4dc91bc00c4c1a7945b076 # v2.0.1
|
|
||||||
|
|
||||||
- name: Preserve github env variables for use in docker
|
|
||||||
shell: bash
|
|
||||||
run: |
|
|
||||||
env | grep '^GITHUB' >> "${RUNNER_TEMP}/github_env_${GITHUB_RUN_ID}"
|
|
||||||
env | grep '^CI' >> "${RUNNER_TEMP}/github_env_${GITHUB_RUN_ID}"
|
|
||||||
env | grep '^RUNNER' >> "${RUNNER_TEMP}/github_env_${GITHUB_RUN_ID}"
|
|
||||||
|
|||||||
@ -33,6 +33,10 @@ runs:
|
|||||||
)
|
)
|
||||||
|
|
||||||
echo "CONTAINER_NAME=${container_name}" >> "$GITHUB_ENV"
|
echo "CONTAINER_NAME=${container_name}" >> "$GITHUB_ENV"
|
||||||
|
if [[ "${GPU_ARCH_TYPE}" != "rocm" && "${BUILD_ENVIRONMENT}" != "linux-aarch64-binary-manywheel" && "${BUILD_ENVIRONMENT}" != "linux-s390x-binary-manywheel" && "${GPU_ARCH_TYPE}" != "xpu" ]]; then
|
||||||
|
# Propagate download.pytorch.org IP to container. This is only needed on Linux non aarch64 runner
|
||||||
|
grep download.pytorch.org /etc/hosts | docker exec -i "${container_name}" bash -c "/bin/cat >> /etc/hosts"
|
||||||
|
fi
|
||||||
|
|
||||||
docker exec -t -w "${PYTORCH_ROOT}" "${container_name}" bash -c "bash .circleci/scripts/binary_populate_env.sh"
|
docker exec -t -w "${PYTORCH_ROOT}" "${container_name}" bash -c "bash .circleci/scripts/binary_populate_env.sh"
|
||||||
# Generate test script
|
# Generate test script
|
||||||
|
|||||||
2
.github/ci_commit_pins/audio.txt
vendored
2
.github/ci_commit_pins/audio.txt
vendored
@ -1 +1 @@
|
|||||||
ad5816f0eee1c873df1b7d371c69f1f811a89387
|
87ff22e49ed0e92576c4935ccb8c143daac4a3cd
|
||||||
|
|||||||
2
.github/ci_commit_pins/vision.txt
vendored
2
.github/ci_commit_pins/vision.txt
vendored
@ -1 +1 @@
|
|||||||
ccb801b88af136454798b945175c4c87e636ac33
|
966da7e46f65d6d49df3e31214470a4fe5cc8e66
|
||||||
|
|||||||
2
.github/ci_commit_pins/vllm.txt
vendored
2
.github/ci_commit_pins/vllm.txt
vendored
@ -1 +1 @@
|
|||||||
e5192819208c4d68194844b7dfafbc00020d0dea
|
0ad9951c416d33c5da4f7a504fb162cbe62386f5
|
||||||
|
|||||||
2
.github/ci_commit_pins/xla.txt
vendored
2
.github/ci_commit_pins/xla.txt
vendored
@ -1 +1 @@
|
|||||||
e4d25697f9dc5eedaf8f0a5bf085c62c5455a53a
|
2a9138a26ee257fef05310ad3fecf7c55fe80d73
|
||||||
|
|||||||
@ -1,41 +1,59 @@
|
|||||||
|
# TODO(elainwy): remove this file after the torch nightly dockerfile is in sync in vllm repo
|
||||||
|
# The vLLM Dockerfile is used to construct vLLM image against torch nightly and torch main that can be directly used for testing
|
||||||
|
|
||||||
ARG CUDA_VERSION=12.8.1
|
ARG CUDA_VERSION=12.8.1
|
||||||
ARG PYTHON_VERSION=3.12
|
ARG PYTHON_VERSION=3.12
|
||||||
|
|
||||||
# BUILD_BASE_IMAGE: used to setup python build xformers, and vllm wheels, It can be replaced with a different base image from local machine,
|
# BUILD_BASE_IMAGE: used to setup python build xformers, and vllm wheels, It can be replaced with a different base image from local machine,
|
||||||
# by default, it uses the torch-nightly-base stage from this docker image
|
# by default, it uses the torch-nightly-base stage from this docker image
|
||||||
ARG BUILD_BASE_IMAGE=torch-nightly-base
|
ARG BUILD_BASE_IMAGE=torch-nightly-base
|
||||||
|
|
||||||
|
# FINAL_BASE_IMAGE: used to set up vllm-instaled environment and build flashinfer,
|
||||||
|
# by default, it uses devel-ubuntu22.04 official image.
|
||||||
ARG FINAL_BASE_IMAGE=nvidia/cuda:${CUDA_VERSION}-devel-ubuntu22.04
|
ARG FINAL_BASE_IMAGE=nvidia/cuda:${CUDA_VERSION}-devel-ubuntu22.04
|
||||||
|
|
||||||
# The logic is copied from https://github.com/vllm-project/vllm/blob/main/docker/Dockerfile
|
# The logic is copied from https://github.com/vllm-project/vllm/blob/main/docker/Dockerfile
|
||||||
ARG GET_PIP_URL="https://bootstrap.pypa.io/get-pip.py"
|
ARG GET_PIP_URL="https://bootstrap.pypa.io/get-pip.py"
|
||||||
|
|
||||||
|
|
||||||
#################### TORCH NIGHTLY BASE IMAGE ####################
|
#################### TORCH NIGHTLY BASE IMAGE ####################
|
||||||
|
# A base image for building vLLM with devel ubuntu 22.04, this is mainly used to build vllm in vllm builtkite ci
|
||||||
FROM nvidia/cuda:${CUDA_VERSION}-devel-ubuntu22.04 as torch-nightly-base
|
FROM nvidia/cuda:${CUDA_VERSION}-devel-ubuntu22.04 as torch-nightly-base
|
||||||
|
|
||||||
ARG CUDA_VERSION
|
ARG CUDA_VERSION
|
||||||
ARG PYTHON_VERSION
|
ARG PYTHON_VERSION
|
||||||
ARG GET_PIP_URL
|
ARG GET_PIP_URL
|
||||||
|
|
||||||
# Install system dependencies and uv, then create Python virtual environment
|
# Install Python and other dependencies
|
||||||
RUN apt-get update -y \
|
RUN apt-get update -y \
|
||||||
&& apt-get install -y ccache software-properties-common git curl sudo vim python3-pip \
|
&& apt-get install -y ccache software-properties-common git curl wget sudo vim \
|
||||||
&& curl -LsSf https://astral.sh/uv/install.sh | sh \
|
&& add-apt-repository -y ppa:deadsnakes/ppa \
|
||||||
&& $HOME/.local/bin/uv venv /opt/venv --python ${PYTHON_VERSION} \
|
&& apt-get update -y \
|
||||||
&& rm -f /usr/bin/python3 /usr/bin/python3-config /usr/bin/pip \
|
&& apt-get install -y python${PYTHON_VERSION} python${PYTHON_VERSION}-dev python${PYTHON_VERSION}-venv \
|
||||||
&& ln -s /opt/venv/bin/python3 /usr/bin/python3 \
|
&& update-alternatives --install /usr/bin/python3 python3 /usr/bin/python${PYTHON_VERSION} 1 \
|
||||||
&& ln -s /opt/venv/bin/python3-config /usr/bin/python3-config \
|
&& update-alternatives --set python3 /usr/bin/python${PYTHON_VERSION} \
|
||||||
&& ln -s /opt/venv/bin/pip /usr/bin/pip \
|
&& ln -sf /usr/bin/python${PYTHON_VERSION}-config /usr/bin/python3-config \
|
||||||
|
&& curl -sS ${GET_PIP_URL} | python${PYTHON_VERSION} \
|
||||||
&& python3 --version && python3 -m pip --version
|
&& python3 --version && python3 -m pip --version
|
||||||
|
|
||||||
# Upgrade to GCC 10 to avoid https://gcc.gnu.org/bugzilla/show_bug.cgi?id=92519
|
# Upgrade to GCC 10 to avoid https://gcc.gnu.org/bugzilla/show_bug.cgi?id=92519
|
||||||
# as it was causing spam when compiling the CUTLASS kernels
|
# as it was causing spam when compiling the CUTLASS kernels
|
||||||
RUN apt-get install -y gcc-10 g++-10
|
# Ensure gcc >= 10 to avoid CUTLASS issues (bug 92519)
|
||||||
RUN update-alternatives --install /usr/bin/gcc gcc /usr/bin/gcc-10 110 --slave /usr/bin/g++ g++ /usr/bin/g++-10
|
RUN current_gcc_version=$(gcc -dumpversion | cut -f1 -d.) && \
|
||||||
RUN <<EOF
|
if command -v apt-get >/dev/null; then \
|
||||||
gcc --version
|
if [ "$current_gcc_version" -lt 10 ]; then \
|
||||||
EOF
|
echo "GCC version is $current_gcc_version, installing gcc-10..."; \
|
||||||
|
apt-get update \
|
||||||
|
&& apt-get install -y gcc-10 g++-10 \
|
||||||
|
&& update-alternatives --install /usr/bin/gcc gcc /usr/bin/gcc-10 100 \
|
||||||
|
&& update-alternatives --install /usr/bin/g++ g++ /usr/bin/g++-10 100; \
|
||||||
|
else \
|
||||||
|
echo "GCC version is $current_gcc_version, no need to install gcc-10."; \
|
||||||
|
fi \
|
||||||
|
fi \
|
||||||
|
&& gcc --version && g++ --version
|
||||||
|
|
||||||
# Install uv for faster pip installs
|
# install uv for faster pip installs
|
||||||
RUN --mount=type=cache,target=/root/.cache/uv \
|
RUN --mount=type=cache,target=/root/.cache/uv \
|
||||||
python3 -m pip install uv==0.8.4
|
python3 -m pip install uv==0.8.4
|
||||||
|
|
||||||
@ -43,32 +61,36 @@ ENV UV_HTTP_TIMEOUT=500
|
|||||||
ENV UV_INDEX_STRATEGY="unsafe-best-match"
|
ENV UV_INDEX_STRATEGY="unsafe-best-match"
|
||||||
# Use copy mode to avoid hardlink failures with Docker cache mounts
|
# Use copy mode to avoid hardlink failures with Docker cache mounts
|
||||||
ENV UV_LINK_MODE=copy
|
ENV UV_LINK_MODE=copy
|
||||||
|
|
||||||
#################### TORCH NIGHTLY BASE IMAGE ####################
|
#################### TORCH NIGHTLY BASE IMAGE ####################
|
||||||
|
|
||||||
|
|
||||||
#################### BASE BUILD IMAGE ####################
|
#################### BASE BUILD IMAGE ####################
|
||||||
|
# A base image for building vLLM with torch nightly or torch wheels
|
||||||
|
# prepare basic build environment
|
||||||
FROM ${BUILD_BASE_IMAGE} AS base
|
FROM ${BUILD_BASE_IMAGE} AS base
|
||||||
USER root
|
USER root
|
||||||
|
|
||||||
ARG CUDA_VERSION
|
ARG CUDA_VERSION
|
||||||
ARG PYTHON_VERSION
|
ARG PYTHON_VERSION
|
||||||
|
|
||||||
# Only work with PyTorch manylinux builder
|
# TODO (huydhn): Only work with PyTorch manylinux builder
|
||||||
ENV PATH="/opt/python/cp312-cp312/bin:${PATH}"
|
ENV PATH="/opt/python/cp312-cp312/bin:${PATH}"
|
||||||
|
|
||||||
# Install some system dependencies and double check python version
|
# Install some system dependencies and double check python version
|
||||||
RUN if command -v apt-get >/dev/null; then \
|
RUN if command -v apt-get >/dev/null; then \
|
||||||
apt-get update -y \
|
apt-get update -y \
|
||||||
&& apt-get install -y ccache software-properties-common git wget sudo vim; \
|
&& apt-get install -y ccache software-properties-common git curl wget sudo vim; \
|
||||||
else \
|
else \
|
||||||
dnf install -y git wget sudo; \
|
dnf install -y git curl wget sudo; \
|
||||||
fi \
|
fi \
|
||||||
&& python3 --version && python3 -m pip --version
|
&& python3 --version && python3 -m pip --version
|
||||||
|
|
||||||
# Install uv for faster pip installs if not existed
|
# Install uv for faster pip installs if not existed
|
||||||
RUN --mount=type=cache,target=/root/.cache/uv \
|
RUN --mount=type=cache,target=/root/.cache/uv \
|
||||||
python3 -m pip install uv==0.8.4
|
if ! python3 -m uv --version >/dev/null 2>&1; then \
|
||||||
|
python3 -m pip install uv==0.8.4; \
|
||||||
|
fi
|
||||||
ENV UV_HTTP_TIMEOUT=500
|
ENV UV_HTTP_TIMEOUT=500
|
||||||
ENV UV_INDEX_STRATEGY="unsafe-best-match"
|
ENV UV_INDEX_STRATEGY="unsafe-best-match"
|
||||||
# Use copy mode to avoid hardlink failures with Docker cache mounts
|
# Use copy mode to avoid hardlink failures with Docker cache mounts
|
||||||
@ -76,15 +98,15 @@ ENV UV_LINK_MODE=copy
|
|||||||
|
|
||||||
WORKDIR /workspace
|
WORKDIR /workspace
|
||||||
|
|
||||||
# Install build and runtime dependencies
|
# install build and runtime dependencies
|
||||||
COPY requirements/common.txt requirements/common.txt
|
COPY requirements/common.txt requirements/common.txt
|
||||||
COPY use_existing_torch.py use_existing_torch.py
|
COPY use_existing_torch.py use_existing_torch.py
|
||||||
COPY pyproject.toml pyproject.toml
|
COPY pyproject.toml pyproject.toml
|
||||||
|
|
||||||
# Install build and runtime dependencies without stable torch version
|
# install build and runtime dependencies without stable torch version
|
||||||
RUN python3 use_existing_torch.py
|
RUN python3 use_existing_torch.py
|
||||||
|
|
||||||
# Default mount file as placeholder, this just avoid the mount error
|
# default mount file as placeholder, this just avoid the mount error
|
||||||
# change to a different vllm folder if this does not exist anymore
|
# change to a different vllm folder if this does not exist anymore
|
||||||
ARG TORCH_WHEELS_PATH="./requirements"
|
ARG TORCH_WHEELS_PATH="./requirements"
|
||||||
ARG PINNED_TORCH_VERSION
|
ARG PINNED_TORCH_VERSION
|
||||||
@ -116,36 +138,56 @@ RUN --mount=type=cache,target=/root/.cache/uv \
|
|||||||
RUN --mount=type=cache,target=/root/.cache/uv \
|
RUN --mount=type=cache,target=/root/.cache/uv \
|
||||||
uv pip install --system -r requirements/common.txt
|
uv pip install --system -r requirements/common.txt
|
||||||
|
|
||||||
|
# Must put before installing xformers, so it can install the correct version of xfomrers.
|
||||||
|
ARG xformers_cuda_arch_list='7.5;8.0+PTX;9.0a'
|
||||||
|
ENV TORCH_CUDA_ARCH_LIST=${xformers_cuda_arch_list}
|
||||||
|
|
||||||
ARG max_jobs=16
|
ARG max_jobs=16
|
||||||
ENV MAX_JOBS=${max_jobs}
|
ENV MAX_JOBS=${max_jobs}
|
||||||
|
|
||||||
RUN --mount=type=cache,target=/root/.cache/uv bash - <<'BASH'
|
RUN echo ${TORCH_CUDA_ARCH_LIST}
|
||||||
export TORCH_CUDA_ARCH_LIST='7.5 8.0+PTX 9.0a'
|
RUN echo ${MAX_JOBS}
|
||||||
git clone https://github.com/facebookresearch/xformers.git
|
RUN pip freeze | grep -E 'ninja'
|
||||||
|
|
||||||
pushd xformers
|
# Build xformers with cuda and torch nightly/wheel
|
||||||
git checkout v0.0.32.post2
|
# following official xformers guidance: https://github.com/facebookresearch/xformers#build
|
||||||
git submodule update --init --recursive
|
# sha for https://github.com/facebookresearch/xformers/tree/v0.0.32.post2
|
||||||
python3 setup.py bdist_wheel --dist-dir=../xformers-dist --verbose
|
ARG XFORMERS_COMMIT=5d4b92a5e5a9c6c6d4878283f47d82e17995b468
|
||||||
popd
|
ENV CCACHE_DIR=/root/.cache/ccache
|
||||||
|
|
||||||
rm -rf xformers
|
RUN --mount=type=cache,target=/root/.cache/ccache \
|
||||||
BASH
|
--mount=type=cache,target=/root/.cache/uv \
|
||||||
|
echo 'git clone xformers...' \
|
||||||
|
&& git clone https://github.com/facebookresearch/xformers.git --recursive \
|
||||||
|
&& cd xformers \
|
||||||
|
&& git checkout ${XFORMERS_COMMIT} \
|
||||||
|
&& git submodule update --init --recursive \
|
||||||
|
&& echo 'finish git clone xformers...' \
|
||||||
|
&& rm -rf build \
|
||||||
|
&& python3 setup.py bdist_wheel --dist-dir=../xformers-dist --verbose \
|
||||||
|
&& cd .. \
|
||||||
|
&& rm -rf xformers
|
||||||
|
|
||||||
RUN --mount=type=cache,target=/root/.cache/uv \
|
RUN --mount=type=cache,target=/root/.cache/uv \
|
||||||
uv pip install --system xformers-dist/*.whl
|
uv pip install --system xformers-dist/*.whl --verbose
|
||||||
|
|
||||||
|
# Build can take a long time, and the torch nightly version fetched from url can be different in next docker stage.
|
||||||
|
# track the nightly torch version used in the build, when we set up runtime environment we can make sure the version is the same
|
||||||
RUN uv pip freeze | grep -i '^torch\|^torchvision\|^torchaudio' > torch_build_versions.txt
|
RUN uv pip freeze | grep -i '^torch\|^torchvision\|^torchaudio' > torch_build_versions.txt
|
||||||
|
|
||||||
RUN cat torch_build_versions.txt
|
RUN cat torch_build_versions.txt
|
||||||
RUN pip freeze | grep -E 'torch|xformers|torchvision|torchaudio'
|
RUN pip freeze | grep -E 'torch|xformers|torchvision|torchaudio'
|
||||||
|
|
||||||
#################### BASE BUILD IMAGE ####################
|
#################### BASE BUILD IMAGE ####################
|
||||||
|
|
||||||
|
|
||||||
#################### WHEEL BUILD IMAGE ####################
|
#################### WHEEL BUILD IMAGE ####################
|
||||||
|
# Image used to build vllm wheel
|
||||||
FROM base AS build
|
FROM base AS build
|
||||||
ARG TARGETPLATFORM
|
ARG TARGETPLATFORM
|
||||||
|
|
||||||
COPY . .
|
COPY . .
|
||||||
|
|
||||||
RUN python3 use_existing_torch.py
|
RUN python3 use_existing_torch.py
|
||||||
|
|
||||||
RUN --mount=type=cache,target=/root/.cache/uv \
|
RUN --mount=type=cache,target=/root/.cache/uv \
|
||||||
@ -155,17 +197,20 @@ ARG GIT_REPO_CHECK=0
|
|||||||
RUN --mount=type=bind,source=.git,target=.git \
|
RUN --mount=type=bind,source=.git,target=.git \
|
||||||
if [ "$GIT_REPO_CHECK" != "0" ]; then bash tools/check_repo.sh ; fi
|
if [ "$GIT_REPO_CHECK" != "0" ]; then bash tools/check_repo.sh ; fi
|
||||||
|
|
||||||
|
# Max jobs used by Ninja to build extensions
|
||||||
ARG max_jobs=16
|
ARG max_jobs=16
|
||||||
ENV MAX_JOBS=${max_jobs}
|
ENV MAX_JOBS=${max_jobs}
|
||||||
ARG nvcc_threads=8
|
ARG nvcc_threads=4
|
||||||
ENV NVCC_THREADS=$nvcc_threads
|
ENV NVCC_THREADS=$nvcc_threads
|
||||||
|
ARG torch_cuda_arch_list='8.0 8.6 8.9 9.0'
|
||||||
|
ENV TORCH_CUDA_ARCH_LIST=${torch_cuda_arch_list}
|
||||||
|
|
||||||
ARG USE_SCCACHE
|
ARG USE_SCCACHE
|
||||||
ARG SCCACHE_BUCKET_NAME=vllm-build-sccache
|
ARG SCCACHE_BUCKET_NAME=vllm-build-sccache
|
||||||
ARG SCCACHE_REGION_NAME=us-west-2
|
ARG SCCACHE_REGION_NAME=us-west-2
|
||||||
ARG SCCACHE_S3_NO_CREDENTIALS=0
|
ARG SCCACHE_S3_NO_CREDENTIALS=0
|
||||||
|
|
||||||
# Use sccache to speed up compilation
|
# if USE_SCCACHE is set, use sccache to speed up compilation
|
||||||
RUN --mount=type=cache,target=/root/.cache/uv \
|
RUN --mount=type=cache,target=/root/.cache/uv \
|
||||||
--mount=type=bind,source=.git,target=.git \
|
--mount=type=bind,source=.git,target=.git \
|
||||||
if [ "$USE_SCCACHE" = "1" ]; then \
|
if [ "$USE_SCCACHE" = "1" ]; then \
|
||||||
@ -190,9 +235,6 @@ RUN --mount=type=cache,target=/root/.cache/uv \
|
|||||||
&& sccache --show-stats; \
|
&& sccache --show-stats; \
|
||||||
fi
|
fi
|
||||||
|
|
||||||
ARG torch_cuda_arch_list='8.0 8.6 8.9 9.0'
|
|
||||||
ENV TORCH_CUDA_ARCH_LIST=${torch_cuda_arch_list}
|
|
||||||
|
|
||||||
ARG vllm_target_device="cuda"
|
ARG vllm_target_device="cuda"
|
||||||
ENV VLLM_TARGET_DEVICE=${vllm_target_device}
|
ENV VLLM_TARGET_DEVICE=${vllm_target_device}
|
||||||
ENV CCACHE_DIR=/root/.cache/ccache
|
ENV CCACHE_DIR=/root/.cache/ccache
|
||||||
@ -206,10 +248,17 @@ RUN --mount=type=cache,target=/root/.cache/ccache \
|
|||||||
export VLLM_DOCKER_BUILD_CONTEXT=1 && \
|
export VLLM_DOCKER_BUILD_CONTEXT=1 && \
|
||||||
python3 setup.py bdist_wheel --dist-dir=vllm-dist --py-limited-api=cp38; \
|
python3 setup.py bdist_wheel --dist-dir=vllm-dist --py-limited-api=cp38; \
|
||||||
fi
|
fi
|
||||||
|
|
||||||
|
RUN echo "[INFO] Listing current directory:" && \
|
||||||
|
ls -al && \
|
||||||
|
echo "[INFO] Showing torch_build_versions.txt content:" && \
|
||||||
|
cat torch_build_versions.txt
|
||||||
|
|
||||||
#################### WHEEL BUILD IMAGE ####################
|
#################### WHEEL BUILD IMAGE ####################
|
||||||
|
|
||||||
|
|
||||||
################### VLLM INSTALLED IMAGE ####################
|
################### VLLM INSTALLED IMAGE ####################
|
||||||
|
# Setup clean environment for vLLM for test and api server using ubuntu22.04 with AOT flashinfer
|
||||||
FROM ${FINAL_BASE_IMAGE} AS vllm-base
|
FROM ${FINAL_BASE_IMAGE} AS vllm-base
|
||||||
USER root
|
USER root
|
||||||
|
|
||||||
@ -217,7 +266,7 @@ ARG CUDA_VERSION
|
|||||||
ARG PYTHON_VERSION
|
ARG PYTHON_VERSION
|
||||||
ARG GET_PIP_URL
|
ARG GET_PIP_URL
|
||||||
|
|
||||||
# Only work with PyTorch manylinux builder
|
# TODO (huydhn): Only work with PyTorch manylinux builder
|
||||||
ENV PATH="/opt/python/cp312-cp312/bin:${PATH}"
|
ENV PATH="/opt/python/cp312-cp312/bin:${PATH}"
|
||||||
|
|
||||||
# prepare for environment starts
|
# prepare for environment starts
|
||||||
@ -226,19 +275,20 @@ WORKDIR /workspace
|
|||||||
# Install Python and other dependencies
|
# Install Python and other dependencies
|
||||||
RUN if command -v apt-get >/dev/null; then \
|
RUN if command -v apt-get >/dev/null; then \
|
||||||
apt-get update -y \
|
apt-get update -y \
|
||||||
&& apt-get install -y ccache software-properties-common git sudo vim python3-pip; \
|
&& apt-get install -y ccache software-properties-common git curl wget sudo vim \
|
||||||
|
&& add-apt-repository -y ppa:deadsnakes/ppa \
|
||||||
|
&& apt-get update -y \
|
||||||
|
&& apt-get install -y python${PYTHON_VERSION} python${PYTHON_VERSION}-dev python${PYTHON_VERSION}-venv \
|
||||||
|
&& update-alternatives --install /usr/bin/python3 python3 /usr/bin/python${PYTHON_VERSION} 1 \
|
||||||
|
&& update-alternatives --set python3 /usr/bin/python${PYTHON_VERSION} \
|
||||||
|
&& ln -sf /usr/bin/python${PYTHON_VERSION}-config /usr/bin/python3-config \
|
||||||
|
&& curl -sS ${GET_PIP_URL} | python${PYTHON_VERSION}; \
|
||||||
else \
|
else \
|
||||||
dnf install -y git wget sudo; \
|
dnf install -y git curl wget sudo; \
|
||||||
fi \
|
fi \
|
||||||
&& curl -LsSf https://astral.sh/uv/install.sh | sh \
|
|
||||||
&& $HOME/.local/bin/uv venv /opt/venv --python ${PYTHON_VERSION} \
|
|
||||||
&& rm -f /usr/bin/python3 /usr/bin/python3-config /usr/bin/pip \
|
|
||||||
&& ln -s /opt/venv/bin/python3 /usr/bin/python3 \
|
|
||||||
&& ln -s /opt/venv/bin/python3-config /usr/bin/python3-config \
|
|
||||||
&& ln -s /opt/venv/bin/pip /usr/bin/pip \
|
|
||||||
&& python3 --version && python3 -m pip --version
|
&& python3 --version && python3 -m pip --version
|
||||||
|
|
||||||
# Get the torch versions, and whls used in previous stage
|
# Get the torch versions, and whls used in previous stagtes for consistency
|
||||||
COPY --from=base /workspace/torch_build_versions.txt ./torch_build_versions.txt
|
COPY --from=base /workspace/torch_build_versions.txt ./torch_build_versions.txt
|
||||||
COPY --from=base /workspace/xformers-dist /wheels/xformers
|
COPY --from=base /workspace/xformers-dist /wheels/xformers
|
||||||
COPY --from=build /workspace/vllm-dist /wheels/vllm
|
COPY --from=build /workspace/vllm-dist /wheels/vllm
|
||||||
@ -247,29 +297,33 @@ RUN echo "[INFO] Listing current directory before torch install step:" && \
|
|||||||
echo "[INFO] Showing torch_build_versions.txt content:" && \
|
echo "[INFO] Showing torch_build_versions.txt content:" && \
|
||||||
cat torch_build_versions.txt
|
cat torch_build_versions.txt
|
||||||
|
|
||||||
# Install uv for faster pip installs if not existed
|
|
||||||
RUN --mount=type=cache,target=/root/.cache/uv \
|
|
||||||
python3 -m pip install uv==0.8.4
|
|
||||||
|
|
||||||
ENV UV_HTTP_TIMEOUT=500
|
|
||||||
ENV UV_INDEX_STRATEGY="unsafe-best-match"
|
|
||||||
# Use copy mode to avoid hardlink failures with Docker cache mounts
|
|
||||||
ENV UV_LINK_MODE=copy
|
|
||||||
|
|
||||||
# Install build and runtime dependencies, this is needed for flashinfer install
|
# Install build and runtime dependencies, this is needed for flashinfer install
|
||||||
COPY requirements/build.txt requirements/build.txt
|
COPY requirements/build.txt requirements/build.txt
|
||||||
COPY use_existing_torch.py use_existing_torch.py
|
COPY use_existing_torch.py use_existing_torch.py
|
||||||
RUN python3 use_existing_torch.py
|
RUN python3 use_existing_torch.py
|
||||||
RUN cat requirements/build.txt
|
RUN cat requirements/build.txt
|
||||||
|
|
||||||
|
# Install uv for faster pip installs if not existed
|
||||||
|
RUN --mount=type=cache,target=/root/.cache/uv \
|
||||||
|
if ! python3 -m uv --version > /dev/null 2>&1; then \
|
||||||
|
python3 -m pip install uv==0.8.4; \
|
||||||
|
fi
|
||||||
|
|
||||||
|
ENV UV_HTTP_TIMEOUT=500
|
||||||
|
ENV UV_INDEX_STRATEGY="unsafe-best-match"
|
||||||
|
# Use copy mode to avoid hardlink failures with Docker cache mounts
|
||||||
|
ENV UV_LINK_MODE=copy
|
||||||
|
|
||||||
|
|
||||||
RUN --mount=type=cache,target=/root/.cache/uv \
|
RUN --mount=type=cache,target=/root/.cache/uv \
|
||||||
uv pip install --system -r requirements/build.txt
|
uv pip install --system -r requirements/build.txt
|
||||||
|
|
||||||
|
|
||||||
# Default mount file as placeholder, this just avoid the mount error
|
# Default mount file as placeholder, this just avoid the mount error
|
||||||
ARG TORCH_WHEELS_PATH="./requirements"
|
ARG TORCH_WHEELS_PATH="./requirements"
|
||||||
# Install torch, torchaudio and torchvision. If TORCH_WHEELS_PATH is default
|
# Install torch, torchaudio and torchvision
|
||||||
# to ./requirements, it will pull the nightly versions using pip. Otherwise,
|
# if TORCH_WHEELS_PATH is default "./requirements", it will pull the nightly versions using pip using torch_build_versions.txt
|
||||||
# it will use the local wheels from TORCH_WHEELS_PATH
|
# otherwise, it will use the whls from TORCH_WHEELS_PATH from the host machine
|
||||||
RUN --mount=type=bind,source=${TORCH_WHEELS_PATH},target=/dist \
|
RUN --mount=type=bind,source=${TORCH_WHEELS_PATH},target=/dist \
|
||||||
--mount=type=cache,target=/root/.cache/uv \
|
--mount=type=cache,target=/root/.cache/uv \
|
||||||
if [ -n "$TORCH_WHEELS_PATH" ] && [ "$TORCH_WHEELS_PATH" != "./requirements" ] && [ -d "/dist" ] && ls /dist/torch*.whl >/dev/null 2>&1; then \
|
if [ -n "$TORCH_WHEELS_PATH" ] && [ "$TORCH_WHEELS_PATH" != "./requirements" ] && [ -d "/dist" ] && ls /dist/torch*.whl >/dev/null 2>&1; then \
|
||||||
@ -283,9 +337,6 @@ RUN --mount=type=bind,source=${TORCH_WHEELS_PATH},target=/dist \
|
|||||||
uv pip install --system $(cat torch_build_versions.txt | xargs) --index-url https://download.pytorch.org/whl/nightly/cu$(echo $CUDA_VERSION | cut -d. -f1,2 | tr -d '.'); \
|
uv pip install --system $(cat torch_build_versions.txt | xargs) --index-url https://download.pytorch.org/whl/nightly/cu$(echo $CUDA_VERSION | cut -d. -f1,2 | tr -d '.'); \
|
||||||
fi
|
fi
|
||||||
|
|
||||||
RUN --mount=type=cache,target=/root/.cache/uv \
|
|
||||||
uv pip install --system --pre apache-tvm-ffi==0.1.0b15
|
|
||||||
|
|
||||||
# Install the vllm wheel from previous stage
|
# Install the vllm wheel from previous stage
|
||||||
RUN --mount=type=cache,target=/root/.cache/uv \
|
RUN --mount=type=cache,target=/root/.cache/uv \
|
||||||
uv pip install --system /wheels/vllm/*.whl --verbose
|
uv pip install --system /wheels/vllm/*.whl --verbose
|
||||||
@ -293,16 +344,18 @@ RUN --mount=type=cache,target=/root/.cache/uv \
|
|||||||
# Install xformers wheel from previous stage
|
# Install xformers wheel from previous stage
|
||||||
RUN --mount=type=cache,target=/root/.cache/uv \
|
RUN --mount=type=cache,target=/root/.cache/uv \
|
||||||
uv pip install --system /wheels/xformers/*.whl --verbose
|
uv pip install --system /wheels/xformers/*.whl --verbose
|
||||||
|
# Build flashinfer from source.
|
||||||
# Build FlashInfer from source
|
|
||||||
ARG torch_cuda_arch_list='8.0;8.9;9.0a;10.0a;12.0'
|
ARG torch_cuda_arch_list='8.0;8.9;9.0a;10.0a;12.0'
|
||||||
|
# install package for build flashinfer
|
||||||
|
# see issue: https://github.com/flashinfer-ai/flashinfer/issues/738
|
||||||
|
|
||||||
|
RUN pip freeze | grep -E 'setuptools|packaging|build'
|
||||||
|
|
||||||
ENV TORCH_CUDA_ARCH_LIST=${torch_cuda_arch_list}
|
ENV TORCH_CUDA_ARCH_LIST=${torch_cuda_arch_list}
|
||||||
|
# Build flashinfer for torch nightly from source around 10 mins
|
||||||
# TODO(elainewy): remove this once vllm commit is updated, and install flashinfer from pip
|
|
||||||
# see https://github.com/pytorch/pytorch/pull/165274#issuecomment-3408531784
|
|
||||||
ARG FLASHINFER_GIT_REPO="https://github.com/flashinfer-ai/flashinfer.git"
|
ARG FLASHINFER_GIT_REPO="https://github.com/flashinfer-ai/flashinfer.git"
|
||||||
|
# Keep this in sync with https://github.com/vllm-project/vllm/blob/main/requirements/cuda.txt
|
||||||
ARG FLASHINFER_GIT_REF="v0.2.14.post1"
|
ARG FLASHINFER_GIT_REF="v0.2.14.post1"
|
||||||
|
|
||||||
RUN --mount=type=cache,target=/root/.cache/uv \
|
RUN --mount=type=cache,target=/root/.cache/uv \
|
||||||
git clone --depth 1 --recursive --shallow-submodules \
|
git clone --depth 1 --recursive --shallow-submodules \
|
||||||
--branch ${FLASHINFER_GIT_REF} \
|
--branch ${FLASHINFER_GIT_REF} \
|
||||||
@ -314,7 +367,7 @@ RUN --mount=type=cache,target=/root/.cache/uv \
|
|||||||
&& cd .. \
|
&& cd .. \
|
||||||
&& rm -rf flashinfer
|
&& rm -rf flashinfer
|
||||||
|
|
||||||
# Install FlashInfer
|
# install flashinfer python
|
||||||
RUN --mount=type=cache,target=/root/.cache/uv \
|
RUN --mount=type=cache,target=/root/.cache/uv \
|
||||||
uv pip install --system wheels/flashinfer/*.whl --verbose
|
uv pip install --system wheels/flashinfer/*.whl --verbose
|
||||||
|
|
||||||
@ -324,6 +377,49 @@ RUN uv pip freeze | grep -i '^torch\|^torchvision\|^torchaudio\|^xformers\|^vllm
|
|||||||
################### VLLM INSTALLED IMAGE ####################
|
################### VLLM INSTALLED IMAGE ####################
|
||||||
|
|
||||||
|
|
||||||
|
#################### UNITTEST IMAGE #############################
|
||||||
|
FROM vllm-base as test
|
||||||
|
|
||||||
|
ENV UV_HTTP_TIMEOUT=500
|
||||||
|
ENV UV_INDEX_STRATEGY="unsafe-best-match"
|
||||||
|
# Use copy mode to avoid hardlink failures with Docker cache mounts
|
||||||
|
ENV UV_LINK_MODE=copy
|
||||||
|
|
||||||
|
COPY tests/ tests/
|
||||||
|
COPY examples examples
|
||||||
|
COPY benchmarks benchmarks
|
||||||
|
COPY ./vllm/collect_env.py .
|
||||||
|
COPY requirements/common.txt requirements/common.txt
|
||||||
|
COPY use_existing_torch.py use_existing_torch.py
|
||||||
|
COPY pyproject.toml pyproject.toml
|
||||||
|
# Install build and runtime dependencies without stable torch version
|
||||||
|
COPY requirements/nightly_torch_test.txt requirements/nightly_torch_test.txt
|
||||||
|
|
||||||
|
RUN python3 use_existing_torch.py
|
||||||
|
|
||||||
|
# install packages
|
||||||
|
RUN --mount=type=cache,target=/root/.cache/uv \
|
||||||
|
uv pip install --system -r requirements/common.txt
|
||||||
|
# enable fast downloads from hf (for testing)
|
||||||
|
RUN --mount=type=cache,target=/root/.cache/uv \
|
||||||
|
uv pip install --system hf_transfer
|
||||||
|
ENV HF_HUB_ENABLE_HF_TRANSFER 1
|
||||||
|
|
||||||
|
# install development dependencies (for testing)
|
||||||
|
RUN --mount=type=cache,target=/root/.cache/uv \
|
||||||
|
uv pip install --system -e tests/vllm_test_utils
|
||||||
|
|
||||||
|
RUN --mount=type=cache,target=/root/.cache/uv \
|
||||||
|
uv pip install --system -r requirements/nightly_torch_test.txt
|
||||||
|
|
||||||
|
# Logging to confirm the torch versions
|
||||||
|
RUN pip freeze | grep -E 'torch|xformers|vllm|flashinfer'
|
||||||
|
|
||||||
|
# Logging to confirm all the packages are installed
|
||||||
|
RUN pip freeze
|
||||||
|
|
||||||
|
#################### UNITTEST IMAGE #############################
|
||||||
|
|
||||||
#################### EXPORT STAGE ####################
|
#################### EXPORT STAGE ####################
|
||||||
FROM scratch as export-wheels
|
FROM scratch as export-wheels
|
||||||
|
|
||||||
125
.github/copilot-instructions.md
vendored
125
.github/copilot-instructions.md
vendored
@ -1,125 +0,0 @@
|
|||||||
# PyTorch Copilot Instructions
|
|
||||||
|
|
||||||
This is the PyTorch machine learning framework codebase. These instructions help AI agents navigate and contribute effectively.
|
|
||||||
|
|
||||||
## Architecture Overview
|
|
||||||
|
|
||||||
### Core Components
|
|
||||||
|
|
||||||
- **c10/** - Core library (C++-10 compatible) for essential, binary-size-conscious functionality
|
|
||||||
- **aten/** - ATen tensor library (C++), PyTorch's foundation without autograd
|
|
||||||
- `aten/src/ATen/native/` - Modern operator implementations (CPU/CUDA/MPS/sparse)
|
|
||||||
- `aten/src/ATen/native/native_functions.yaml` - **Critical**: Declarative operator registry
|
|
||||||
- **torch/** - Python bindings and public API
|
|
||||||
- `torch/csrc/` - C++ Python bindings (hand-written and generated)
|
|
||||||
- `torch/csrc/autograd/` - Reverse-mode automatic differentiation
|
|
||||||
- `torch/csrc/jit/` - TorchScript JIT compiler
|
|
||||||
- **torchgen/** - Code generation tooling that reads `native_functions.yaml`
|
|
||||||
- **tools/** - Build scripts, autograd derivatives, code generation
|
|
||||||
|
|
||||||
### The Code Generation Workflow
|
|
||||||
|
|
||||||
**Most operator changes require editing `native_functions.yaml`**, not direct C++ files. This YAML file:
|
|
||||||
1. Declares operator signatures, variants (function/method), and dispatch behavior
|
|
||||||
2. Gets processed by `torchgen/` to generate C++/Python bindings
|
|
||||||
3. Produces headers in `build/aten/src/ATen/` during compilation
|
|
||||||
|
|
||||||
Example entry structure:
|
|
||||||
```yaml
|
|
||||||
- func: my_op(Tensor self, Scalar alpha=1) -> Tensor
|
|
||||||
variants: function, method
|
|
||||||
dispatch:
|
|
||||||
CPU: my_op_cpu
|
|
||||||
CUDA: my_op_cuda
|
|
||||||
```
|
|
||||||
|
|
||||||
After editing `native_functions.yaml`, implement kernels in `aten/src/ATen/native/` (see `aten/src/ATen/native/README.md`).
|
|
||||||
|
|
||||||
## Development Workflows
|
|
||||||
|
|
||||||
### Building from Source
|
|
||||||
|
|
||||||
**Never run `setup.py` directly** - use pip with editable install:
|
|
||||||
```bash
|
|
||||||
python -m pip install --no-build-isolation -v -e .
|
|
||||||
```
|
|
||||||
|
|
||||||
Speed up builds:
|
|
||||||
- `DEBUG=1` - Debug symbols with `-g -O0`
|
|
||||||
- `USE_CUDA=0` - Skip CUDA compilation
|
|
||||||
- `BUILD_TEST=0` - Skip C++ test binaries
|
|
||||||
- Install `ninja` (`pip install ninja`) for faster builds
|
|
||||||
- Use `ccache` for incremental compilation caching
|
|
||||||
|
|
||||||
Rebuild specific targets: `(cd build && ninja <target>)`
|
|
||||||
|
|
||||||
### Testing
|
|
||||||
|
|
||||||
**Critical**: DO NOT run entire test suites. Run specific tests only:
|
|
||||||
```bash
|
|
||||||
python test/test_torch.py TestTorch.test_specific_case
|
|
||||||
```
|
|
||||||
|
|
||||||
**Test structure**: All tests use `torch.testing._internal.common_utils`:
|
|
||||||
```python
|
|
||||||
from torch.testing._internal.common_utils import run_tests, TestCase
|
|
||||||
|
|
||||||
class TestFeature(TestCase):
|
|
||||||
def test_something(self):
|
|
||||||
# Use self.assertEqual for tensor comparisons
|
|
||||||
pass
|
|
||||||
|
|
||||||
if __name__ == "__main__":
|
|
||||||
run_tests()
|
|
||||||
```
|
|
||||||
|
|
||||||
**For bug fixes**: Create a standalone reproduction script first, verify it fails, then fix and add to appropriate test file.
|
|
||||||
|
|
||||||
### Linting
|
|
||||||
|
|
||||||
Run linter (not pre-commit): `lintrunner -a` (auto-applies fixes)
|
|
||||||
|
|
||||||
## Project-Specific Conventions
|
|
||||||
|
|
||||||
### Memory and Storage
|
|
||||||
- **Storage is never nullptr** (but `StorageImpl.data` may be nullptr for unallocated outputs)
|
|
||||||
- CUDA device info lives in storage objects
|
|
||||||
|
|
||||||
### Python-C++ Integration (`torch/csrc/`)
|
|
||||||
- Always include `Python.h` **first** to avoid `_XOPEN_SOURCE` redefinition errors
|
|
||||||
- Use `pybind11::gil_scoped_acquire` before calling Python API or using `THPObjectPtr`
|
|
||||||
- Wrap entry points with `HANDLE_TH_ERRORS` / `END_HANDLE_TH_ERRORS` for exception conversion
|
|
||||||
|
|
||||||
### Dispatch System
|
|
||||||
- PyTorch uses operator dispatch to route calls to backend-specific kernels
|
|
||||||
- Prefer `CompositeExplicitAutograd` dispatch when writing device-agnostic compound ops
|
|
||||||
- See `aten/src/ATen/native/README.md` for dispatch keyword guidance
|
|
||||||
|
|
||||||
## Git Workflow (AI Agent Specific)
|
|
||||||
|
|
||||||
When preparing PRs from this environment:
|
|
||||||
```bash
|
|
||||||
git stash -u
|
|
||||||
git reset --hard $(cat /tmp/orig_work.txt) # Reset to LOCAL branch
|
|
||||||
git stash pop
|
|
||||||
# Resolve conflicts if necessary
|
|
||||||
```
|
|
||||||
|
|
||||||
## Common Gotchas
|
|
||||||
|
|
||||||
1. **Editing generated files** - If it's in `build/`, don't edit it. Edit the source template or `native_functions.yaml`
|
|
||||||
2. **NVCC template compilation** - NVCC is stricter about C++ than gcc/clang; code working on Linux may fail Windows CI
|
|
||||||
3. **Windows symbol visibility** - Use `TORCH_API` macros for exported symbols (required on Windows, optional on Linux)
|
|
||||||
4. **No internet access** - DO NOT attempt to install dependencies during development
|
|
||||||
|
|
||||||
## Key Files Reference
|
|
||||||
|
|
||||||
- `AGENTS.md` - Instructions specific to AI coding agents
|
|
||||||
- `CONTRIBUTING.md` - Comprehensive human contributor guide
|
|
||||||
- `GLOSSARY.md` - Terminology (ATen, kernels, operations, JIT, TorchScript)
|
|
||||||
- `aten/src/ATen/native/README.md` - Operator implementation guide
|
|
||||||
- `tools/autograd/derivatives.yaml` - Gradient definitions for autograd
|
|
||||||
|
|
||||||
## Performance Debugging
|
|
||||||
|
|
||||||
Use `TORCH_SHOW_CPP_STACKTRACES=1` for C++ traces in Python errors. For profiling, prefer `py-spy` over manual instrumentation.
|
|
||||||
9
.github/label_to_label.yml
vendored
9
.github/label_to_label.yml
vendored
@ -15,11 +15,6 @@
|
|||||||
- "module: reinplacing"
|
- "module: reinplacing"
|
||||||
then:
|
then:
|
||||||
- "module: pt2-dispatcher"
|
- "module: pt2-dispatcher"
|
||||||
- any:
|
|
||||||
- "vllm-compile"
|
|
||||||
then:
|
|
||||||
- "module: vllm"
|
|
||||||
- "oncall: pt2"
|
|
||||||
- any:
|
- any:
|
||||||
- "module: vmap"
|
- "module: vmap"
|
||||||
then:
|
then:
|
||||||
@ -32,6 +27,10 @@
|
|||||||
- "module: pt2 optimizer"
|
- "module: pt2 optimizer"
|
||||||
then:
|
then:
|
||||||
- "module: dynamo"
|
- "module: dynamo"
|
||||||
|
- any:
|
||||||
|
- "module: flex attention"
|
||||||
|
then:
|
||||||
|
- "module: higher order operators"
|
||||||
- any:
|
- any:
|
||||||
- "module: aotinductor"
|
- "module: aotinductor"
|
||||||
then:
|
then:
|
||||||
|
|||||||
45
.github/labeler.yml
vendored
45
.github/labeler.yml
vendored
@ -133,48 +133,3 @@
|
|||||||
|
|
||||||
"ciflow/vllm":
|
"ciflow/vllm":
|
||||||
- .github/ci_commit_pins/vllm.txt
|
- .github/ci_commit_pins/vllm.txt
|
||||||
|
|
||||||
"ciflow/b200":
|
|
||||||
- test/test_matmul_cuda.py
|
|
||||||
- test/test_scaled_matmul_cuda.py
|
|
||||||
- test/inductor/test_fp8.py
|
|
||||||
- aten/src/ATen/native/cuda/*Blas.cpp
|
|
||||||
- aten/src/ATen/cuda/CUDA*Blas.*
|
|
||||||
- torch/**/*cublas*
|
|
||||||
- torch/_inductor/kernel/mm.py
|
|
||||||
- test/inductor/test_max_autotune.py
|
|
||||||
- third_party/fbgemm
|
|
||||||
|
|
||||||
"ciflow/h100":
|
|
||||||
- test/test_matmul_cuda.py
|
|
||||||
- test/test_scaled_matmul_cuda.py
|
|
||||||
- test/inductor/test_fp8.py
|
|
||||||
- aten/src/ATen/native/cuda/*Blas.cpp
|
|
||||||
- aten/src/ATen/cuda/CUDA*Blas.*
|
|
||||||
- torch/**/*cublas*
|
|
||||||
- torch/_inductor/kernel/mm.py
|
|
||||||
- test/inductor/test_max_autotune.py
|
|
||||||
- third_party/fbgemm
|
|
||||||
|
|
||||||
"ciflow/rocm":
|
|
||||||
- test/test_matmul_cuda.py
|
|
||||||
- test/test_scaled_matmul_cuda.py
|
|
||||||
- test/inductor/test_fp8.py
|
|
||||||
- aten/src/ATen/native/cuda/*Blas.cpp
|
|
||||||
- aten/src/ATen/cuda/CUDA*Blas.*
|
|
||||||
- torch/_inductor/kernel/mm.py
|
|
||||||
- test/inductor/test_max_autotune.py
|
|
||||||
- third_party/fbgemm
|
|
||||||
|
|
||||||
"ciflow/mps":
|
|
||||||
- aten/src/ATen/mps/**
|
|
||||||
- aten/src/ATen/native/mps/**
|
|
||||||
- torch/_inductor/codegen/mps.py
|
|
||||||
- test/test_mps.py
|
|
||||||
- test/inductor/test_mps_basic.py
|
|
||||||
|
|
||||||
"ciflow/h100-symm-mem":
|
|
||||||
- torch/csrc/distributed/c10d/symm_mem/**
|
|
||||||
- torch/distributed/_symmetric_memory/**
|
|
||||||
- test/distributed/**/*mem*
|
|
||||||
- test/distributed/**/*mem*/**
|
|
||||||
|
|||||||
20
.github/merge_rules.yaml
vendored
20
.github/merge_rules.yaml
vendored
@ -540,26 +540,6 @@
|
|||||||
- Lint
|
- Lint
|
||||||
- pull
|
- pull
|
||||||
|
|
||||||
- name: PrivateUse1
|
|
||||||
patterns:
|
|
||||||
- torch/accelerator/**
|
|
||||||
- torch/utils/backend_registration.py
|
|
||||||
- torch/csrc/acc/**
|
|
||||||
- torch/csrc/DeviceAccelerator.*
|
|
||||||
- torch/csrc/profiler/standalone/privateuse1_observer.*
|
|
||||||
- aten/src/ATen/DeviceAccelerator.*
|
|
||||||
- aten/src/ATen/core/GeneratorForPrivateuseone.*
|
|
||||||
- aten/src/ATen/detail/PrivateUse1HooksInterface.*
|
|
||||||
- docs/source/accelerator/**
|
|
||||||
- test/cpp_extensions/open_registration_extension/torch_openreg/**
|
|
||||||
approved_by:
|
|
||||||
- albanD
|
|
||||||
- fffrog
|
|
||||||
mandatory_checks_name:
|
|
||||||
- EasyCLA
|
|
||||||
- Lint
|
|
||||||
- pull
|
|
||||||
|
|
||||||
- name: superuser
|
- name: superuser
|
||||||
patterns:
|
patterns:
|
||||||
- '*'
|
- '*'
|
||||||
|
|||||||
1
.github/nitpicks.yml
vendored
1
.github/nitpicks.yml
vendored
@ -10,4 +10,3 @@
|
|||||||
pathFilter:
|
pathFilter:
|
||||||
- 'torch/csrc/inductor/aoti_torch/c/*'
|
- 'torch/csrc/inductor/aoti_torch/c/*'
|
||||||
- 'torch/csrc/inductor/aoti_torch/generated/*'
|
- 'torch/csrc/inductor/aoti_torch/generated/*'
|
||||||
- 'torch/csrc/stable/c/*'
|
|
||||||
|
|||||||
12
.github/pytorch-probot.yml
vendored
12
.github/pytorch-probot.yml
vendored
@ -2,7 +2,6 @@ tracking_issue: 24422
|
|||||||
ciflow_tracking_issue: 64124
|
ciflow_tracking_issue: 64124
|
||||||
ciflow_push_tags:
|
ciflow_push_tags:
|
||||||
- ciflow/b200
|
- ciflow/b200
|
||||||
- ciflow/b200-distributed
|
|
||||||
- ciflow/b200-symm-mem
|
- ciflow/b200-symm-mem
|
||||||
- ciflow/binaries
|
- ciflow/binaries
|
||||||
- ciflow/binaries_libtorch
|
- ciflow/binaries_libtorch
|
||||||
@ -16,32 +15,23 @@ ciflow_push_tags:
|
|||||||
- ciflow/inductor-micro-benchmark
|
- ciflow/inductor-micro-benchmark
|
||||||
- ciflow/inductor-micro-benchmark-cpu-x86
|
- ciflow/inductor-micro-benchmark-cpu-x86
|
||||||
- ciflow/inductor-perf-compare
|
- ciflow/inductor-perf-compare
|
||||||
- ciflow/inductor-perf-test-nightly-rocm-mi300
|
- ciflow/inductor-perf-test-nightly-rocm
|
||||||
- ciflow/inductor-perf-test-nightly-rocm-mi355
|
|
||||||
- ciflow/inductor-perf-test-nightly-x86-zen
|
- ciflow/inductor-perf-test-nightly-x86-zen
|
||||||
- ciflow/inductor-perf-test-nightly-xpu
|
|
||||||
- ciflow/inductor-periodic
|
- ciflow/inductor-periodic
|
||||||
- ciflow/inductor-rocm
|
- ciflow/inductor-rocm
|
||||||
- ciflow/inductor-rocm-mi200
|
|
||||||
- ciflow/inductor-rocm-mi300
|
|
||||||
- ciflow/linux-aarch64
|
- ciflow/linux-aarch64
|
||||||
- ciflow/mps
|
- ciflow/mps
|
||||||
- ciflow/nightly
|
- ciflow/nightly
|
||||||
- ciflow/op-benchmark
|
- ciflow/op-benchmark
|
||||||
- ciflow/periodic
|
- ciflow/periodic
|
||||||
- ciflow/periodic-rocm-mi200
|
|
||||||
- ciflow/periodic-rocm-mi300
|
- ciflow/periodic-rocm-mi300
|
||||||
- ciflow/pull
|
- ciflow/pull
|
||||||
- ciflow/quantization-periodic
|
- ciflow/quantization-periodic
|
||||||
- ciflow/riscv64
|
- ciflow/riscv64
|
||||||
- ciflow/rocm
|
- ciflow/rocm
|
||||||
- ciflow/rocm-mi200
|
|
||||||
- ciflow/rocm-mi300
|
- ciflow/rocm-mi300
|
||||||
- ciflow/rocm-mi355
|
|
||||||
- ciflow/rocm-navi31
|
|
||||||
- ciflow/s390
|
- ciflow/s390
|
||||||
- ciflow/slow
|
- ciflow/slow
|
||||||
- ciflow/slow-rocm-mi200
|
|
||||||
- ciflow/torchbench
|
- ciflow/torchbench
|
||||||
- ciflow/triton_binaries
|
- ciflow/triton_binaries
|
||||||
- ciflow/trunk
|
- ciflow/trunk
|
||||||
|
|||||||
3
.github/scripts/delete_old_branches.py
vendored
3
.github/scripts/delete_old_branches.py
vendored
@ -1,11 +1,10 @@
|
|||||||
# Delete old branches
|
# Delete old branches
|
||||||
import os
|
import os
|
||||||
import re
|
import re
|
||||||
from collections.abc import Callable
|
|
||||||
from datetime import datetime
|
from datetime import datetime
|
||||||
from functools import lru_cache
|
from functools import lru_cache
|
||||||
from pathlib import Path
|
from pathlib import Path
|
||||||
from typing import Any
|
from typing import Any, Callable
|
||||||
|
|
||||||
from github_utils import gh_fetch_json_dict, gh_graphql
|
from github_utils import gh_fetch_json_dict, gh_graphql
|
||||||
from gitutils import GitRepo
|
from gitutils import GitRepo
|
||||||
|
|||||||
BIN
.github/scripts/drci_mocks.json.gz
vendored
BIN
.github/scripts/drci_mocks.json.gz
vendored
Binary file not shown.
5
.github/scripts/filter_test_configs.py
vendored
5
.github/scripts/filter_test_configs.py
vendored
@ -8,11 +8,10 @@ import re
|
|||||||
import subprocess
|
import subprocess
|
||||||
import sys
|
import sys
|
||||||
import warnings
|
import warnings
|
||||||
from collections.abc import Callable
|
|
||||||
from enum import Enum
|
from enum import Enum
|
||||||
from functools import cache
|
from functools import cache
|
||||||
from logging import info
|
from logging import info
|
||||||
from typing import Any, Optional
|
from typing import Any, Callable, Optional
|
||||||
from urllib.request import Request, urlopen
|
from urllib.request import Request, urlopen
|
||||||
|
|
||||||
import yaml
|
import yaml
|
||||||
@ -513,8 +512,6 @@ def perform_misc_tasks(
|
|||||||
"keep-going",
|
"keep-going",
|
||||||
branch == MAIN_BRANCH
|
branch == MAIN_BRANCH
|
||||||
or bool(tag and re.match(r"^trunk/[a-f0-9]{40}$", tag))
|
or bool(tag and re.match(r"^trunk/[a-f0-9]{40}$", tag))
|
||||||
# Pattern for tags created via manual run on HUD
|
|
||||||
or bool(tag and re.match(r"^ciflow/[^/]+/[a-f0-9]{40}$", tag))
|
|
||||||
or check_for_setting(labels, pr_body, "keep-going"),
|
or check_for_setting(labels, pr_body, "keep-going"),
|
||||||
)
|
)
|
||||||
set_output(
|
set_output(
|
||||||
|
|||||||
151
.github/scripts/generate_binary_build_matrix.py
vendored
151
.github/scripts/generate_binary_build_matrix.py
vendored
@ -11,33 +11,26 @@ architectures:
|
|||||||
* Latest XPU
|
* Latest XPU
|
||||||
"""
|
"""
|
||||||
|
|
||||||
import json
|
|
||||||
import os
|
import os
|
||||||
import re
|
|
||||||
from pathlib import Path
|
|
||||||
from typing import Optional
|
from typing import Optional
|
||||||
|
|
||||||
|
|
||||||
SCRIPT_DIR = Path(__file__).absolute().parent
|
# NOTE: Please also update the CUDA sources in `PIP_SOURCES` in tools/nightly.py when changing this
|
||||||
REPO_ROOT = SCRIPT_DIR.parent.parent
|
CUDA_ARCHES = ["12.6", "12.8", "13.0"]
|
||||||
|
|
||||||
|
|
||||||
CUDA_ARCHES = ["12.6", "12.8", "12.9", "13.0"]
|
|
||||||
CUDA_STABLE = "12.8"
|
CUDA_STABLE = "12.8"
|
||||||
CUDA_ARCHES_FULL_VERSION = {
|
CUDA_ARCHES_FULL_VERSION = {
|
||||||
"12.6": "12.6.3",
|
"12.6": "12.6.3",
|
||||||
"12.8": "12.8.1",
|
"12.8": "12.8.1",
|
||||||
"12.9": "12.9.1",
|
|
||||||
"13.0": "13.0.0",
|
"13.0": "13.0.0",
|
||||||
}
|
}
|
||||||
CUDA_ARCHES_CUDNN_VERSION = {
|
CUDA_ARCHES_CUDNN_VERSION = {
|
||||||
"12.6": "9",
|
"12.6": "9",
|
||||||
"12.8": "9",
|
"12.8": "9",
|
||||||
"12.9": "9",
|
|
||||||
"13.0": "9",
|
"13.0": "9",
|
||||||
}
|
}
|
||||||
|
|
||||||
ROCM_ARCHES = ["7.0", "7.1"]
|
# NOTE: Please also update the ROCm sources in `PIP_SOURCES` in tools/nightly.py when changing this
|
||||||
|
ROCM_ARCHES = ["6.4", "7.0"]
|
||||||
|
|
||||||
XPU_ARCHES = ["xpu"]
|
XPU_ARCHES = ["xpu"]
|
||||||
|
|
||||||
@ -45,7 +38,7 @@ CPU_AARCH64_ARCH = ["cpu-aarch64"]
|
|||||||
|
|
||||||
CPU_S390X_ARCH = ["cpu-s390x"]
|
CPU_S390X_ARCH = ["cpu-s390x"]
|
||||||
|
|
||||||
CUDA_AARCH64_ARCHES = ["12.6-aarch64", "12.8-aarch64", "12.9-aarch64", "13.0-aarch64"]
|
CUDA_AARCH64_ARCHES = ["12.6-aarch64", "12.8-aarch64", "13.0-aarch64"]
|
||||||
|
|
||||||
|
|
||||||
PYTORCH_EXTRA_INSTALL_REQUIREMENTS = {
|
PYTORCH_EXTRA_INSTALL_REQUIREMENTS = {
|
||||||
@ -61,7 +54,7 @@ PYTORCH_EXTRA_INSTALL_REQUIREMENTS = {
|
|||||||
"nvidia-cusparse-cu12==12.5.4.2; platform_system == 'Linux' | "
|
"nvidia-cusparse-cu12==12.5.4.2; platform_system == 'Linux' | "
|
||||||
"nvidia-cusparselt-cu12==0.7.1; platform_system == 'Linux' | "
|
"nvidia-cusparselt-cu12==0.7.1; platform_system == 'Linux' | "
|
||||||
"nvidia-nccl-cu12==2.27.5; platform_system == 'Linux' | "
|
"nvidia-nccl-cu12==2.27.5; platform_system == 'Linux' | "
|
||||||
"nvidia-nvshmem-cu12==3.4.5; platform_system == 'Linux' | "
|
"nvidia-nvshmem-cu12==3.3.24; platform_system == 'Linux' | "
|
||||||
"nvidia-nvtx-cu12==12.6.77; platform_system == 'Linux' | "
|
"nvidia-nvtx-cu12==12.6.77; platform_system == 'Linux' | "
|
||||||
"nvidia-nvjitlink-cu12==12.6.85; platform_system == 'Linux' | "
|
"nvidia-nvjitlink-cu12==12.6.85; platform_system == 'Linux' | "
|
||||||
"nvidia-cufile-cu12==1.11.1.6; platform_system == 'Linux'"
|
"nvidia-cufile-cu12==1.11.1.6; platform_system == 'Linux'"
|
||||||
@ -78,44 +71,27 @@ PYTORCH_EXTRA_INSTALL_REQUIREMENTS = {
|
|||||||
"nvidia-cusparse-cu12==12.5.8.93; platform_system == 'Linux' | "
|
"nvidia-cusparse-cu12==12.5.8.93; platform_system == 'Linux' | "
|
||||||
"nvidia-cusparselt-cu12==0.7.1; platform_system == 'Linux' | "
|
"nvidia-cusparselt-cu12==0.7.1; platform_system == 'Linux' | "
|
||||||
"nvidia-nccl-cu12==2.27.5; platform_system == 'Linux' | "
|
"nvidia-nccl-cu12==2.27.5; platform_system == 'Linux' | "
|
||||||
"nvidia-nvshmem-cu12==3.4.5; platform_system == 'Linux' | "
|
"nvidia-nvshmem-cu12==3.3.24; platform_system == 'Linux' | "
|
||||||
"nvidia-nvtx-cu12==12.8.90; platform_system == 'Linux' | "
|
"nvidia-nvtx-cu12==12.8.90; platform_system == 'Linux' | "
|
||||||
"nvidia-nvjitlink-cu12==12.8.93; platform_system == 'Linux' | "
|
"nvidia-nvjitlink-cu12==12.8.93; platform_system == 'Linux' | "
|
||||||
"nvidia-cufile-cu12==1.13.1.3; platform_system == 'Linux'"
|
"nvidia-cufile-cu12==1.13.1.3; platform_system == 'Linux'"
|
||||||
),
|
),
|
||||||
"12.9": (
|
|
||||||
"nvidia-cuda-nvrtc-cu12==12.9.86; platform_system == 'Linux' | "
|
|
||||||
"nvidia-cuda-runtime-cu12==12.9.79; platform_system == 'Linux' | "
|
|
||||||
"nvidia-cuda-cupti-cu12==12.9.79; platform_system == 'Linux' | "
|
|
||||||
"nvidia-cudnn-cu12==9.10.2.21; platform_system == 'Linux' | "
|
|
||||||
"nvidia-cublas-cu12==12.9.1.4; platform_system == 'Linux' | "
|
|
||||||
"nvidia-cufft-cu12==11.4.1.4; platform_system == 'Linux' | "
|
|
||||||
"nvidia-curand-cu12==10.3.10.19; platform_system == 'Linux' | "
|
|
||||||
"nvidia-cusolver-cu12==11.7.5.82; platform_system == 'Linux' | "
|
|
||||||
"nvidia-cusparse-cu12==12.5.10.65; platform_system == 'Linux' | "
|
|
||||||
"nvidia-cusparselt-cu12==0.7.1; platform_system == 'Linux' | "
|
|
||||||
"nvidia-nccl-cu12==2.27.5; platform_system == 'Linux' | "
|
|
||||||
"nvidia-nvshmem-cu12==3.4.5; platform_system == 'Linux' | "
|
|
||||||
"nvidia-nvtx-cu12==12.9.79; platform_system == 'Linux' | "
|
|
||||||
"nvidia-nvjitlink-cu12==12.9.86; platform_system == 'Linux' | "
|
|
||||||
"nvidia-cufile-cu12==1.14.1.1; platform_system == 'Linux'"
|
|
||||||
),
|
|
||||||
"13.0": (
|
"13.0": (
|
||||||
"nvidia-cuda-nvrtc==13.0.88; platform_system == 'Linux' | "
|
"nvidia-cuda-nvrtc==13.0.48; platform_system == 'Linux' | "
|
||||||
"nvidia-cuda-runtime==13.0.96; platform_system == 'Linux' | "
|
"nvidia-cuda-runtime==13.0.48; platform_system == 'Linux' | "
|
||||||
"nvidia-cuda-cupti==13.0.85; platform_system == 'Linux' | "
|
"nvidia-cuda-cupti==13.0.48; platform_system == 'Linux' | "
|
||||||
"nvidia-cudnn-cu13==9.13.0.50; platform_system == 'Linux' | "
|
"nvidia-cudnn-cu13==9.13.0.50; platform_system == 'Linux' | "
|
||||||
"nvidia-cublas==13.1.0.3; platform_system == 'Linux' | "
|
"nvidia-cublas==13.0.0.19; platform_system == 'Linux' | "
|
||||||
"nvidia-cufft==12.0.0.61; platform_system == 'Linux' | "
|
"nvidia-cufft==12.0.0.15; platform_system == 'Linux' | "
|
||||||
"nvidia-curand==10.4.0.35; platform_system == 'Linux' | "
|
"nvidia-curand==10.4.0.35; platform_system == 'Linux' | "
|
||||||
"nvidia-cusolver==12.0.4.66; platform_system == 'Linux' | "
|
"nvidia-cusolver==12.0.3.29; platform_system == 'Linux' | "
|
||||||
"nvidia-cusparse==12.6.3.3; platform_system == 'Linux' | "
|
"nvidia-cusparse==12.6.2.49; platform_system == 'Linux' | "
|
||||||
"nvidia-cusparselt-cu13==0.8.0; platform_system == 'Linux' | "
|
"nvidia-cusparselt-cu13==0.8.0; platform_system == 'Linux' | "
|
||||||
"nvidia-nccl-cu13==2.27.7; platform_system == 'Linux' | "
|
"nvidia-nccl-cu13==2.27.7; platform_system == 'Linux' | "
|
||||||
"nvidia-nvshmem-cu13==3.4.5; platform_system == 'Linux' | "
|
"nvidia-nvshmem-cu13==3.3.24; platform_system == 'Linux' | "
|
||||||
"nvidia-nvtx==13.0.85; platform_system == 'Linux' | "
|
"nvidia-nvtx==13.0.39; platform_system == 'Linux' | "
|
||||||
"nvidia-nvjitlink==13.0.88; platform_system == 'Linux' | "
|
"nvidia-nvjitlink==13.0.39; platform_system == 'Linux' | "
|
||||||
"nvidia-cufile==1.15.1.6; platform_system == 'Linux'"
|
"nvidia-cufile==1.15.0.42; platform_system == 'Linux'"
|
||||||
),
|
),
|
||||||
"xpu": (
|
"xpu": (
|
||||||
"intel-cmplr-lib-rt==2025.2.1 | "
|
"intel-cmplr-lib-rt==2025.2.1 | "
|
||||||
@ -142,48 +118,9 @@ PYTORCH_EXTRA_INSTALL_REQUIREMENTS = {
|
|||||||
}
|
}
|
||||||
|
|
||||||
|
|
||||||
# Used by tools/nightly.py
|
|
||||||
PYTORCH_NIGHTLY_PIP_INDEX_URL = "https://download.pytorch.org/whl/nightly"
|
|
||||||
NIGHTLY_SOURCE_MATRIX = {
|
|
||||||
"cpu": dict(
|
|
||||||
name="cpu",
|
|
||||||
index_url=f"{PYTORCH_NIGHTLY_PIP_INDEX_URL}/cpu",
|
|
||||||
supported_platforms=["Linux", "macOS", "Windows"],
|
|
||||||
accelerator="cpu",
|
|
||||||
)
|
|
||||||
}
|
|
||||||
CUDA_NIGHTLY_SOURCE_MATRIX = {
|
|
||||||
f"cuda-{major}.{minor}": dict(
|
|
||||||
name=f"cuda-{major}.{minor}",
|
|
||||||
index_url=f"{PYTORCH_NIGHTLY_PIP_INDEX_URL}/cu{major}{minor}",
|
|
||||||
supported_platforms=["Linux", "Windows"],
|
|
||||||
accelerator="cuda",
|
|
||||||
)
|
|
||||||
for major, minor in (map(int, version.split(".")) for version in CUDA_ARCHES)
|
|
||||||
}
|
|
||||||
ROCM_NIGHTLY_SOURCE_MATRIX = {
|
|
||||||
f"rocm-{major}.{minor}": dict(
|
|
||||||
name=f"rocm-{major}.{minor}",
|
|
||||||
index_url=f"{PYTORCH_NIGHTLY_PIP_INDEX_URL}/rocm{major}.{minor}",
|
|
||||||
supported_platforms=["Linux"],
|
|
||||||
accelerator="rocm",
|
|
||||||
)
|
|
||||||
for major, minor in (map(int, version.split(".")) for version in ROCM_ARCHES)
|
|
||||||
}
|
|
||||||
XPU_NIGHTLY_SOURCE_MATRIX = {
|
|
||||||
"xpu": dict(
|
|
||||||
name="xpu",
|
|
||||||
index_url=f"{PYTORCH_NIGHTLY_PIP_INDEX_URL}/xpu",
|
|
||||||
supported_platforms=["Linux"],
|
|
||||||
accelerator="xpu",
|
|
||||||
)
|
|
||||||
}
|
|
||||||
NIGHTLY_SOURCE_MATRIX.update(CUDA_NIGHTLY_SOURCE_MATRIX)
|
|
||||||
NIGHTLY_SOURCE_MATRIX.update(ROCM_NIGHTLY_SOURCE_MATRIX)
|
|
||||||
NIGHTLY_SOURCE_MATRIX.update(XPU_NIGHTLY_SOURCE_MATRIX)
|
|
||||||
|
|
||||||
|
|
||||||
def get_nccl_wheel_version(arch_version: str) -> str:
|
def get_nccl_wheel_version(arch_version: str) -> str:
|
||||||
|
import re
|
||||||
|
|
||||||
requirements = map(
|
requirements = map(
|
||||||
str.strip, re.split("[;|]", PYTORCH_EXTRA_INSTALL_REQUIREMENTS[arch_version])
|
str.strip, re.split("[;|]", PYTORCH_EXTRA_INSTALL_REQUIREMENTS[arch_version])
|
||||||
)
|
)
|
||||||
@ -191,14 +128,17 @@ def get_nccl_wheel_version(arch_version: str) -> str:
|
|||||||
|
|
||||||
|
|
||||||
def read_nccl_pin(arch_version: str) -> str:
|
def read_nccl_pin(arch_version: str) -> str:
|
||||||
nccl_pin_path = (
|
from pathlib import Path
|
||||||
REPO_ROOT
|
|
||||||
/ ".ci"
|
nccl_pin_path = os.path.join(
|
||||||
/ "docker"
|
Path(__file__).absolute().parents[2],
|
||||||
/ "ci_commit_pins"
|
".ci",
|
||||||
/ f"nccl-cu{arch_version[:2]}.txt"
|
"docker",
|
||||||
|
"ci_commit_pins",
|
||||||
|
f"nccl-cu{arch_version[:2]}.txt",
|
||||||
)
|
)
|
||||||
return nccl_pin_path.read_text().strip()
|
with open(nccl_pin_path) as f:
|
||||||
|
return f.read().strip()
|
||||||
|
|
||||||
|
|
||||||
def validate_nccl_dep_consistency(arch_version: str) -> None:
|
def validate_nccl_dep_consistency(arch_version: str) -> None:
|
||||||
@ -206,8 +146,7 @@ def validate_nccl_dep_consistency(arch_version: str) -> None:
|
|||||||
wheel_ver = get_nccl_wheel_version(arch_version)
|
wheel_ver = get_nccl_wheel_version(arch_version)
|
||||||
if not nccl_release_tag.startswith(f"v{wheel_ver}"):
|
if not nccl_release_tag.startswith(f"v{wheel_ver}"):
|
||||||
raise RuntimeError(
|
raise RuntimeError(
|
||||||
f"{arch_version} NCCL release tag version {nccl_release_tag} "
|
f"{arch_version} NCCL release tag version {nccl_release_tag} does not correspond to wheel version {wheel_ver}"
|
||||||
f"does not correspond to wheel version {wheel_ver}"
|
|
||||||
)
|
)
|
||||||
|
|
||||||
|
|
||||||
@ -283,11 +222,7 @@ def generate_libtorch_matrix(
|
|||||||
arches += CUDA_ARCHES
|
arches += CUDA_ARCHES
|
||||||
arches += ROCM_ARCHES
|
arches += ROCM_ARCHES
|
||||||
elif os == "windows":
|
elif os == "windows":
|
||||||
# TODO (huydhn): Only build CUDA 12.9 for Linux. This logic is to be cleaned up
|
arches += CUDA_ARCHES
|
||||||
# in 2.10
|
|
||||||
windows_cuda_arches = CUDA_ARCHES.copy()
|
|
||||||
windows_cuda_arches.remove("12.9")
|
|
||||||
arches += windows_cuda_arches
|
|
||||||
if libtorch_variants is None:
|
if libtorch_variants is None:
|
||||||
libtorch_variants = [
|
libtorch_variants = [
|
||||||
"shared-with-deps",
|
"shared-with-deps",
|
||||||
@ -351,11 +286,7 @@ def generate_wheels_matrix(
|
|||||||
if os == "linux":
|
if os == "linux":
|
||||||
arches += CUDA_ARCHES + ROCM_ARCHES + XPU_ARCHES
|
arches += CUDA_ARCHES + ROCM_ARCHES + XPU_ARCHES
|
||||||
elif os == "windows":
|
elif os == "windows":
|
||||||
# TODO (huydhn): Only build CUDA 12.9 for Linux. This logic is to be cleaned up
|
arches += CUDA_ARCHES + XPU_ARCHES
|
||||||
# in 2.10
|
|
||||||
windows_cuda_arches = CUDA_ARCHES.copy()
|
|
||||||
windows_cuda_arches.remove("12.9")
|
|
||||||
arches += windows_cuda_arches + XPU_ARCHES
|
|
||||||
elif os == "linux-aarch64":
|
elif os == "linux-aarch64":
|
||||||
# Separate new if as the CPU type is different and
|
# Separate new if as the CPU type is different and
|
||||||
# uses different build/test scripts
|
# uses different build/test scripts
|
||||||
@ -391,7 +322,7 @@ def generate_wheels_matrix(
|
|||||||
# cuda linux wheels require PYTORCH_EXTRA_INSTALL_REQUIREMENTS to install
|
# cuda linux wheels require PYTORCH_EXTRA_INSTALL_REQUIREMENTS to install
|
||||||
|
|
||||||
if (
|
if (
|
||||||
arch_version in ["13.0", "12.9", "12.8", "12.6"]
|
arch_version in ["13.0", "12.8", "12.6"]
|
||||||
and os == "linux"
|
and os == "linux"
|
||||||
or arch_version in CUDA_AARCH64_ARCHES
|
or arch_version in CUDA_AARCH64_ARCHES
|
||||||
):
|
):
|
||||||
@ -454,14 +385,6 @@ def generate_wheels_matrix(
|
|||||||
return ret
|
return ret
|
||||||
|
|
||||||
|
|
||||||
arch_version = ""
|
validate_nccl_dep_consistency("13.0")
|
||||||
for arch_version in CUDA_ARCHES:
|
validate_nccl_dep_consistency("12.8")
|
||||||
validate_nccl_dep_consistency(arch_version)
|
validate_nccl_dep_consistency("12.6")
|
||||||
del arch_version
|
|
||||||
|
|
||||||
|
|
||||||
if __name__ == "__main__":
|
|
||||||
# Used by tools/nightly.py
|
|
||||||
(SCRIPT_DIR / "nightly_source_matrix.json").write_text(
|
|
||||||
json.dumps(NIGHTLY_SOURCE_MATRIX, indent=4) + "\n"
|
|
||||||
)
|
|
||||||
|
|||||||
3
.github/scripts/get_workflow_job_id.py
vendored
3
.github/scripts/get_workflow_job_id.py
vendored
@ -11,8 +11,7 @@ import sys
|
|||||||
import time
|
import time
|
||||||
import urllib
|
import urllib
|
||||||
import urllib.parse
|
import urllib.parse
|
||||||
from collections.abc import Callable
|
from typing import Any, Callable, Optional
|
||||||
from typing import Any, Optional
|
|
||||||
from urllib.request import Request, urlopen
|
from urllib.request import Request, urlopen
|
||||||
|
|
||||||
|
|
||||||
|
|||||||
4
.github/scripts/github_utils.py
vendored
4
.github/scripts/github_utils.py
vendored
@ -3,9 +3,8 @@
|
|||||||
import json
|
import json
|
||||||
import os
|
import os
|
||||||
import warnings
|
import warnings
|
||||||
from collections.abc import Callable
|
|
||||||
from dataclasses import dataclass
|
from dataclasses import dataclass
|
||||||
from typing import Any, cast, Optional, Union
|
from typing import Any, Callable, cast, Optional, Union
|
||||||
from urllib.error import HTTPError
|
from urllib.error import HTTPError
|
||||||
from urllib.parse import quote
|
from urllib.parse import quote
|
||||||
from urllib.request import Request, urlopen
|
from urllib.request import Request, urlopen
|
||||||
@ -19,7 +18,6 @@ class GitHubComment:
|
|||||||
body_text: str
|
body_text: str
|
||||||
created_at: str
|
created_at: str
|
||||||
author_login: str
|
author_login: str
|
||||||
author_url: Optional[str]
|
|
||||||
author_association: str
|
author_association: str
|
||||||
editor_login: Optional[str]
|
editor_login: Optional[str]
|
||||||
database_id: int
|
database_id: int
|
||||||
|
|||||||
4
.github/scripts/gitutils.py
vendored
4
.github/scripts/gitutils.py
vendored
@ -4,10 +4,10 @@ import os
|
|||||||
import re
|
import re
|
||||||
import tempfile
|
import tempfile
|
||||||
from collections import defaultdict
|
from collections import defaultdict
|
||||||
from collections.abc import Callable, Iterator
|
from collections.abc import Iterator
|
||||||
from datetime import datetime
|
from datetime import datetime
|
||||||
from functools import wraps
|
from functools import wraps
|
||||||
from typing import Any, cast, Optional, TypeVar, Union
|
from typing import Any, Callable, cast, Optional, TypeVar, Union
|
||||||
|
|
||||||
|
|
||||||
T = TypeVar("T")
|
T = TypeVar("T")
|
||||||
|
|||||||
BIN
.github/scripts/gql_mocks.json.gz
vendored
BIN
.github/scripts/gql_mocks.json.gz
vendored
Binary file not shown.
2
.github/scripts/test_check_labels.py
vendored
2
.github/scripts/test_check_labels.py
vendored
@ -38,7 +38,6 @@ def mock_get_comments() -> list[GitHubComment]:
|
|||||||
body_text="mock_body_text",
|
body_text="mock_body_text",
|
||||||
created_at="",
|
created_at="",
|
||||||
author_login="",
|
author_login="",
|
||||||
author_url=None,
|
|
||||||
author_association="",
|
author_association="",
|
||||||
editor_login=None,
|
editor_login=None,
|
||||||
database_id=1,
|
database_id=1,
|
||||||
@ -49,7 +48,6 @@ def mock_get_comments() -> list[GitHubComment]:
|
|||||||
body_text=" #" + LABEL_ERR_MSG_TITLE.replace("`", ""),
|
body_text=" #" + LABEL_ERR_MSG_TITLE.replace("`", ""),
|
||||||
created_at="",
|
created_at="",
|
||||||
author_login=BOT_AUTHORS[1],
|
author_login=BOT_AUTHORS[1],
|
||||||
author_url=None,
|
|
||||||
author_association="",
|
author_association="",
|
||||||
editor_login=None,
|
editor_login=None,
|
||||||
database_id=2,
|
database_id=2,
|
||||||
|
|||||||
18
.github/scripts/test_trymerge.py
vendored
18
.github/scripts/test_trymerge.py
vendored
@ -32,7 +32,6 @@ from trymerge import (
|
|||||||
main as trymerge_main,
|
main as trymerge_main,
|
||||||
MandatoryChecksMissingError,
|
MandatoryChecksMissingError,
|
||||||
MergeRule,
|
MergeRule,
|
||||||
PostCommentError,
|
|
||||||
RE_GHSTACK_DESC,
|
RE_GHSTACK_DESC,
|
||||||
read_merge_rules,
|
read_merge_rules,
|
||||||
remove_job_name_suffix,
|
remove_job_name_suffix,
|
||||||
@ -589,23 +588,6 @@ class TestTryMerge(TestCase):
|
|||||||
self.assertEqual(mock_merge_base, pr.get_merge_base())
|
self.assertEqual(mock_merge_base, pr.get_merge_base())
|
||||||
mocked_gh_fetch_merge_base.assert_called_once()
|
mocked_gh_fetch_merge_base.assert_called_once()
|
||||||
|
|
||||||
def test_app_can_revert(self, *args: Any) -> None:
|
|
||||||
pr = GitHubPR("pytorch", "pytorch", 164660)
|
|
||||||
repo = DummyGitRepo()
|
|
||||||
app_comment_id, impostor_comment_id = 3375785595, 3377647892
|
|
||||||
# Check that app can revert
|
|
||||||
self.assertIsNotNone(validate_revert(repo, pr, comment_id=app_comment_id))
|
|
||||||
# But impostor can not
|
|
||||||
self.assertRaises(
|
|
||||||
PostCommentError,
|
|
||||||
lambda: validate_revert(repo, pr, comment_id=impostor_comment_id),
|
|
||||||
)
|
|
||||||
# Despite it's name being the name of the bot
|
|
||||||
self.assertEqual(
|
|
||||||
pr.get_comment_by_id(impostor_comment_id).author_login,
|
|
||||||
"pytorch-auto-revert",
|
|
||||||
)
|
|
||||||
|
|
||||||
|
|
||||||
@mock.patch("trymerge.gh_graphql", side_effect=mocked_gh_graphql)
|
@mock.patch("trymerge.gh_graphql", side_effect=mocked_gh_graphql)
|
||||||
@mock.patch("trymerge.gh_fetch_merge_base", return_value="")
|
@mock.patch("trymerge.gh_fetch_merge_base", return_value="")
|
||||||
|
|||||||
17
.github/scripts/trymerge.py
vendored
17
.github/scripts/trymerge.py
vendored
@ -17,12 +17,12 @@ import re
|
|||||||
import time
|
import time
|
||||||
import urllib.parse
|
import urllib.parse
|
||||||
from collections import defaultdict
|
from collections import defaultdict
|
||||||
from collections.abc import Callable, Iterable
|
from collections.abc import Iterable
|
||||||
from dataclasses import dataclass
|
from dataclasses import dataclass
|
||||||
from functools import cache
|
from functools import cache
|
||||||
from pathlib import Path
|
from pathlib import Path
|
||||||
from re import Pattern
|
from re import Pattern
|
||||||
from typing import Any, cast, NamedTuple, Optional
|
from typing import Any, Callable, cast, NamedTuple, Optional
|
||||||
from warnings import warn
|
from warnings import warn
|
||||||
|
|
||||||
import yaml
|
import yaml
|
||||||
@ -234,7 +234,6 @@ query ($owner: String!, $name: String!, $number: Int!) {
|
|||||||
createdAt
|
createdAt
|
||||||
author {
|
author {
|
||||||
login
|
login
|
||||||
url
|
|
||||||
}
|
}
|
||||||
authorAssociation
|
authorAssociation
|
||||||
editor {
|
editor {
|
||||||
@ -1092,9 +1091,8 @@ class GitHubPR:
|
|||||||
editor = node["editor"]
|
editor = node["editor"]
|
||||||
return GitHubComment(
|
return GitHubComment(
|
||||||
body_text=node["bodyText"],
|
body_text=node["bodyText"],
|
||||||
created_at=node.get("createdAt", ""),
|
created_at=node["createdAt"] if "createdAt" in node else "",
|
||||||
author_login=node["author"]["login"],
|
author_login=node["author"]["login"],
|
||||||
author_url=node["author"].get("url", None),
|
|
||||||
author_association=node["authorAssociation"],
|
author_association=node["authorAssociation"],
|
||||||
editor_login=editor["login"] if editor else None,
|
editor_login=editor["login"] if editor else None,
|
||||||
database_id=node["databaseId"],
|
database_id=node["databaseId"],
|
||||||
@ -2031,17 +2029,16 @@ def validate_revert(
|
|||||||
# For some reason, one can not be a member of private repo, only CONTRIBUTOR
|
# For some reason, one can not be a member of private repo, only CONTRIBUTOR
|
||||||
if pr.is_base_repo_private():
|
if pr.is_base_repo_private():
|
||||||
allowed_reverters.append("CONTRIBUTOR")
|
allowed_reverters.append("CONTRIBUTOR")
|
||||||
# Special case the pytorch-auto-revert app, whose does not have association
|
|
||||||
# But should be able to issue revert command
|
|
||||||
if comment.author_url == "https://github.com/apps/pytorch-auto-revert":
|
|
||||||
allowed_reverters.append("NONE")
|
|
||||||
|
|
||||||
if author_association not in allowed_reverters:
|
if author_association not in allowed_reverters:
|
||||||
raise PostCommentError(
|
raise PostCommentError(
|
||||||
f"Will not revert as @{author_login} is not one of "
|
f"Will not revert as @{author_login} is not one of "
|
||||||
f"[{', '.join(allowed_reverters)}], but instead is {author_association}."
|
f"[{', '.join(allowed_reverters)}], but instead is {author_association}."
|
||||||
)
|
)
|
||||||
|
|
||||||
|
# Raises exception if matching rule is not found, but ignores all status checks
|
||||||
|
find_matching_merge_rule(
|
||||||
|
pr, repo, skip_mandatory_checks=True, skip_internal_checks=True
|
||||||
|
)
|
||||||
commit_sha = get_pr_commit_sha(repo, pr)
|
commit_sha = get_pr_commit_sha(repo, pr)
|
||||||
return (author_login, commit_sha)
|
return (author_login, commit_sha)
|
||||||
|
|
||||||
|
|||||||
@ -177,9 +177,6 @@ jobs:
|
|||||||
runs-on: linux.rocm.gpu.mi250
|
runs-on: linux.rocm.gpu.mi250
|
||||||
timeout-minutes: !{{ common.timeout_minutes }}
|
timeout-minutes: !{{ common.timeout_minutes }}
|
||||||
!{{ upload.binary_env(config) }}
|
!{{ upload.binary_env(config) }}
|
||||||
permissions:
|
|
||||||
id-token: write
|
|
||||||
contents: read
|
|
||||||
steps:
|
steps:
|
||||||
- name: Setup ROCm
|
- name: Setup ROCm
|
||||||
uses: ./.github/actions/setup-rocm
|
uses: ./.github/actions/setup-rocm
|
||||||
|
|||||||
@ -26,8 +26,9 @@ name: !{{ build_environment }}
|
|||||||
- name: Setup Python
|
- name: Setup Python
|
||||||
uses: actions/setup-python@v6
|
uses: actions/setup-python@v6
|
||||||
with:
|
with:
|
||||||
|
# TODO: Removeme once 3.14 is out
|
||||||
# .4 version is min minor for 3.10, and also no-gil version of 3.13 needs at least 3.13.3
|
# .4 version is min minor for 3.10, and also no-gil version of 3.13 needs at least 3.13.3
|
||||||
python-version: "!{{ py_ver.strip('t') + ('.4' if '3.14' not in py_ver else '.0') }}"
|
python-version: "!{{ (py_ver.strip('t') + '.4') if '3.14' not in py_ver else '3.14.0-rc.2' }}"
|
||||||
freethreaded: !{{ "true" if py_ver.endswith('t') else "false" }}
|
freethreaded: !{{ "true" if py_ver.endswith('t') else "false" }}
|
||||||
{%- endmacro %}
|
{%- endmacro %}
|
||||||
|
|
||||||
|
|||||||
@ -79,9 +79,9 @@ jobs:
|
|||||||
runs-on: "windows-11-arm64-preview"
|
runs-on: "windows-11-arm64-preview"
|
||||||
{%- else %}
|
{%- else %}
|
||||||
{%- if branches == "nightly" %}
|
{%- if branches == "nightly" %}
|
||||||
runs-on: "${{ needs.get-label-type.outputs.label-type }}windows.12xlarge"
|
runs-on: "${{ needs.get-label-type.outputs.label-type }}windows.4xlarge"
|
||||||
{%- else %}
|
{%- else %}
|
||||||
runs-on: "${{ needs.get-label-type.outputs.label-type }}windows.12xlarge.nonephemeral"
|
runs-on: "${{ needs.get-label-type.outputs.label-type }}windows.4xlarge.nonephemeral"
|
||||||
{%- endif %}
|
{%- endif %}
|
||||||
{%- endif %}
|
{%- endif %}
|
||||||
timeout-minutes: !{{ common.timeout_minutes_windows_binary }}
|
timeout-minutes: !{{ common.timeout_minutes_windows_binary }}
|
||||||
|
|||||||
2
.github/workflows/_docs.yml
vendored
2
.github/workflows/_docs.yml
vendored
@ -72,7 +72,7 @@ jobs:
|
|||||||
# Let's try to figure out how this can be improved
|
# Let's try to figure out how this can be improved
|
||||||
timeout-minutes: 360
|
timeout-minutes: 360
|
||||||
- docs_type: python
|
- docs_type: python
|
||||||
runner: ${{ inputs.runner_prefix }}linux.c7i.2xlarge
|
runner: ${{ inputs.runner_prefix }}linux.2xlarge
|
||||||
# It takes less than 30m to finish python docs unless there are issues
|
# It takes less than 30m to finish python docs unless there are issues
|
||||||
timeout-minutes: 30
|
timeout-minutes: 30
|
||||||
# Set a fixed name for this job instead of using the current matrix-generated name, i.e. build-docs (cpp, linux.12xlarge, 180)
|
# Set a fixed name for this job instead of using the current matrix-generated name, i.e. build-docs (cpp, linux.12xlarge, 180)
|
||||||
|
|||||||
2
.github/workflows/_linux-build.yml
vendored
2
.github/workflows/_linux-build.yml
vendored
@ -37,7 +37,7 @@ on:
|
|||||||
runner:
|
runner:
|
||||||
required: false
|
required: false
|
||||||
type: string
|
type: string
|
||||||
default: "linux.c7i.2xlarge"
|
default: "linux.2xlarge"
|
||||||
description: |
|
description: |
|
||||||
Label of the runner this job should run on.
|
Label of the runner this job should run on.
|
||||||
test-matrix:
|
test-matrix:
|
||||||
|
|||||||
42
.github/workflows/_linux-test.yml
vendored
42
.github/workflows/_linux-test.yml
vendored
@ -224,46 +224,6 @@ jobs:
|
|||||||
continue-on-error: true
|
continue-on-error: true
|
||||||
uses: ./.github/actions/download-td-artifacts
|
uses: ./.github/actions/download-td-artifacts
|
||||||
|
|
||||||
- name: Download Windows torch wheel for cross-compilation
|
|
||||||
if: matrix.win_torch_wheel_artifact != ''
|
|
||||||
uses: seemethere/download-artifact-s3@1da556a7aa0a088e3153970611f6c432d58e80e6 # v4.2.0
|
|
||||||
with:
|
|
||||||
name: ${{ matrix.win_torch_wheel_artifact }}
|
|
||||||
path: win-torch-wheel
|
|
||||||
|
|
||||||
- name: Extract Windows wheel and setup CUDA libraries
|
|
||||||
if: matrix.win_torch_wheel_artifact != ''
|
|
||||||
shell: bash
|
|
||||||
run: |
|
|
||||||
set -x
|
|
||||||
|
|
||||||
# Find the wheel file
|
|
||||||
WHEEL_FILE=$(find win-torch-wheel -name "*.whl" -type f | head -n 1)
|
|
||||||
if [ -z "$WHEEL_FILE" ]; then
|
|
||||||
echo "Error: No wheel file found in win-torch-wheel directory"
|
|
||||||
exit 1
|
|
||||||
fi
|
|
||||||
echo "Found wheel file: $WHEEL_FILE"
|
|
||||||
|
|
||||||
# Unzip the wheel file
|
|
||||||
unzip -q "$WHEEL_FILE" -d win-torch-wheel-extracted
|
|
||||||
echo "Extracted wheel contents"
|
|
||||||
|
|
||||||
# Setup CUDA libraries (cuda.lib and cudart.lib) directory
|
|
||||||
mkdir -p win-torch-wheel-extracted/lib/x64
|
|
||||||
if [ -f "win-torch-wheel/cuda.lib" ]; then
|
|
||||||
mv win-torch-wheel/cuda.lib win-torch-wheel-extracted/lib/x64/
|
|
||||||
echo "Moved cuda.lib to win-torch-wheel-extracted/lib/x64/"
|
|
||||||
fi
|
|
||||||
if [ -f "win-torch-wheel/cudart.lib" ]; then
|
|
||||||
mv win-torch-wheel/cudart.lib win-torch-wheel-extracted/lib/x64/
|
|
||||||
echo "Moved cudart.lib to win-torch-wheel-extracted/lib/x64/"
|
|
||||||
fi
|
|
||||||
|
|
||||||
# Verify CUDA libraries are present
|
|
||||||
echo "CUDA libraries:"
|
|
||||||
ls -la win-torch-wheel-extracted/lib/x64/ || echo "No CUDA libraries found"
|
|
||||||
|
|
||||||
- name: Parse ref
|
- name: Parse ref
|
||||||
id: parse-ref
|
id: parse-ref
|
||||||
run: .github/scripts/parse_ref.py
|
run: .github/scripts/parse_ref.py
|
||||||
@ -429,6 +389,8 @@ jobs:
|
|||||||
"${DOCKER_IMAGE}" \
|
"${DOCKER_IMAGE}" \
|
||||||
${DOCKER_SHELL_CMD}
|
${DOCKER_SHELL_CMD}
|
||||||
)
|
)
|
||||||
|
# Propagate download.pytorch.org IP to container
|
||||||
|
grep download.pytorch.org /etc/hosts | docker exec -i "${container_name}" sudo bash -c "/bin/cat >> /etc/hosts"
|
||||||
echo "DOCKER_CONTAINER_ID=${container_name}" >> "${GITHUB_ENV}"
|
echo "DOCKER_CONTAINER_ID=${container_name}" >> "${GITHUB_ENV}"
|
||||||
|
|
||||||
if [[ ${BUILD_ENVIRONMENT} == *"s390x"* ]]; then
|
if [[ ${BUILD_ENVIRONMENT} == *"s390x"* ]]; then
|
||||||
|
|||||||
17
.github/workflows/_rocm-test.yml
vendored
17
.github/workflows/_rocm-test.yml
vendored
@ -97,11 +97,24 @@ jobs:
|
|||||||
shell: bash
|
shell: bash
|
||||||
run: |
|
run: |
|
||||||
ngpu=$(rocminfo | grep -c -E 'Name:.*\sgfx')
|
ngpu=$(rocminfo | grep -c -E 'Name:.*\sgfx')
|
||||||
if [[ $ngpu -lt 2 ]]; then #We are temporarily reducing this down to 2 from 4 so that we can run tests on nodes with less gpus.
|
if [[ $ngpu -lt 4 ]]; then
|
||||||
echo "Error: only $ngpu GPU(s) detected, at least 2 GPUs are needed for distributed jobs"
|
echo "Error: only $ngpu GPU(s) detected, at least 4 GPUs are needed for distributed jobs"
|
||||||
exit 1
|
exit 1
|
||||||
fi
|
fi
|
||||||
|
|
||||||
|
- name: configure aws credentials
|
||||||
|
id: aws_creds
|
||||||
|
uses: aws-actions/configure-aws-credentials@ececac1a45f3b08a01d2dd070d28d111c5fe6722 # v4.1.0
|
||||||
|
with:
|
||||||
|
role-to-assume: arn:aws:iam::308535385114:role/gha_workflow_s3_and_ecr_read_only
|
||||||
|
aws-region: us-east-1
|
||||||
|
role-duration-seconds: 18000
|
||||||
|
|
||||||
|
- name: Login to Amazon ECR
|
||||||
|
id: login-ecr
|
||||||
|
continue-on-error: true
|
||||||
|
uses: aws-actions/amazon-ecr-login@062b18b96a7aff071d4dc91bc00c4c1a7945b076 # v2.0.1
|
||||||
|
|
||||||
- name: Calculate docker image
|
- name: Calculate docker image
|
||||||
id: calculate-docker-image
|
id: calculate-docker-image
|
||||||
uses: pytorch/test-infra/.github/actions/calculate-docker-image@main
|
uses: pytorch/test-infra/.github/actions/calculate-docker-image@main
|
||||||
|
|||||||
25
.github/workflows/_win-build.yml
vendored
25
.github/workflows/_win-build.yml
vendored
@ -168,31 +168,6 @@ jobs:
|
|||||||
run: |
|
run: |
|
||||||
.ci/pytorch/win-build.sh
|
.ci/pytorch/win-build.sh
|
||||||
|
|
||||||
# Collect Windows torch libs and CUDA libs for cross-compilation
|
|
||||||
- name: Collect Windows CUDA libs for cross-compilation
|
|
||||||
if: steps.build.outcome != 'skipped' && inputs.cuda-version != 'cpu'
|
|
||||||
shell: bash
|
|
||||||
run: |
|
|
||||||
set -ex
|
|
||||||
|
|
||||||
# Create directory structure if does not exist
|
|
||||||
mkdir -p /c/${{ github.run_id }}/build-results
|
|
||||||
|
|
||||||
# Copy CUDA libs
|
|
||||||
CUDA_PATH="/c/Program Files/NVIDIA GPU Computing Toolkit/CUDA/v${{ inputs.cuda-version }}"
|
|
||||||
|
|
||||||
if [ -f "${CUDA_PATH}/lib/x64/cuda.lib" ]; then
|
|
||||||
cp "${CUDA_PATH}/lib/x64/cuda.lib" /c/${{ github.run_id }}/build-results/
|
|
||||||
fi
|
|
||||||
|
|
||||||
if [ -f "${CUDA_PATH}/lib/x64/cudart.lib" ]; then
|
|
||||||
cp "${CUDA_PATH}/lib/x64/cudart.lib" /c/${{ github.run_id }}/build-results/
|
|
||||||
fi
|
|
||||||
|
|
||||||
# List collected files
|
|
||||||
echo "Collected CUDA libs:"
|
|
||||||
ls -lah /c/${{ github.run_id }}/build-results/*.lib
|
|
||||||
|
|
||||||
# Upload to github so that people can click and download artifacts
|
# Upload to github so that people can click and download artifacts
|
||||||
- name: Upload artifacts to s3
|
- name: Upload artifacts to s3
|
||||||
if: steps.build.outcome != 'skipped'
|
if: steps.build.outcome != 'skipped'
|
||||||
|
|||||||
29
.github/workflows/_xpu-test.yml
vendored
29
.github/workflows/_xpu-test.yml
vendored
@ -38,10 +38,6 @@ on:
|
|||||||
default: ""
|
default: ""
|
||||||
description: |
|
description: |
|
||||||
List of tests to include (empty string implies default list)
|
List of tests to include (empty string implies default list)
|
||||||
dashboard-tag:
|
|
||||||
required: false
|
|
||||||
type: string
|
|
||||||
default: ""
|
|
||||||
disable-monitor:
|
disable-monitor:
|
||||||
description: |
|
description: |
|
||||||
[Experimental] Disable utilization monitoring for tests.
|
[Experimental] Disable utilization monitoring for tests.
|
||||||
@ -62,11 +58,6 @@ on:
|
|||||||
required: false
|
required: false
|
||||||
type: number
|
type: number
|
||||||
default: 1
|
default: 1
|
||||||
secrets:
|
|
||||||
HUGGING_FACE_HUB_TOKEN:
|
|
||||||
required: false
|
|
||||||
description: |
|
|
||||||
HF Auth token to avoid rate limits when downloading models or datasets from hub
|
|
||||||
permissions:
|
permissions:
|
||||||
id-token: write
|
id-token: write
|
||||||
contents: read
|
contents: read
|
||||||
@ -205,8 +196,6 @@ jobs:
|
|||||||
PYTORCH_TEST_CUDA_MEM_LEAK_CHECK: ${{ matrix.mem_leak_check && '1' || '0' }}
|
PYTORCH_TEST_CUDA_MEM_LEAK_CHECK: ${{ matrix.mem_leak_check && '1' || '0' }}
|
||||||
PYTORCH_TEST_RERUN_DISABLED_TESTS: ${{ matrix.rerun_disabled_tests && '1' || '0' }}
|
PYTORCH_TEST_RERUN_DISABLED_TESTS: ${{ matrix.rerun_disabled_tests && '1' || '0' }}
|
||||||
TESTS_TO_INCLUDE: ${{ inputs.tests-to-include }}
|
TESTS_TO_INCLUDE: ${{ inputs.tests-to-include }}
|
||||||
DASHBOARD_TAG: ${{ inputs.dashboard-tag }}
|
|
||||||
HUGGING_FACE_HUB_TOKEN: ${{ secrets.HUGGING_FACE_HUB_TOKEN }}
|
|
||||||
timeout-minutes: ${{ fromJson(steps.test-timeout.outputs.timeout) }}
|
timeout-minutes: ${{ fromJson(steps.test-timeout.outputs.timeout) }}
|
||||||
run: |
|
run: |
|
||||||
# Fetch aws credential from IMDs
|
# Fetch aws credential from IMDs
|
||||||
@ -257,8 +246,6 @@ jobs:
|
|||||||
-e PYTORCH_TEST_RERUN_DISABLED_TESTS \
|
-e PYTORCH_TEST_RERUN_DISABLED_TESTS \
|
||||||
-e TESTS_TO_INCLUDE \
|
-e TESTS_TO_INCLUDE \
|
||||||
-e ZE_AFFINITY_MASK \
|
-e ZE_AFFINITY_MASK \
|
||||||
-e HUGGING_FACE_HUB_TOKEN \
|
|
||||||
-e DASHBOARD_TAG \
|
|
||||||
--env-file="/tmp/github_env_${GITHUB_RUN_ID}" \
|
--env-file="/tmp/github_env_${GITHUB_RUN_ID}" \
|
||||||
--ulimit stack=10485760:83886080 \
|
--ulimit stack=10485760:83886080 \
|
||||||
--ulimit core=0 \
|
--ulimit core=0 \
|
||||||
@ -344,21 +331,5 @@ jobs:
|
|||||||
if-no-files-found: ignore
|
if-no-files-found: ignore
|
||||||
path: ./**/core.[1-9]*
|
path: ./**/core.[1-9]*
|
||||||
|
|
||||||
- name: Authenticate with AWS
|
|
||||||
uses: aws-actions/configure-aws-credentials@ececac1a45f3b08a01d2dd070d28d111c5fe6722 # v4.1.0
|
|
||||||
with:
|
|
||||||
role-to-assume: arn:aws:iam::308535385114:role/gha_workflow_upload-benchmark-results
|
|
||||||
# The max duration enforced by the server side
|
|
||||||
role-duration-seconds: 18000
|
|
||||||
aws-region: us-east-1
|
|
||||||
|
|
||||||
- name: Upload the benchmark results
|
|
||||||
uses: pytorch/test-infra/.github/actions/upload-benchmark-results@main
|
|
||||||
with:
|
|
||||||
benchmark-results-dir: test/test-reports
|
|
||||||
dry-run: false
|
|
||||||
schema-version: v3
|
|
||||||
github-token: ${{ secrets.GITHUB_TOKEN }}
|
|
||||||
|
|
||||||
- name: Teardown XPU
|
- name: Teardown XPU
|
||||||
uses: ./.github/actions/teardown-xpu
|
uses: ./.github/actions/teardown-xpu
|
||||||
|
|||||||
61
.github/workflows/b200-distributed.yml
vendored
61
.github/workflows/b200-distributed.yml
vendored
@ -1,61 +0,0 @@
|
|||||||
name: CI for distributed tests on B200
|
|
||||||
|
|
||||||
on:
|
|
||||||
pull_request:
|
|
||||||
paths:
|
|
||||||
- .github/workflows/b200-distributed.yml
|
|
||||||
workflow_dispatch:
|
|
||||||
push:
|
|
||||||
tags:
|
|
||||||
- ciflow/b200-distributed/*
|
|
||||||
schedule:
|
|
||||||
- cron: 46 8 * * * # about 1:46am PDT
|
|
||||||
|
|
||||||
concurrency:
|
|
||||||
group: ${{ github.workflow }}-${{ github.event.pull_request.number || github.sha }}-${{ github.event_name == 'workflow_dispatch' }}-${{ github.event_name == 'schedule' }}
|
|
||||||
cancel-in-progress: true
|
|
||||||
|
|
||||||
permissions:
|
|
||||||
id-token: write
|
|
||||||
contents: read
|
|
||||||
|
|
||||||
jobs:
|
|
||||||
|
|
||||||
get-label-type:
|
|
||||||
if: github.repository_owner == 'pytorch'
|
|
||||||
name: get-label-type
|
|
||||||
uses: pytorch/pytorch/.github/workflows/_runner-determinator.yml@main
|
|
||||||
with:
|
|
||||||
triggering_actor: ${{ github.triggering_actor }}
|
|
||||||
issue_owner: ${{ github.event.pull_request.user.login || github.event.issue.user.login }}
|
|
||||||
curr_branch: ${{ github.head_ref || github.ref_name }}
|
|
||||||
curr_ref_type: ${{ github.ref_type }}
|
|
||||||
|
|
||||||
linux-jammy-cuda12_8-py3_10-gcc11-build-distributed-b200:
|
|
||||||
name: linux-jammy-cuda12.8-py3.10-gcc11-build-distributed-b200
|
|
||||||
uses: ./.github/workflows/_linux-build.yml
|
|
||||||
needs: get-label-type
|
|
||||||
with:
|
|
||||||
runner_prefix: "${{ needs.get-label-type.outputs.label-type }}"
|
|
||||||
build-environment: linux-jammy-cuda12.8-py3.10-gcc11-distributed-b200
|
|
||||||
docker-image-name: ci-image:pytorch-linux-jammy-cuda12.8-cudnn9-py3-gcc11
|
|
||||||
cuda-arch-list: '10.0'
|
|
||||||
test-matrix: |
|
|
||||||
{ include: [
|
|
||||||
{ config: "distributed", shard: 1, num_shards: 2, runner: "linux.dgx.b200.8" },
|
|
||||||
{ config: "distributed", shard: 2, num_shards: 2, runner: "linux.dgx.b200.8" },
|
|
||||||
]}
|
|
||||||
secrets: inherit
|
|
||||||
|
|
||||||
linux-jammy-cuda12_8-py3_10-gcc11-test-distributed-b200:
|
|
||||||
name: linux-jammy-cuda12.8-py3.10-gcc11-test-b200
|
|
||||||
uses: ./.github/workflows/_linux-test.yml
|
|
||||||
needs:
|
|
||||||
- linux-jammy-cuda12_8-py3_10-gcc11-build-distributed-b200
|
|
||||||
with:
|
|
||||||
timeout-minutes: 1200
|
|
||||||
build-environment: linux-jammy-cuda12.8-py3.10-gcc11-distributed-b200
|
|
||||||
docker-image: ${{ needs.linux-jammy-cuda12_8-py3_10-gcc11-build-distributed-b200.outputs.docker-image }}
|
|
||||||
test-matrix: ${{ needs.linux-jammy-cuda12_8-py3_10-gcc11-build-distributed-b200.outputs.test-matrix }}
|
|
||||||
aws-role-to-assume: arn:aws:iam::308535385114:role/gha_workflow_s3_and_ecr_read_only
|
|
||||||
secrets: inherit
|
|
||||||
1
.github/workflows/b200-symm-mem.yml
vendored
1
.github/workflows/b200-symm-mem.yml
vendored
@ -37,6 +37,7 @@ jobs:
|
|||||||
needs: get-label-type
|
needs: get-label-type
|
||||||
with:
|
with:
|
||||||
runner_prefix: "${{ needs.get-label-type.outputs.label-type }}"
|
runner_prefix: "${{ needs.get-label-type.outputs.label-type }}"
|
||||||
|
runner: linux.12xlarge.memory
|
||||||
build-environment: linux-jammy-cuda12.8-py3.10-gcc11-sm100-symm
|
build-environment: linux-jammy-cuda12.8-py3.10-gcc11-sm100-symm
|
||||||
docker-image-name: ci-image:pytorch-linux-jammy-cuda12.8-cudnn9-py3-gcc11
|
docker-image-name: ci-image:pytorch-linux-jammy-cuda12.8-cudnn9-py3-gcc11
|
||||||
cuda-arch-list: '10.0'
|
cuda-arch-list: '10.0'
|
||||||
|
|||||||
2
.github/workflows/build-almalinux-images.yml
vendored
2
.github/workflows/build-almalinux-images.yml
vendored
@ -36,7 +36,7 @@ jobs:
|
|||||||
runs-on: linux.9xlarge.ephemeral
|
runs-on: linux.9xlarge.ephemeral
|
||||||
strategy:
|
strategy:
|
||||||
matrix:
|
matrix:
|
||||||
tag: ["cuda12.6", "cuda12.8", "cuda12.9", "cuda13.0", "rocm7.0", "rocm7.1", "cpu"]
|
tag: ["cuda12.6", "cuda12.8", "cuda12.9", "cuda13.0", "rocm6.4", "rocm7.0", "cpu"]
|
||||||
steps:
|
steps:
|
||||||
- name: Build docker image
|
- name: Build docker image
|
||||||
uses: pytorch/pytorch/.github/actions/binary-docker-build@main
|
uses: pytorch/pytorch/.github/actions/binary-docker-build@main
|
||||||
|
|||||||
Some files were not shown because too many files have changed in this diff Show More
Reference in New Issue
Block a user