Compare commits

..

4 Commits

Author SHA1 Message Date
suo
113cdd7044 really fix lint 2025-09-25 10:47:47 -07:00
suo
d28258fca2 fix lint 2025-09-25 10:32:54 -07:00
suo
c543089707 add test 2025-09-25 10:20:02 -07:00
suo
fc415518ec [DTensor] Add sharding prop strategy for logsumexp
As title
2025-09-25 09:53:27 -07:00
2007 changed files with 27317 additions and 50663 deletions

View File

@ -15,8 +15,6 @@ fi
# Compress the fatbin with -compress-mode=size for CUDA 13
if [[ "$DESIRED_CUDA" == *"13"* ]]; then
export TORCH_NVCC_FLAGS="-compress-mode=size"
# Bundle ptxas into the cu13 wheel, see https://github.com/pytorch/pytorch/issues/163801
export BUILD_BUNDLE_PTXAS=1
fi
SCRIPTPATH="$( cd -- "$(dirname "$0")" >/dev/null 2>&1 ; pwd -P )"

View File

@ -13,6 +13,49 @@ def list_dir(path: str) -> list[str]:
return check_output(["ls", "-1", path]).decode().split("\n")
def build_ArmComputeLibrary() -> None:
"""
Using ArmComputeLibrary for aarch64 PyTorch
"""
print("Building Arm Compute Library")
acl_build_flags = [
"debug=0",
"neon=1",
"opencl=0",
"os=linux",
"openmp=1",
"cppthreads=0",
"arch=armv8a",
"multi_isa=1",
"fixed_format_kernels=1",
"build=native",
]
acl_install_dir = "/acl"
acl_checkout_dir = os.getenv("ACL_SOURCE_DIR", "ComputeLibrary")
if os.path.isdir(acl_install_dir):
shutil.rmtree(acl_install_dir)
if not os.path.isdir(acl_checkout_dir) or not len(os.listdir(acl_checkout_dir)):
check_call(
[
"git",
"clone",
"https://github.com/ARM-software/ComputeLibrary.git",
"-b",
"v25.02",
"--depth",
"1",
"--shallow-submodules",
]
)
check_call(
["scons", "Werror=1", f"-j{os.cpu_count()}"] + acl_build_flags,
cwd=acl_checkout_dir,
)
for d in ["arm_compute", "include", "utils", "support", "src", "build"]:
shutil.copytree(f"{acl_checkout_dir}/{d}", f"{acl_install_dir}/{d}")
def replace_tag(filename) -> None:
with open(filename) as f:
lines = f.readlines()
@ -313,17 +356,23 @@ if __name__ == "__main__":
build_vars += f"BUILD_TEST=0 PYTORCH_BUILD_VERSION={branch[1 : branch.find('-')]} PYTORCH_BUILD_NUMBER=1 "
if enable_mkldnn:
build_ArmComputeLibrary()
print("build pytorch with mkldnn+acl backend")
build_vars += "USE_MKLDNN=ON USE_MKLDNN_ACL=ON "
build_vars += "ACL_ROOT_DIR=/acl "
build_vars += (
"USE_MKLDNN=ON USE_MKLDNN_ACL=ON "
"ACL_ROOT_DIR=/acl "
"LD_LIBRARY_PATH=/pytorch/build/lib:/acl/build:$LD_LIBRARY_PATH "
"ACL_INCLUDE_DIR=/acl/build "
"ACL_LIBRARY=/acl/build "
)
if enable_cuda:
build_vars += "BLAS=NVPL "
else:
build_vars += "BLAS=OpenBLAS OpenBLAS_HOME=/opt/OpenBLAS "
build_vars += "BLAS=OpenBLAS OpenBLAS_HOME=/OpenBLAS "
else:
print("build pytorch without mkldnn backend")
os.system(f"cd /pytorch; {build_vars} python3 -m build --wheel --no-isolation")
os.system(f"cd /pytorch; {build_vars} python3 setup.py bdist_wheel")
if enable_cuda:
print("Updating Cuda Dependency")
filename = os.listdir("/pytorch/dist/")

View File

@ -299,6 +299,40 @@ def install_condaforge_python(host: RemoteHost, python_version="3.8") -> None:
)
def build_OpenBLAS(host: RemoteHost, git_clone_flags: str = "") -> None:
print("Building OpenBLAS")
host.run_cmd(
f"git clone https://github.com/xianyi/OpenBLAS -b v0.3.28 {git_clone_flags}"
)
make_flags = "NUM_THREADS=64 USE_OPENMP=1 NO_SHARED=1 DYNAMIC_ARCH=1 TARGET=ARMV8"
host.run_cmd(
f"pushd OpenBLAS && make {make_flags} -j8 && sudo make {make_flags} install && popd && rm -rf OpenBLAS"
)
def build_ArmComputeLibrary(host: RemoteHost, git_clone_flags: str = "") -> None:
print("Building Arm Compute Library")
acl_build_flags = " ".join(
[
"debug=0",
"neon=1",
"opencl=0",
"os=linux",
"openmp=1",
"cppthreads=0",
"arch=armv8a",
"multi_isa=1",
"fixed_format_kernels=1",
"build=native",
]
)
host.run_cmd(
f"git clone https://github.com/ARM-software/ComputeLibrary.git -b v25.02 {git_clone_flags}"
)
host.run_cmd(f"cd ComputeLibrary && scons Werror=1 -j8 {acl_build_flags}")
def embed_libgomp(host: RemoteHost, use_conda, wheel_name) -> None:
host.run_cmd("pip3 install auditwheel")
host.run_cmd(
@ -408,7 +442,7 @@ def build_torchvision(
if host.using_docker():
build_vars += " CMAKE_SHARED_LINKER_FLAGS=-Wl,-z,max-page-size=0x10000"
host.run_cmd(f"cd vision && {build_vars} python3 -m build --wheel --no-isolation")
host.run_cmd(f"cd vision && {build_vars} python3 setup.py bdist_wheel")
vision_wheel_name = host.list_dir("vision/dist")[0]
embed_libgomp(host, use_conda, os.path.join("vision", "dist", vision_wheel_name))
@ -463,7 +497,7 @@ def build_torchdata(
if host.using_docker():
build_vars += " CMAKE_SHARED_LINKER_FLAGS=-Wl,-z,max-page-size=0x10000"
host.run_cmd(f"cd data && {build_vars} python3 -m build --wheel --no-isolation")
host.run_cmd(f"cd data && {build_vars} python3 setup.py bdist_wheel")
wheel_name = host.list_dir("data/dist")[0]
embed_libgomp(host, use_conda, os.path.join("data", "dist", wheel_name))
@ -519,7 +553,7 @@ def build_torchtext(
if host.using_docker():
build_vars += " CMAKE_SHARED_LINKER_FLAGS=-Wl,-z,max-page-size=0x10000"
host.run_cmd(f"cd text && {build_vars} python3 -m build --wheel --no-isolation")
host.run_cmd(f"cd text && {build_vars} python3 setup.py bdist_wheel")
wheel_name = host.list_dir("text/dist")[0]
embed_libgomp(host, use_conda, os.path.join("text", "dist", wheel_name))
@ -580,7 +614,7 @@ def build_torchaudio(
host.run_cmd(
f"cd audio && export FFMPEG_ROOT=$(pwd)/third_party/ffmpeg && export USE_FFMPEG=1 \
&& ./packaging/ffmpeg/build.sh \
&& {build_vars} python3 -m build --wheel --no-isolation"
&& {build_vars} python3 setup.py bdist_wheel"
)
wheel_name = host.list_dir("audio/dist")[0]
@ -666,6 +700,7 @@ def start_build(
configure_system(
host, compiler=compiler, use_conda=use_conda, python_version=python_version
)
build_OpenBLAS(host, git_clone_flags)
if host.using_docker():
print("Move libgfortant.a into a standard location")
@ -688,12 +723,10 @@ def start_build(
f"git clone --recurse-submodules -b {branch} https://github.com/pytorch/pytorch {git_clone_flags}"
)
host.run_cmd("pytorch/.ci/docker/common/install_openblas.sh")
print("Building PyTorch wheel")
build_opts = ""
if pytorch_build_number is not None:
build_opts += f" -C--build-option=--build-number={pytorch_build_number}"
build_opts += f" --build-number {pytorch_build_number}"
# Breakpad build fails on aarch64
build_vars = "USE_BREAKPAD=0 "
if branch == "nightly":
@ -710,18 +743,15 @@ def start_build(
if host.using_docker():
build_vars += " CMAKE_SHARED_LINKER_FLAGS=-Wl,-z,max-page-size=0x10000"
if enable_mkldnn:
host.run_cmd("pytorch/.ci/docker/common/install_acl.sh")
build_ArmComputeLibrary(host, git_clone_flags)
print("build pytorch with mkldnn+acl backend")
build_vars += " USE_MKLDNN=ON USE_MKLDNN_ACL=ON"
build_vars += " BLAS=OpenBLAS"
build_vars += " OpenBLAS_HOME=/opt/OpenBLAS"
build_vars += " ACL_ROOT_DIR=/acl"
host.run_cmd(
f"cd $HOME/pytorch && {build_vars} python3 -m build --wheel --no-isolation{build_opts}"
f"cd $HOME/pytorch && export ACL_ROOT_DIR=$HOME/ComputeLibrary && {build_vars} python3 setup.py bdist_wheel{build_opts}"
)
print("Repair the wheel")
pytorch_wheel_name = host.list_dir("pytorch/dist")[0]
ld_library_path = "/acl/build:$HOME/pytorch/build/lib"
ld_library_path = "$HOME/acl/build:$HOME/pytorch/build/lib"
host.run_cmd(
f"export LD_LIBRARY_PATH={ld_library_path} && auditwheel repair $HOME/pytorch/dist/{pytorch_wheel_name}"
)
@ -733,7 +763,7 @@ def start_build(
else:
print("build pytorch without mkldnn backend")
host.run_cmd(
f"cd pytorch && {build_vars} python3 -m build --wheel --no-isolation{build_opts}"
f"cd pytorch && {build_vars} python3 setup.py bdist_wheel{build_opts}"
)
print("Deleting build folder")
@ -877,7 +907,7 @@ def terminate_instances(instance_type: str) -> None:
def parse_arguments():
from argparse import ArgumentParser
parser = ArgumentParser("Build and test AARCH64 wheels using EC2")
parser = ArgumentParser("Builid and test AARCH64 wheels using EC2")
parser.add_argument("--key-name", type=str)
parser.add_argument("--debug", action="store_true")
parser.add_argument("--build-only", action="store_true")

View File

@ -69,8 +69,7 @@ RUN bash ./install_cuda.sh 13.0
ENV DESIRED_CUDA=13.0
FROM ${ROCM_IMAGE} as rocm
ARG PYTORCH_ROCM_ARCH
ENV PYTORCH_ROCM_ARCH ${PYTORCH_ROCM_ARCH}
ENV PYTORCH_ROCM_ARCH="gfx900;gfx906;gfx908;gfx90a;gfx942;gfx1030;gfx1100;gfx1101;gfx1102;gfx1200;gfx1201"
ADD ./common/install_mkl.sh install_mkl.sh
RUN bash ./install_mkl.sh && rm install_mkl.sh
ENV MKLROOT /opt/intel

View File

@ -36,12 +36,6 @@ case ${DOCKER_TAG_PREFIX} in
;;
rocm*)
BASE_TARGET=rocm
PYTORCH_ROCM_ARCH="gfx900;gfx906;gfx908;gfx90a;gfx942;gfx1030;gfx1100;gfx1101;gfx1102;gfx1200;gfx1201"
# add gfx950, gfx115x conditionally starting in ROCm 7.0
if [[ "$ROCM_VERSION" == *"7.0"* ]]; then
PYTORCH_ROCM_ARCH="${PYTORCH_ROCM_ARCH};gfx950;gfx1150;gfx1151"
fi
EXTRA_BUILD_ARGS="${EXTRA_BUILD_ARGS} --build-arg PYTORCH_ROCM_ARCH=${PYTORCH_ROCM_ARCH}"
;;
*)
echo "ERROR: Unknown docker tag ${DOCKER_TAG_PREFIX}"

View File

@ -84,8 +84,8 @@ fi
_UCX_COMMIT=7836b165abdbe468a2f607e7254011c07d788152
_UCC_COMMIT=430e241bf5d38cbc73fc7a6b89155397232e3f96
if [[ "$image" == *rocm* ]]; then
_UCX_COMMIT=29831d319e6be55cb8c768ca61de335c934ca39e
_UCC_COMMIT=9f4b242cbbd8b1462cbc732eb29316cdfa124b77
_UCX_COMMIT=cc312eaa4655c0cc5c2bcd796db938f90563bcf6
_UCC_COMMIT=0c0fc21559835044ab107199e334f7157d6a0d3d
fi
tag=$(echo $image | awk -F':' '{print $2}')
@ -175,16 +175,27 @@ case "$tag" in
fi
GCC_VERSION=11
VISION=yes
ROCM_VERSION=6.4
NINJA_VERSION=1.9.0
TRITON=yes
KATEX=yes
UCX_COMMIT=${_UCX_COMMIT}
UCC_COMMIT=${_UCC_COMMIT}
if [[ $tag =~ "benchmarks" ]]; then
INDUCTOR_BENCHMARKS=yes
fi
;;
pytorch-linux-noble-rocm-alpha-py3)
ANACONDA_PYTHON_VERSION=3.12
GCC_VERSION=11
VISION=yes
ROCM_VERSION=7.0
NINJA_VERSION=1.9.0
TRITON=yes
KATEX=yes
UCX_COMMIT=${_UCX_COMMIT}
UCC_COMMIT=${_UCC_COMMIT}
PYTORCH_ROCM_ARCH="gfx90a;gfx942;gfx950;gfx1100"
if [[ $tag =~ "benchmarks" ]]; then
INDUCTOR_BENCHMARKS=yes
fi
PYTORCH_ROCM_ARCH="gfx90a;gfx942;gfx950"
;;
pytorch-linux-jammy-xpu-n-1-py3)
ANACONDA_PYTHON_VERSION=3.10
@ -344,7 +355,7 @@ docker build \
--build-arg "NINJA_VERSION=${NINJA_VERSION:-}" \
--build-arg "KATEX=${KATEX:-}" \
--build-arg "ROCM_VERSION=${ROCM_VERSION:-}" \
--build-arg "PYTORCH_ROCM_ARCH=${PYTORCH_ROCM_ARCH}" \
--build-arg "PYTORCH_ROCM_ARCH=${PYTORCH_ROCM_ARCH:-gfx90a;gfx942}" \
--build-arg "IMAGE_NAME=${IMAGE_NAME}" \
--build-arg "UCX_COMMIT=${UCX_COMMIT}" \
--build-arg "UCC_COMMIT=${UCC_COMMIT}" \
@ -441,3 +452,12 @@ elif [ "$HAS_TRITON" = "yes" ]; then
echo "expecting triton to not be installed, but it is"
exit 1
fi
# Sanity check cmake version. Executorch reinstalls cmake and I'm not sure if
# they support 4.0.0 yet, so exclude them from this check.
CMAKE_VERSION=$(drun cmake --version)
if [[ "$EXECUTORCH" != *yes* && "$CMAKE_VERSION" != *4.* ]]; then
echo "CMake version is not 4.0.0:"
drun cmake --version
exit 1
fi

View File

@ -1 +1 @@
deb42f2a8e48f5032b4a98ee781a15fa87a157cf
e0dda9059d082537cee36be6c5e4fe3b18c880c0

View File

@ -1 +1 @@
v2.27.5-1
v2.27.5-1

View File

@ -1 +1 @@
7416ffcb92cdbe98d9f97e4e6f95247e46dfc9fd
bbb06c0334a6772b92d24bde54956e675c8c6604

27
.ci/docker/common/install_acl.sh Executable file → Normal file
View File

@ -1,27 +1,16 @@
#!/bin/bash
# Script used only in CD pipeline
set -euo pipefail
set -eux
ACL_VERSION=${ACL_VERSION:-"v25.02"}
ACL_INSTALL_DIR="/acl"
readonly version=v25.02
readonly src_host=https://github.com/ARM-software
readonly src_repo=ComputeLibrary
# Clone ACL
git clone https://github.com/ARM-software/ComputeLibrary.git -b "${ACL_VERSION}" --depth 1 --shallow-submodules
[[ ! -d ${src_repo} ]] && git clone ${src_host}/${src_repo}.git
cd ${src_repo}
git checkout $version
ACL_CHECKOUT_DIR="ComputeLibrary"
# Build with scons
pushd $ACL_CHECKOUT_DIR
scons -j8 Werror=0 debug=0 neon=1 opencl=0 embed_kernels=0 \
os=linux arch=armv8a build=native multi_isa=1 \
fixed_format_kernels=1 openmp=1 cppthreads=0
popd
# Install ACL
sudo mkdir -p ${ACL_INSTALL_DIR}
for d in arm_compute include utils support src build
do
sudo cp -r ${ACL_CHECKOUT_DIR}/${d} ${ACL_INSTALL_DIR}/${d}
done
rm -rf $ACL_CHECKOUT_DIR

View File

@ -19,8 +19,8 @@ pip_install \
transformers==4.36.2
pip_install coloredlogs packaging
pip_install onnxruntime==1.23.0
pip_install onnxscript==0.5.3
pip_install onnxruntime==1.22.1
pip_install onnxscript==0.4.0
# Cache the transformers model to be used later by ONNX tests. We need to run the transformers
# package to download the model. By default, the model is cached at ~/.cache/huggingface/hub/

12
.ci/docker/common/install_openblas.sh Executable file → Normal file
View File

@ -3,10 +3,8 @@
set -ex
OPENBLAS_VERSION=${OPENBLAS_VERSION:-"v0.3.30"}
# Clone OpenBLAS
git clone https://github.com/OpenMathLib/OpenBLAS.git -b "${OPENBLAS_VERSION}" --depth 1 --shallow-submodules
cd /
git clone https://github.com/OpenMathLib/OpenBLAS.git -b "${OPENBLAS_VERSION:-v0.3.30}" --depth 1 --shallow-submodules
OPENBLAS_CHECKOUT_DIR="OpenBLAS"
OPENBLAS_BUILD_FLAGS="
@ -19,7 +17,5 @@ CFLAGS=-O3
BUILD_BFLOAT16=1
"
make -j8 ${OPENBLAS_BUILD_FLAGS} -C $OPENBLAS_CHECKOUT_DIR
sudo make install -C $OPENBLAS_CHECKOUT_DIR
rm -rf $OPENBLAS_CHECKOUT_DIR
make -j8 ${OPENBLAS_BUILD_FLAGS} -C ${OPENBLAS_CHECKOUT_DIR}
make -j8 ${OPENBLAS_BUILD_FLAGS} install -C ${OPENBLAS_CHECKOUT_DIR}

View File

@ -42,6 +42,12 @@ EOF
rocm_baseurl="http://repo.radeon.com/rocm/apt/${ROCM_VERSION}"
amdgpu_baseurl="https://repo.radeon.com/amdgpu/${ROCM_VERSION}/ubuntu"
# Special case for ROCM_VERSION == 7.0
if [[ $(ver "$ROCM_VERSION") -eq $(ver 7.0) ]]; then
rocm_baseurl="https://repo.radeon.com/rocm/apt/7.0_alpha2"
amdgpu_baseurl="https://repo.radeon.com/amdgpu/30.10_alpha2/ubuntu"
fi
# Add amdgpu repository
UBUNTU_VERSION_NAME=`cat /etc/os-release | grep UBUNTU_CODENAME | awk -F= '{print $2}'`
echo "deb [arch=amd64] ${amdgpu_baseurl} ${UBUNTU_VERSION_NAME} main" > /etc/apt/sources.list.d/amdgpu.list

View File

@ -12,8 +12,8 @@ function do_install() {
rocm_version_nodot=${rocm_version//./}
# https://github.com/icl-utk-edu/magma/pull/65
MAGMA_VERSION=d6e4117bc88e73f06d26c6c2e14f064e8fc3d1ec
# Version 2.7.2 + ROCm related updates
MAGMA_VERSION=a1625ff4d9bc362906bd01f805dbbe12612953f6
magma_archive="magma-rocm${rocm_version_nodot}-${MAGMA_VERSION}-1.tar.bz2"
rocm_dir="/opt/rocm"

View File

@ -66,15 +66,15 @@ if [ -n "${UBUNTU_VERSION}" ] && [ -n "${GCC_VERSION}" ] && [[ "${GCC_VERSION}"
# Triton needs at least gcc-9 to build
apt-get install -y g++-9
CXX=g++-9 conda_run python -m build --wheel --no-isolation
CXX=g++-9 conda_run python setup.py bdist_wheel
elif [ -n "${UBUNTU_VERSION}" ] && [ -n "${CLANG_VERSION}" ]; then
# Triton needs <filesystem> which surprisingly is not available with clang-9 toolchain
add-apt-repository -y ppa:ubuntu-toolchain-r/test
apt-get install -y g++-9
CXX=g++-9 conda_run python -m build --wheel --no-isolation
CXX=g++-9 conda_run python setup.py bdist_wheel
else
conda_run python -m build --wheel --no-isolation
conda_run python setup.py bdist_wheel
fi
# Copy the wheel to /opt for multi stage docker builds

View File

@ -1,9 +0,0 @@
#!/bin/bash
set -xe
# Script used in Linux x86 and aarch64 CD pipeline
# Workaround for exposing statically linked libstdc++ CXX11 ABI symbols.
# see: https://github.com/pytorch/pytorch/issues/133437
LIBNONSHARED=$(gcc -print-file-name=libstdc++_nonshared.a)
nm -g $LIBNONSHARED | grep " T " | grep recursive_directory_iterator | cut -c 20- > weaken-symbols.txt
objcopy --weaken-symbols weaken-symbols.txt $LIBNONSHARED $LIBNONSHARED

View File

@ -40,16 +40,12 @@ case ${DOCKER_TAG_PREFIX} in
;;
rocm*)
# we want the patch version of 6.4 instead
if [[ "$GPU_ARCH_VERSION" == *"6.4"* ]]; then
if [[ $(ver $GPU_ARCH_VERSION) -eq $(ver 6.4) ]]; then
GPU_ARCH_VERSION="${GPU_ARCH_VERSION}.2"
fi
BASE_TARGET=rocm
GPU_IMAGE=rocm/dev-ubuntu-22.04:${GPU_ARCH_VERSION}-complete
PYTORCH_ROCM_ARCH="gfx900;gfx906;gfx908;gfx90a;gfx942;gfx1030;gfx1100;gfx1101;gfx1102;gfx1200;gfx1201"
# add gfx950, gfx115x conditionally starting in ROCm 7.0
if [[ "$GPU_ARCH_VERSION" == *"7.0"* ]]; then
PYTORCH_ROCM_ARCH="${PYTORCH_ROCM_ARCH};gfx950;gfx1150;gfx1151"
fi
DOCKER_GPU_BUILD_ARG="--build-arg PYTORCH_ROCM_ARCH=${PYTORCH_ROCM_ARCH} --build-arg ROCM_VERSION=${GPU_ARCH_VERSION}"
;;
*)

View File

@ -130,8 +130,7 @@ ENV LD_LIBRARY_PATH=/opt/rh/gcc-toolset-${DEVTOOLSET_VERSION}/root/usr/lib64:/op
RUN for cpython_version in "cp312-cp312" "cp313-cp313" "cp313-cp313t"; do \
/opt/python/${cpython_version}/bin/python -m pip install setuptools wheel; \
done;
ADD ./common/patch_libstdc.sh patch_libstdc.sh
RUN bash ./patch_libstdc.sh && rm patch_libstdc.sh
# cmake-3.18.4 from pip; force in case cmake3 already exists
RUN yum install -y python3-pip && \

View File

@ -62,13 +62,6 @@ ARG OPENBLAS_VERSION
ADD ./common/install_openblas.sh install_openblas.sh
RUN bash ./install_openblas.sh && rm install_openblas.sh
# Install Arm Compute Library
FROM base as arm_compute
# use python3.9 to install scons
RUN python3.9 -m pip install scons==4.7.0
RUN ln -sf /opt/python/cp39-cp39/bin/scons /usr/local/bin
COPY ./common/install_acl.sh install_acl.sh
RUN bash ./install_acl.sh && rm install_acl.sh
FROM base as final
# remove unnecessary python versions
@ -77,7 +70,4 @@ RUN rm -rf /opt/python/cp26-cp26mu /opt/_internal/cpython-2.6.9-ucs4
RUN rm -rf /opt/python/cp33-cp33m /opt/_internal/cpython-3.3.6
RUN rm -rf /opt/python/cp34-cp34m /opt/_internal/cpython-3.4.6
COPY --from=openblas /opt/OpenBLAS/ /opt/OpenBLAS/
COPY --from=arm_compute /acl /acl
ENV LD_LIBRARY_PATH=/opt/OpenBLAS/lib:/acl/build/:$LD_LIBRARY_PATH
ADD ./common/patch_libstdc.sh patch_libstdc.sh
RUN bash ./patch_libstdc.sh && rm patch_libstdc.sh
ENV LD_LIBRARY_PATH=/opt/OpenBLAS/lib:$LD_LIBRARY_PATH

View File

@ -86,15 +86,6 @@ FROM base as nvpl
ADD ./common/install_nvpl.sh install_nvpl.sh
RUN bash ./install_nvpl.sh && rm install_nvpl.sh
# Install Arm Compute Library
FROM base as arm_compute
# use python3.9 to install scons
RUN python3.9 -m pip install scons==4.7.0
RUN ln -sf /opt/python/cp39-cp39/bin/scons /usr/local/bin
COPY ./common/install_acl.sh install_acl.sh
RUN bash ./install_acl.sh && rm install_acl.sh
FROM base as final
FROM final as cuda_final
ARG BASE_CUDA_VERSION
RUN rm -rf /usr/local/cuda-${BASE_CUDA_VERSION}
@ -102,9 +93,5 @@ COPY --from=cuda /usr/local/cuda-${BASE_CUDA_VERSION} /usr/local/cuda-${BAS
COPY --from=magma /usr/local/cuda-${BASE_CUDA_VERSION} /usr/local/cuda-${BASE_CUDA_VERSION}
COPY --from=nvpl /opt/nvpl/lib/ /usr/local/lib/
COPY --from=nvpl /opt/nvpl/include/ /usr/local/include/
COPY --from=arm_compute /acl /acl
RUN ln -sf /usr/local/cuda-${BASE_CUDA_VERSION} /usr/local/cuda
ENV PATH=/usr/local/cuda/bin:$PATH
ENV LD_LIBRARY_PATH=/acl/build/:$LD_LIBRARY_PATH
ADD ./common/patch_libstdc.sh patch_libstdc.sh
RUN bash ./patch_libstdc.sh && rm patch_libstdc.sh

View File

@ -0,0 +1,71 @@
FROM centos:8 as base
ENV LC_ALL en_US.UTF-8
ENV LANG en_US.UTF-8
ENV LANGUAGE en_US.UTF-8
ENV PATH /opt/rh/gcc-toolset-11/root/bin/:/usr/local/sbin:/usr/local/bin:/usr/sbin:/usr/bin:/sbin:/bin
# change to a valid repo
RUN sed -i 's|#baseurl=http://mirror.centos.org|baseurl=http://vault.centos.org|g' /etc/yum.repos.d/CentOS-Linux-*.repo
# enable to install ninja-build
RUN sed -i 's|enabled=0|enabled=1|g' /etc/yum.repos.d/CentOS-Linux-PowerTools.repo
RUN yum -y update
RUN yum install -y wget curl perl util-linux xz bzip2 git patch which zlib-devel sudo
RUN yum install -y autoconf automake make cmake gdb gcc-toolset-11-gcc-c++
FROM base as openssl
ADD ./common/install_openssl.sh install_openssl.sh
RUN bash ./install_openssl.sh && rm install_openssl.sh
# Install python
FROM base as python
RUN yum install -y openssl-devel zlib-devel bzip2-devel ncurses-devel sqlite-devel readline-devel tk-devel gdbm-devel libpcap-devel xz-devel libffi-devel
ADD common/install_cpython.sh install_cpython.sh
RUN bash ./install_cpython.sh && rm install_cpython.sh
FROM base as conda
ADD ./common/install_conda_docker.sh install_conda.sh
RUN bash ./install_conda.sh && rm install_conda.sh
RUN /opt/conda/bin/conda install -y cmake
FROM base as intel
# Install MKL
COPY --from=python /opt/python /opt/python
COPY --from=python /opt/_internal /opt/_internal
COPY --from=conda /opt/conda /opt/conda
ENV PATH=/opt/conda/bin:$PATH
ADD ./common/install_mkl.sh install_mkl.sh
RUN bash ./install_mkl.sh && rm install_mkl.sh
FROM base as patchelf
ADD ./common/install_patchelf.sh install_patchelf.sh
RUN bash ./install_patchelf.sh && rm install_patchelf.sh
RUN cp $(which patchelf) /patchelf
FROM base as jni
ADD ./common/install_jni.sh install_jni.sh
ADD ./java/jni.h jni.h
RUN bash ./install_jni.sh && rm install_jni.sh
FROM base as libpng
ADD ./common/install_libpng.sh install_libpng.sh
RUN bash ./install_libpng.sh && rm install_libpng.sh
FROM base as final
COPY --from=openssl /opt/openssl /opt/openssl
COPY --from=python /opt/python /opt/python
COPY --from=python /opt/_internal /opt/_internal
COPY --from=intel /opt/intel /opt/intel
COPY --from=conda /opt/conda /opt/conda
COPY --from=patchelf /usr/local/bin/patchelf /usr/local/bin/patchelf
COPY --from=jni /usr/local/include/jni.h /usr/local/include/jni.h
COPY --from=libpng /usr/local/bin/png* /usr/local/bin/
COPY --from=libpng /usr/local/bin/libpng* /usr/local/bin/
COPY --from=libpng /usr/local/include/png* /usr/local/include/
COPY --from=libpng /usr/local/include/libpng* /usr/local/include/
COPY --from=libpng /usr/local/lib/libpng* /usr/local/lib/
COPY --from=libpng /usr/local/lib/pkgconfig /usr/local/lib/pkgconfig
RUN yum install -y ninja-build

View File

@ -115,9 +115,6 @@ RUN env GRPC_PYTHON_BUILD_SYSTEM_OPENSSL=True pip3 install grpcio
# cmake-3.28.0 from pip for onnxruntime
RUN python3 -mpip install cmake==3.28.0
ADD ./common/patch_libstdc.sh patch_libstdc.sh
RUN bash ./patch_libstdc.sh && rm patch_libstdc.sh
# build onnxruntime 1.21.0 from sources.
# it is not possible to build it from sources using pip,
# so just build it from upstream repository.

View File

@ -28,7 +28,6 @@ fi
MANY_LINUX_VERSION=${MANY_LINUX_VERSION:-}
DOCKERFILE_SUFFIX=${DOCKERFILE_SUFFIX:-}
OPENBLAS_VERSION=${OPENBLAS_VERSION:-}
ACL_VERSION=${ACL_VERSION:-}
case ${image} in
manylinux2_28-builder:cpu)
@ -42,6 +41,13 @@ case ${image} in
GPU_IMAGE=arm64v8/almalinux:8
DOCKER_GPU_BUILD_ARG=" --build-arg DEVTOOLSET_VERSION=13 --build-arg NINJA_VERSION=1.12.1"
MANY_LINUX_VERSION="2_28_aarch64"
OPENBLAS_VERSION="v0.3.30"
;;
manylinuxcxx11-abi-builder:cpu-cxx11-abi)
TARGET=final
GPU_IMAGE=""
DOCKER_GPU_BUILD_ARG=" --build-arg DEVTOOLSET_VERSION=9"
MANY_LINUX_VERSION="cxx11-abi"
;;
manylinuxs390x-builder:cpu-s390x)
TARGET=final
@ -76,7 +82,7 @@ case ${image} in
;;
manylinux2_28-builder:rocm*)
# we want the patch version of 6.4 instead
if [[ "$GPU_ARCH_VERSION" == *"6.4"* ]]; then
if [[ $(ver $GPU_ARCH_VERSION) -eq $(ver 6.4) ]]; then
GPU_ARCH_VERSION="${GPU_ARCH_VERSION}.2"
fi
TARGET=rocm_final
@ -84,10 +90,6 @@ case ${image} in
DEVTOOLSET_VERSION="11"
GPU_IMAGE=rocm/dev-almalinux-8:${GPU_ARCH_VERSION}-complete
PYTORCH_ROCM_ARCH="gfx900;gfx906;gfx908;gfx90a;gfx942;gfx1030;gfx1100;gfx1101;gfx1102;gfx1200;gfx1201"
# add gfx950, gfx115x conditionally starting in ROCm 7.0
if [[ "$GPU_ARCH_VERSION" == *"7.0"* ]]; then
PYTORCH_ROCM_ARCH="${PYTORCH_ROCM_ARCH};gfx950;gfx1150;gfx1151"
fi
DOCKER_GPU_BUILD_ARG="--build-arg ROCM_VERSION=${GPU_ARCH_VERSION} --build-arg PYTORCH_ROCM_ARCH=${PYTORCH_ROCM_ARCH} --build-arg DEVTOOLSET_VERSION=${DEVTOOLSET_VERSION}"
;;
manylinux2_28-builder:xpu)
@ -119,8 +121,7 @@ tmp_tag=$(basename "$(mktemp -u)" | tr '[:upper:]' '[:lower:]')
DOCKER_BUILDKIT=1 docker build \
${DOCKER_GPU_BUILD_ARG} \
--build-arg "GPU_IMAGE=${GPU_IMAGE}" \
--build-arg "OPENBLAS_VERSION=${OPENBLAS_VERSION:-}" \
--build-arg "ACL_VERSION=${ACL_VERSION:-}" \
--build-arg "OPENBLAS_VERSION=${OPENBLAS_VERSION}" \
--target "${TARGET}" \
-t "${tmp_tag}" \
$@ \

View File

@ -10,6 +10,11 @@ BAD_SSL = "https://self-signed.badssl.com"
print("Testing SSL certificate checking for Python:", sys.version)
if sys.version_info[:2] < (2, 7) or sys.version_info[:2] < (3, 4):
print("This version never checks SSL certs; skipping tests")
sys.exit(0)
EXC = OSError
print(f"Connecting to {GOOD_SSL} should work")

View File

@ -10,11 +10,6 @@ boto3==1.35.42
#Pinned versions: 1.19.12, 1.16.34
#test that import:
build==1.3.0
#Description: A simple, correct Python build frontend.
#Pinned versions: 1.3.0
#test that import:
click
#Description: Command Line Interface Creation Kit
#Pinned versions:
@ -52,10 +47,10 @@ flatbuffers==24.12.23
#Pinned versions: 24.12.23
#test that import:
hypothesis==6.56.4
hypothesis==5.35.1
# Pin hypothesis to avoid flakiness: https://github.com/pytorch/pytorch/issues/31136
#Description: advanced library for generating parametrized tests
#Pinned versions: 6.56.4
#Pinned versions: 5.35.1
#test that import: test_xnnpack_integration.py, test_pruning_op.py, test_nn.py
junitparser==2.1.1
@ -98,7 +93,7 @@ librosa==0.10.2 ; python_version == "3.12" and platform_machine != "s390x"
#Pinned versions:
#test that import:
mypy==1.16.0 ; platform_system == "Linux"
mypy==1.16.0 ; platform_system != "Windows"
# Pin MyPy version because new errors are likely to appear with each release
# Skip on Windows as lots of type annotations are POSIX specific
#Description: linter
@ -111,17 +106,18 @@ networkx==2.8.8
#Pinned versions: 2.8.8
#test that import: functorch
ninja==1.11.1.4
ninja==1.11.1.3
#Description: build system. Used in some tests. Used in build to generate build
#time tracing information
#Pinned versions: 1.11.1.4
#Pinned versions: 1.11.1.3
#test that import: run_test.py, test_cpp_extensions_aot.py,test_determination.py
numba==0.55.2 ; python_version == "3.10" and platform_machine != "s390x"
numba==0.60.0 ; python_version == "3.12" and platform_machine != "s390x"
#Description: Just-In-Time Compiler for Numerical Functions
#Pinned versions: 0.55.2, 0.60.0
#Pinned versions: 0.54.1, 0.49.0, <=0.49.1
#test that import: test_numba_integration.py
#For numba issue see https://github.com/pytorch/pytorch/issues/51511
#Need release > 0.61.2 for s390x due to https://github.com/numba/numba/pull/10073
#numpy
@ -168,12 +164,12 @@ optree==0.13.0
pillow==11.0.0
#Description: Python Imaging Library fork
#Pinned versions: 11.0.0
#Pinned versions: 10.3.0
#test that import:
protobuf==5.29.5
protobuf==5.29.4
#Description: Google's data interchange format
#Pinned versions: 5.29.5
#Pinned versions: 5.29.4
#test that import: test_tensorboard.py, test/onnx/*
psutil
@ -216,7 +212,7 @@ pytest-subtests==0.13.1
#Pinned versions:
#test that import:
xdoctest==1.3.0
xdoctest==1.1.0
#Description: runs doctests in pytest
#Pinned versions: 1.1.0
#test that import:
@ -241,9 +237,10 @@ pygments==2.15.0
#Pinned versions: 14.1.0
#test that import:
scikit-image==0.22.0
scikit-image==0.19.3 ; python_version < "3.10"
scikit-image==0.22.0 ; python_version >= "3.10"
#Description: image processing routines
#Pinned versions: 0.22.0
#Pinned versions:
#test that import: test_nn.py
#scikit-learn
@ -266,7 +263,7 @@ scipy==1.14.1 ; python_version >= "3.12"
#test that import:
# needed by torchgen utils
typing-extensions==4.12.2
typing-extensions>=4.10.0
#Description: type hints for python
#Pinned versions:
#test that import:
@ -339,7 +336,7 @@ onnx==1.18.0
#Pinned versions:
#test that import:
onnxscript==0.5.3
onnxscript==0.4.0
#Description: Required by mypy and test_public_bindings.py when checking torch.onnx._internal
#Pinned versions:
#test that import:
@ -359,10 +356,9 @@ pwlf==2.2.1
#test that import: test_sac_estimator.py
# To build PyTorch itself
pyyaml==6.0.2
pyyaml
pyzstd
setuptools==78.1.1
packaging==23.1
setuptools>=70.1.0
six
scons==4.5.2 ; platform_machine == "aarch64"
@ -377,16 +373,13 @@ dataclasses_json==0.6.7
#Pinned versions: 0.6.7
#test that import:
cmake==3.31.6
cmake==4.0.0
#Description: required for building
tlparse==0.4.0
#Description: required for log parsing
filelock==3.18.0
#Description: required for inductor testing
cuda-bindings>=12.0,<13.0 ; platform_machine != "s390x" and platform_system != "Darwin"
cuda-bindings>=12.0,<13.0 ; platform_machine != "s390x"
#Description: required for testing CUDAGraph::raw_cuda_graph(). See https://nvidia.github.io/cuda-python/cuda-bindings/latest/support.html for how this version was chosen. Note "Any fix in the latest bindings would be backported to the prior major version" means that only the newest version of cuda-bindings will get fixes. Depending on the latest version of 12.x is okay because all 12.y versions will be supported via "CUDA minor version compatibility". Pytorch builds against 13.z versions of cuda toolkit work with 12.x versions of cuda-bindings as well because newer drivers work with old toolkits.
#test that import: test_cuda.py

View File

@ -1,15 +1,8 @@
sphinx==5.3.0
#Description: This is used to generate PyTorch docs
#Pinned versions: 5.3.0
-e git+https://github.com/pytorch/pytorch_sphinx_theme.git@d53b0ffb9b1cda68260693ea98f3483823c88d8e#egg=pytorch_sphinx_theme2
standard-imghdr==3.13.0; python_version >= "3.13"
#Description: This is needed by Sphinx, so it needs to be added here.
# The reasons are as follows:
# 1) This module has been removed from the Python standard library since Python 3.13(https://peps.python.org/pep-0594/#imghdr);
# 2) The current version of Sphinx (5.3.0) is not compatible with Python 3.13.
# Once Sphinx is upgraded to a version compatible with Python 3.13 or later, we can remove this dependency.
-e git+https://github.com/pytorch/pytorch_sphinx_theme.git@71e55749be14ceb56e7f8211a9fb649866b87ad4#egg=pytorch_sphinx_theme2
# TODO: sphinxcontrib.katex 0.9.0 adds a local KaTeX server to speed up pre-rendering
# but it doesn't seem to work and hangs around idly. The initial thought that it is probably
# something related to Docker setup. We can investigate this later.

View File

@ -1,11 +1,11 @@
SHELL=/usr/bin/env bash
DOCKER_CMD ?= docker
DESIRED_ROCM ?= 7.0
DESIRED_ROCM ?= 6.4
DESIRED_ROCM_SHORT = $(subst .,,$(DESIRED_ROCM))
PACKAGE_NAME = magma-rocm
# inherit this from underlying docker image, do not pass this env var to docker
#PYTORCH_ROCM_ARCH ?= gfx900;gfx906;gfx908;gfx90a;gfx942;gfx950;gfx1030;gfx1100;gfx1101;gfx1102;gfx1150;gfx1151;gfx1200;gfx1201
#PYTORCH_ROCM_ARCH ?= gfx900;gfx906;gfx908;gfx90a;gfx942;gfx1030;gfx1100;gfx1101;gfx1102;gfx1200;gfx1201
DOCKER_RUN = set -eou pipefail; ${DOCKER_CMD} run --rm -i \
-v $(shell git rev-parse --show-toplevel)/.ci:/builder \
@ -16,20 +16,20 @@ DOCKER_RUN = set -eou pipefail; ${DOCKER_CMD} run --rm -i \
magma-rocm/build_magma.sh
.PHONY: all
all: magma-rocm70
all: magma-rocm64
all: magma-rocm63
.PHONY:
clean:
$(RM) -r magma-*
$(RM) -r output
.PHONY: magma-rocm70
magma-rocm70: DESIRED_ROCM := 7.0
magma-rocm70:
$(DOCKER_RUN)
.PHONY: magma-rocm64
magma-rocm64: DESIRED_ROCM := 6.4
magma-rocm64:
$(DOCKER_RUN)
.PHONY: magma-rocm63
magma-rocm63: DESIRED_ROCM := 6.3
magma-rocm63:
$(DOCKER_RUN)

View File

@ -6,8 +6,8 @@ set -eou pipefail
# The script expects DESIRED_CUDA and PACKAGE_NAME to be set
ROOT_DIR="$(cd "$(dirname "${BASH_SOURCE[0]}")/.." && pwd)"
# https://github.com/icl-utk-edu/magma/pull/65
MAGMA_VERSION=d6e4117bc88e73f06d26c6c2e14f064e8fc3d1ec
# Version 2.7.2 + ROCm related updates
MAGMA_VERSION=a1625ff4d9bc362906bd01f805dbbe12612953f6
# Folders for the build
PACKAGE_FILES=${ROOT_DIR}/magma-rocm/package_files # metadata
@ -20,7 +20,7 @@ mkdir -p ${PACKAGE_DIR} ${PACKAGE_OUTPUT}/linux-64 ${PACKAGE_BUILD} ${PACKAGE_RE
# Fetch magma sources and verify checksum
pushd ${PACKAGE_DIR}
git clone https://github.com/jeffdaily/magma
git clone https://bitbucket.org/icl/magma.git
pushd magma
git checkout ${MAGMA_VERSION}
popd

View File

@ -142,7 +142,7 @@ time CMAKE_ARGS=${CMAKE_ARGS[@]} \
EXTRA_CAFFE2_CMAKE_FLAGS=${EXTRA_CAFFE2_CMAKE_FLAGS[@]} \
BUILD_LIBTORCH_CPU_WITH_DEBUG=$BUILD_DEBUG_INFO \
USE_NCCL=${USE_NCCL} USE_RCCL=${USE_RCCL} USE_KINETO=${USE_KINETO} \
python -m build --wheel --no-isolation --outdir /tmp/$WHEELHOUSE_DIR
python setup.py bdist_wheel -d /tmp/$WHEELHOUSE_DIR
echo "Finished setup.py bdist at $(date)"
# Build libtorch packages

View File

@ -104,7 +104,7 @@ if [[ "$DESIRED_CUDA" == *"rocm"* ]]; then
export ROCclr_DIR=/opt/rocm/rocclr/lib/cmake/rocclr
fi
echo "Calling -m pip install . -v --no-build-isolation at $(date)"
echo "Calling 'python -m pip install .' at $(date)"
if [[ $LIBTORCH_VARIANT = *"static"* ]]; then
STATIC_CMAKE_FLAG="-DTORCH_STATIC=1"

View File

@ -107,10 +107,6 @@ if [[ $ROCM_INT -ge 60200 ]]; then
ROCM_SO_FILES+=("librocm-core.so")
fi
if [[ $ROCM_INT -ge 70000 ]]; then
ROCM_SO_FILES+=("librocroller.so")
fi
OS_NAME=`awk -F= '/^NAME/{print $2}' /etc/os-release`
if [[ "$OS_NAME" == *"CentOS Linux"* || "$OS_NAME" == *"AlmaLinux"* ]]; then
LIBGOMP_PATH="/usr/lib64/libgomp.so.1"

View File

@ -89,7 +89,7 @@ fi
if [[ "$BUILD_ENVIRONMENT" == *aarch64* ]]; then
export USE_MKLDNN=1
export USE_MKLDNN_ACL=1
export ACL_ROOT_DIR=/acl
export ACL_ROOT_DIR=/ComputeLibrary
fi
if [[ "$BUILD_ENVIRONMENT" == *riscv64* ]]; then
@ -233,9 +233,7 @@ if [[ "${BUILD_ENVIRONMENT}" != *cuda* ]]; then
export BUILD_STATIC_RUNTIME_BENCHMARK=ON
fi
if [[ "$BUILD_ENVIRONMENT" == *-full-debug* ]]; then
export CMAKE_BUILD_TYPE=Debug
elif [[ "$BUILD_ENVIRONMENT" == *-debug* ]]; then
if [[ "$BUILD_ENVIRONMENT" == *-debug* ]]; then
export CMAKE_BUILD_TYPE=RelWithAssert
fi
@ -292,20 +290,15 @@ else
WERROR=1 python setup.py clean
WERROR=1 python -m build --wheel --no-isolation
WERROR=1 python setup.py bdist_wheel
else
python setup.py clean
if [[ "$BUILD_ENVIRONMENT" == *xla* ]]; then
source .ci/pytorch/install_cache_xla.sh
fi
python -m build --wheel --no-isolation
python setup.py bdist_wheel
fi
pip_install_whl "$(echo dist/*.whl)"
if [[ "$BUILD_ENVIRONMENT" == *full-debug* ]]; then
# Regression test for https://github.com/pytorch/pytorch/issues/164297
# Torch should be importable and that's about it
pushd /; python -c "import torch;print(torch.__config__.show(), torch.randn(5) + 1.7)"; popd
fi
if [[ "${BUILD_ADDITIONAL_PACKAGES:-}" == *vision* ]]; then
install_torchvision

View File

@ -58,7 +58,7 @@ time python tools/setup_helpers/generate_code.py \
# Build the docs
pushd docs/cpp
time make VERBOSE=1 html
time make VERBOSE=1 html -j
popd
popd

View File

@ -36,11 +36,11 @@ fi
print_cmake_info
if [[ ${BUILD_ENVIRONMENT} == *"distributed"* ]]; then
# Needed for inductor benchmarks, as lots of HF networks make `torch.distribtued` calls
USE_DISTRIBUTED=1 USE_OPENMP=1 WERROR=1 python -m build --wheel --no-isolation
USE_DISTRIBUTED=1 USE_OPENMP=1 WERROR=1 python setup.py bdist_wheel
else
# Explicitly set USE_DISTRIBUTED=0 to align with the default build config on mac. This also serves as the sole CI config that tests
# that building with USE_DISTRIBUTED=0 works at all. See https://github.com/pytorch/pytorch/issues/86448
USE_DISTRIBUTED=0 USE_OPENMP=1 MACOSX_DEPLOYMENT_TARGET=11.0 WERROR=1 BUILD_TEST=OFF USE_PYTORCH_METAL=1 python -m build --wheel --no-isolation -C--build-option=--plat-name=macosx_11_0_arm64
USE_DISTRIBUTED=0 USE_OPENMP=1 MACOSX_DEPLOYMENT_TARGET=11.0 WERROR=1 BUILD_TEST=OFF USE_PYTORCH_METAL=1 python setup.py bdist_wheel --plat-name macosx_11_0_arm64
fi
if which sccache > /dev/null; then
print_sccache_stats

View File

@ -256,7 +256,7 @@ test_torchbench_smoketest() {
local device=mps
local dtypes=(undefined float16 bfloat16 notset)
local dtype=${dtypes[$1]}
local models=(llama BERT_pytorch dcgan yolov3 resnet152 sam sam_fast pytorch_unet stable_diffusion_text_encoder speech_transformer Super_SloMo doctr_det_predictor doctr_reco_predictor vgg16)
local models=(hf_T5 llama BERT_pytorch dcgan hf_GPT2 yolov3 resnet152 sam sam_fast pytorch_unet stable_diffusion_text_encoder speech_transformer Super_SloMo doctr_det_predictor doctr_reco_predictor timm_resnet timm_vovnet vgg16)
for backend in eager inductor; do
@ -319,7 +319,7 @@ test_aoti_torchbench_smoketest() {
local device=mps
local dtypes=(undefined float16 bfloat16 notset)
local dtype=${dtypes[$1]}
local models=(llama BERT_pytorch dcgan yolov3 resnet152 sam sam_fast pytorch_unet stable_diffusion_text_encoder speech_transformer Super_SloMo doctr_det_predictor doctr_reco_predictor vgg16)
local models=(hf_T5 llama BERT_pytorch dcgan hf_GPT2 yolov3 resnet152 sam sam_fast pytorch_unet stable_diffusion_text_encoder speech_transformer Super_SloMo doctr_det_predictor doctr_reco_predictor timm_resnet timm_vovnet vgg16)
echo "Launching torchbench inference performance run for AOT Inductor and dtype ${dtype}"
local dtype_arg="--${dtype}"

View File

@ -26,7 +26,6 @@ if [[ "${SHARD_NUMBER:-2}" == "2" ]]; then
time python test/run_test.py --verbose -i distributed/test_c10d_spawn_gloo
time python test/run_test.py --verbose -i distributed/test_c10d_spawn_nccl
time python test/run_test.py --verbose -i distributed/test_compute_comm_reordering
time python test/run_test.py --verbose -i distributed/test_aten_comm_compute_reordering
time python test/run_test.py --verbose -i distributed/test_store
time python test/run_test.py --verbose -i distributed/test_symmetric_memory
time python test/run_test.py --verbose -i distributed/test_pg_wrapper

View File

@ -32,9 +32,6 @@ LIBTORCH_NAMESPACE_LIST = (
"torch::",
)
# Patterns for detecting statically linked libstdc++ symbols
STATICALLY_LINKED_CXX11_ABI = [re.compile(r".*recursive_directory_iterator.*")]
def _apply_libtorch_symbols(symbols):
return [
@ -56,17 +53,12 @@ def get_symbols(lib: str) -> list[tuple[str, str, str]]:
return [x.split(" ", 2) for x in lines.decode("latin1").split("\n")[:-1]]
def grep_symbols(
lib: str, patterns: list[Any], symbol_type: str | None = None
) -> list[str]:
def grep_symbols(lib: str, patterns: list[Any]) -> list[str]:
def _grep_symbols(
symbols: list[tuple[str, str, str]], patterns: list[Any]
) -> list[str]:
rc = []
for _s_addr, _s_type, s_name in symbols:
# Filter by symbol type if specified
if symbol_type and _s_type != symbol_type:
continue
for pattern in patterns:
if pattern.match(s_name):
rc.append(s_name)
@ -88,18 +80,6 @@ def grep_symbols(
return functools.reduce(list.__add__, (x.result() for x in tasks), [])
def check_lib_statically_linked_libstdc_cxx_abi_symbols(lib: str) -> None:
cxx11_statically_linked_symbols = grep_symbols(
lib, STATICALLY_LINKED_CXX11_ABI, symbol_type="T"
)
num_statically_linked_symbols = len(cxx11_statically_linked_symbols)
print(f"num_statically_linked_symbols (T): {num_statically_linked_symbols}")
if num_statically_linked_symbols > 0:
raise RuntimeError(
f"Found statically linked libstdc++ symbols (recursive_directory_iterator): {cxx11_statically_linked_symbols[:100]}"
)
def check_lib_symbols_for_abi_correctness(lib: str) -> None:
print(f"lib: {lib}")
cxx11_symbols = grep_symbols(lib, LIBTORCH_CXX11_PATTERNS)
@ -127,7 +107,6 @@ def main() -> None:
libtorch_cpu_path = str(install_root / "lib" / "libtorch_cpu.so")
check_lib_symbols_for_abi_correctness(libtorch_cpu_path)
check_lib_statically_linked_libstdc_cxx_abi_symbols(libtorch_cpu_path)
if __name__ == "__main__":

View File

@ -34,14 +34,12 @@ fi
# Patch numba to avoid CUDA-13 crash, see https://github.com/pytorch/pytorch/issues/162878
if [[ "$BUILD_ENVIRONMENT" == *cuda* ]]; then
NUMBA_CUDA_DIR=$(python -c "import os;import numba.cuda; print(os.path.dirname(numba.cuda.__file__))" 2>/dev/null || true)
if [ -n "$NUMBA_CUDA_DIR" ]; then
NUMBA_PATCH="$(dirname "$(realpath "${BASH_SOURCE[0]}")")/numba-cuda-13.patch"
pushd "$NUMBA_CUDA_DIR"
patch -p4 <"$NUMBA_PATCH"
popd
fi
NUMBA_CUDA_DIR=$(python -c "import os;import numba.cuda; print(os.path.dirname(numba.cuda.__file__))" 2>/dev/null || true)
if [ -n "$NUMBA_CUDA_DIR" ]; then
NUMBA_PATCH="$(dirname "$(realpath "${BASH_SOURCE[0]}")")/numba-cuda-13.patch"
pushd "$NUMBA_CUDA_DIR"
patch -p4 <"$NUMBA_PATCH"
popd
fi
echo "Environment variables:"
@ -337,13 +335,13 @@ test_python() {
test_python_smoke() {
# Smoke tests for H100/B200
time python test/run_test.py --include test_matmul_cuda test_scaled_matmul_cuda inductor/test_fp8 inductor/test_max_autotune $PYTHON_TEST_EXTRA_OPTION --upload-artifacts-while-running
time python test/run_test.py --include test_matmul_cuda inductor/test_fp8 inductor/test_max_autotune $PYTHON_TEST_EXTRA_OPTION --upload-artifacts-while-running
assert_git_not_dirty
}
test_python_smoke_b200() {
# Targeted smoke tests for B200 - staged approach to avoid too many failures
time python test/run_test.py --include test_matmul_cuda test_scaled_matmul_cuda inductor/test_fp8 $PYTHON_TEST_EXTRA_OPTION --upload-artifacts-while-running
time python test/run_test.py --include test_matmul_cuda inductor/test_fp8 $PYTHON_TEST_EXTRA_OPTION --upload-artifacts-while-running
assert_git_not_dirty
}
@ -437,7 +435,7 @@ test_inductor_distributed() {
# this runs on both single-gpu and multi-gpu instance. It should be smart about skipping tests that aren't supported
# with if required # gpus aren't available
python test/run_test.py --include distributed/test_dynamo_distributed distributed/test_inductor_collectives distributed/test_aten_comm_compute_reordering distributed/test_compute_comm_reordering --verbose
python test/run_test.py --include distributed/test_dynamo_distributed distributed/test_inductor_collectives distributed/test_compute_comm_reordering --verbose
assert_git_not_dirty
}
@ -838,7 +836,7 @@ test_dynamo_benchmark() {
elif [[ "${suite}" == "timm_models" ]]; then
export TORCHBENCH_ONLY_MODELS="inception_v3"
elif [[ "${suite}" == "torchbench" ]]; then
export TORCHBENCH_ONLY_MODELS="BERT_pytorch"
export TORCHBENCH_ONLY_MODELS="hf_Bert"
fi
fi
test_single_dynamo_benchmark "dashboard" "$suite" "$shard_id" "$@"
@ -869,13 +867,13 @@ test_inductor_torchbench_smoketest_perf() {
mkdir -p "$TEST_REPORTS_DIR"
python benchmarks/dynamo/torchbench.py --device cuda --performance --backend inductor --float16 --training \
--batch-size-file "$(realpath benchmarks/dynamo/torchbench_models_list.txt)" --only BERT_pytorch \
--batch-size-file "$(realpath benchmarks/dynamo/torchbench_models_list.txt)" --only hf_Bert \
--output "$TEST_REPORTS_DIR/inductor_training_smoketest.csv"
# The threshold value needs to be actively maintained to make this check useful
python benchmarks/dynamo/check_perf_csv.py -f "$TEST_REPORTS_DIR/inductor_training_smoketest.csv" -t 1.4
# Check memory compression ratio for a few models
for test in BERT_pytorch yolov3; do
for test in hf_Albert timm_vision_transformer; do
python benchmarks/dynamo/torchbench.py --device cuda --performance --backend inductor --amp --training \
--disable-cudagraphs --batch-size-file "$(realpath benchmarks/dynamo/torchbench_models_list.txt)" \
--only $test --output "$TEST_REPORTS_DIR/inductor_training_smoketest_$test.csv"
@ -886,7 +884,7 @@ test_inductor_torchbench_smoketest_perf() {
done
# Perform some "warm-start" runs for a few huggingface models.
for test in AllenaiLongformerBase DistilBertForMaskedLM DistillGPT2 GoogleFnet YituTechConvBert; do
for test in AlbertForQuestionAnswering AllenaiLongformerBase DistilBertForMaskedLM DistillGPT2 GoogleFnet YituTechConvBert; do
python benchmarks/dynamo/huggingface.py --accuracy --training --amp --inductor --device cuda --warm-start-latency \
--only $test --output "$TEST_REPORTS_DIR/inductor_warm_start_smoketest_$test.csv"
python benchmarks/dynamo/check_accuracy.py \
@ -1417,7 +1415,7 @@ EOF
pip3 install -r requirements.txt
# shellcheck source=./common-build.sh
source "$(dirname "${BASH_SOURCE[0]}")/common-build.sh"
python -m build --wheel --no-isolation -C--build-option=--bdist-dir="base_bdist_tmp" --outdir "base_dist"
python setup.py bdist_wheel --bdist-dir="base_bdist_tmp" --dist-dir="base_dist"
python -mpip install base_dist/*.whl
echo "::endgroup::"
@ -1619,7 +1617,7 @@ test_operator_benchmark() {
test_inductor_set_cpu_affinity
cd benchmarks/operator_benchmark/pt_extension
python -m pip install . -v --no-build-isolation
python -m pip install .
cd "${TEST_DIR}"/benchmarks/operator_benchmark
$TASKSET python -m benchmark_all_test --device "$1" --tag-filter "$2" \
@ -1632,25 +1630,6 @@ test_operator_benchmark() {
--expected "expected_ci_operator_benchmark_eager_float32_cpu.csv"
}
test_operator_microbenchmark() {
TEST_REPORTS_DIR=$(pwd)/test/test-reports
mkdir -p "$TEST_REPORTS_DIR"
TEST_DIR=$(pwd)
cd benchmarks/operator_benchmark/pt_extension
python -m pip install .
cd "${TEST_DIR}"/benchmarks/operator_benchmark
for OP_BENCHMARK_TESTS in matmul mm addmm bmm; do
$TASKSET python -m pt.${OP_BENCHMARK_TESTS}_test --tag-filter long \
--output-json-for-dashboard "${TEST_REPORTS_DIR}/operator_microbenchmark_${OP_BENCHMARK_TESTS}_compile.json" \
--benchmark-name "PyTorch operator microbenchmark" --use-compile
$TASKSET python -m pt.${OP_BENCHMARK_TESTS}_test --tag-filter long \
--output-json-for-dashboard "${TEST_REPORTS_DIR}/operator_microbenchmark_${OP_BENCHMARK_TESTS}.json" \
--benchmark-name "PyTorch operator microbenchmark"
done
}
if ! [[ "${BUILD_ENVIRONMENT}" == *libtorch* || "${BUILD_ENVIRONMENT}" == *-bazel-* ]]; then
(cd test && python -c "import torch; print(torch.__config__.show())")
@ -1707,8 +1686,6 @@ elif [[ "${TEST_CONFIG}" == *operator_benchmark* ]]; then
test_operator_benchmark cpu ${TEST_MODE}
fi
elif [[ "${TEST_CONFIG}" == *operator_microbenchmark* ]]; then
test_operator_microbenchmark
elif [[ "${TEST_CONFIG}" == *inductor_distributed* ]]; then
test_inductor_distributed
elif [[ "${TEST_CONFIG}" == *inductor-halide* ]]; then
@ -1817,8 +1794,6 @@ elif [[ "${TEST_CONFIG}" == h100_distributed ]]; then
test_h100_distributed
elif [[ "${TEST_CONFIG}" == "h100-symm-mem" ]]; then
test_h100_symm_mem
elif [[ "${TEST_CONFIG}" == "b200-symm-mem" ]]; then
test_h100_symm_mem
elif [[ "${TEST_CONFIG}" == h100_cutlass_backend ]]; then
test_h100_cutlass_backend
else

View File

@ -1,32 +0,0 @@
#!/bin/bash
set -ex -o pipefail
# Suppress ANSI color escape sequences
export TERM=vt100
# shellcheck source=./common.sh
source "$(dirname "${BASH_SOURCE[0]}")/common.sh"
# shellcheck source=./common-build.sh
source "$(dirname "${BASH_SOURCE[0]}")/common-build.sh"
echo "Environment variables"
env
echo "Testing FA3 stable wheel still works with currently built torch"
echo "Installing ABI Stable FA3 wheel"
# The wheel was built on https://github.com/Dao-AILab/flash-attention/commit/b3846b059bf6b143d1cd56879933be30a9f78c81
# on torch nightly torch==2.9.0.dev20250830+cu129
$MAYBE_SUDO pip -q install https://s3.amazonaws.com/ossci-linux/wheels/flash_attn_3-3.0.0b1-cp39-abi3-linux_x86_64.whl
pushd flash-attention/hopper
export PYTHONPATH=$PWD
pytest -v -s \
"test_flash_attn.py::test_flash_attn_output[1-1-192-False-False-False-0.0-False-False-mha-dtype0]" \
"test_flash_attn.py::test_flash_attn_varlen_output[511-1-64-True-False-False-0.0-False-False-gqa-dtype2]" \
"test_flash_attn.py::test_flash_attn_kvcache[1-128-128-False-False-True-None-0.0-False-False-True-False-True-False-gqa-dtype0]" \
"test_flash_attn.py::test_flash_attn_race_condition[97-97-192-True-dtype0]" \
"test_flash_attn.py::test_flash_attn_combine[2-3-64-dtype1]" \
"test_flash_attn.py::test_flash3_bw_compatibility"
popd

View File

@ -70,7 +70,7 @@ sccache --zero-stats
sccache --show-stats
# Build the wheel
python -m build --wheel --no-build-isolation
python setup.py bdist_wheel
if ($LASTEXITCODE -ne 0) { exit 1 }
# Install the wheel locally

View File

@ -38,12 +38,10 @@ if errorlevel 1 goto fail
if not errorlevel 0 goto fail
:: Update CMake
:: TODO: Investigate why this helps MKL detection, even when CMake from choco is not used
call choco upgrade -y cmake --no-progress --installargs 'ADD_CMAKE_TO_PATH=System' --apply-install-arguments-to-dependencies --version=3.27.9
if errorlevel 1 goto fail
if not errorlevel 0 goto fail
:: TODO: Move to .ci/docker/requirements-ci.txt
call pip install mkl==2024.2.0 mkl-static==2024.2.0 mkl-include==2024.2.0
if errorlevel 1 goto fail
if not errorlevel 0 goto fail
@ -132,7 +130,7 @@ if "%USE_CUDA%"=="1" (
:: Print all existing environment variable for debugging
set
python -m build --wheel --no-isolation
python setup.py bdist_wheel
if errorlevel 1 goto fail
if not errorlevel 0 goto fail
sccache --show-stats

View File

@ -15,35 +15,37 @@ if errorlevel 1 exit /b 1
if not errorlevel 0 exit /b 1
cd %TMP_DIR_WIN%\build\torch\test
:: Enable delayed variable expansion to make the list
setlocal enabledelayedexpansion
set EXE_LIST=
for /r "." %%a in (*.exe) do (
if "%%~na" == "c10_intrusive_ptr_benchmark" (
@REM NB: This is not a gtest executable file, thus couldn't be handled by
@REM pytest-cpp and is excluded from test discovery by run_test
call "%%~fa"
call :libtorch_check "%%~na" "%%~fa"
if errorlevel 1 goto fail
if not errorlevel 0 goto fail
) else (
if "%%~na" == "verify_api_visibility" (
@REM Skip verify_api_visibility as it is a compile-level test
) else (
set EXE_LIST=!EXE_LIST! cpp/%%~na
)
)
)
goto :eof
:libtorch_check
cd %CWD%
set CPP_TESTS_DIR=%TMP_DIR_WIN%\build\torch\test
:: Run python test\run_test.py on the list
set NO_TD=True && python test\run_test.py --cpp --verbose -i !EXE_LIST!
if errorlevel 1 goto fail
if not errorlevel 0 goto fail
:: Skip verify_api_visibility as it a compile level test
if "%~1" == "verify_api_visibility" goto :eof
goto :eof
echo Running "%~2"
if "%~1" == "c10_intrusive_ptr_benchmark" (
:: NB: This is not a gtest executable file, thus couldn't be handled by pytest-cpp
call "%~2"
goto :eof
)
python test\run_test.py --cpp --verbose -i "cpp/%~1"
if errorlevel 1 (
echo %1 failed with exit code %errorlevel%
goto fail
)
if not errorlevel 0 (
echo %1 failed with exit code %errorlevel%
goto fail
)
:eof
exit /b 0

View File

@ -37,8 +37,27 @@ if [[ "$BUILD_ENVIRONMENT" == *cuda* ]]; then
export PYTORCH_TESTING_DEVICE_ONLY_FOR="cuda"
fi
# TODO: Move this to .ci/docker/requirements-ci.txt
python -m pip install "psutil==5.9.1" nvidia-ml-py "pytest-shard==0.1.2"
# TODO: Move both of them to Windows AMI
python -m pip install tensorboard==2.13.0 protobuf==5.29.4 pytest-subtests==0.13.1
# Copied from https://github.com/pytorch/test-infra/blob/be01a40157c36cd5a48391fdf44a7bc3ebd4c7e3/aws/ami/windows/scripts/Installers/Install-Pip-Dependencies.ps1#L16 with some adjustments
# pytest-rerunfailures==10.3 as 10.2 fails with INTERNALERROR> pluggy._manager.PluginValidationError: unknown hook 'pytest_configure_node'
# scipy from 1.6.3 to 1.10
# expecttest from 0.1.3 to 0.3.0
# xdoctest from 1.0.2 to 1.3.0
python -m pip install "future==0.18.2" "hypothesis==5.35.1" "expecttest==0.3.0" "librosa>=0.6.2" "scipy==1.10.1" "psutil==5.9.1" "pynvml==11.4.1" "pillow==9.2.0" "unittest-xml-reporting<=3.2.0,>=2.0.0" "pytest==7.1.3" "pytest-xdist==2.5.0" "pytest-flakefinder==1.1.0" "pytest-rerunfailures==10.3" "pytest-shard==0.1.2" "sympy==1.11.1" "xdoctest==1.3.0" "pygments==2.12.0" "opt-einsum>=3.3" "networkx==2.8.8" "mpmath==1.2.1" "pytest-cpp==2.3.0" "boto3==1.35.42"
# Install Z3 optional dependency for Windows builds.
python -m pip install z3-solver==4.15.1.0
# Install tlparse for test\dynamo\test_structured_trace.py UTs.
python -m pip install tlparse==0.4.0
# Install parameterized
python -m pip install parameterized==0.8.1
# Install pulp for testing ilps under torch\distributed\_tools
python -m pip install pulp==2.9.0
run_tests() {
# Run nvidia-smi if available

View File

@ -48,7 +48,7 @@ sccache --zero-stats
sccache --show-stats
:: Call PyTorch build script
python -m build --wheel --no-isolation --outdir "%PYTORCH_FINAL_PACKAGE_DIR%"
python setup.py bdist_wheel -d "%PYTORCH_FINAL_PACKAGE_DIR%"
:: show sccache stats
sccache --show-stats

View File

@ -37,10 +37,10 @@ IF "%CUDA_PATH_V128%"=="" (
)
IF "%BUILD_VISION%" == "" (
set TORCH_CUDA_ARCH_LIST=7.0;7.5;8.0;8.6;9.0;10.0;12.0
set TORCH_CUDA_ARCH_LIST=6.1;7.0;7.5;8.0;8.6;9.0;10.0;12.0
set TORCH_NVCC_FLAGS=-Xfatbin -compress-all
) ELSE (
set NVCC_FLAGS=-D__CUDA_NO_HALF_OPERATORS__ --expt-relaxed-constexpr -gencode=arch=compute_70,code=sm_70 -gencode=arch=compute_75,code=sm_75 -gencode=arch=compute_80,code=compute_80 -gencode=arch=compute_86,code=compute_86 -gencode=arch=compute_90,code=compute_90 -gencode=arch=compute_100,code=compute_100 -gencode=arch=compute_120,code=compute_120
set NVCC_FLAGS=-D__CUDA_NO_HALF_OPERATORS__ --expt-relaxed-constexpr -gencode=arch=compute_50,code=sm_50 -gencode=arch=compute_60,code=sm_60 -gencode=arch=compute_70,code=sm_70 -gencode=arch=compute_75,code=sm_75 -gencode=arch=compute_80,code=compute_80 -gencode=arch=compute_86,code=compute_86 -gencode=arch=compute_90,code=compute_90 -gencode=arch=compute_100,code=compute_100 -gencode=arch=compute_120,code=compute_120
)
set "CUDA_PATH=%CUDA_PATH_V128%"

View File

@ -28,5 +28,5 @@ start /wait "" python-amd64.exe /quiet InstallAllUsers=1 PrependPath=0 Include_t
if errorlevel 1 exit /b 1
set "PATH=%CD%\Python\Scripts;%CD%\Python;%PATH%"
%PYTHON_EXEC% -m pip install --upgrade pip setuptools packaging wheel build
%PYTHON_EXEC% -m pip install --upgrade pip setuptools packaging wheel
if errorlevel 1 exit /b 1

View File

@ -86,7 +86,7 @@ copy /Y "%LIBTORCH_PREFIX%-%PYTORCH_BUILD_VERSION%.zip" "%PYTORCH_FINAL_PACKAGE_
goto build_end
:pytorch
%PYTHON_EXEC% -m build --wheel --no-isolation --outdir "%PYTORCH_FINAL_PACKAGE_DIR%"
%PYTHON_EXEC% setup.py bdist_wheel -d "%PYTORCH_FINAL_PACKAGE_DIR%"
:build_end
IF ERRORLEVEL 1 exit /b 1

View File

@ -63,7 +63,7 @@ if errorlevel 1 exit /b 1
call %CONDA_HOME%\condabin\activate.bat testenv
if errorlevel 1 exit /b 1
call conda install -y -q -c conda-forge libuv=1.51
call conda install -y -q -c conda-forge libuv=1.39
call conda install -y -q intel-openmp
echo "install and test libtorch"

View File

@ -18,7 +18,7 @@ if "%DESIRED_PYTHON%" == "3.9" %PYTHON_EXEC% -m pip install numpy==2.0.2 cmake
%PYTHON_EXEC% -m pip install pyyaml
%PYTHON_EXEC% -m pip install mkl-include mkl-static
%PYTHON_EXEC% -m pip install boto3 requests ninja typing_extensions setuptools==72.1.0
%PYTHON_EXEC% -m pip install boto3 ninja typing_extensions setuptools==72.1.0
where cmake.exe

View File

@ -143,8 +143,7 @@ case $desired_python in
RENAME_WHEEL=false
;;
3.13t)
echo "Using 3.13t deps"
mac_version='macosx-11.0-arm64'
echo "Using 3.13 deps"
NUMPY_PINNED_VERSION="==2.1.0"
RENAME_WHEEL=false
;;
@ -186,11 +185,11 @@ export USE_QNNPACK=OFF
export BUILD_TEST=OFF
pushd "$pytorch_rootdir"
echo "Calling -m build --wheel --no-isolation at $(date)"
echo "Calling setup.py bdist_wheel at $(date)"
_PYTHON_HOST_PLATFORM=${mac_version} ARCHFLAGS="-arch arm64" python -m build --wheel --no-isolation --outdir "$whl_tmp_dir" -C--plat-name="${mac_version//[-.]/_}"
_PYTHON_HOST_PLATFORM=${mac_version} ARCHFLAGS="-arch arm64" python setup.py bdist_wheel -d "$whl_tmp_dir" --plat-name "${mac_version//[-.]/_}"
echo "Finished -m build --wheel --no-isolation at $(date)"
echo "Finished setup.py bdist_wheel at $(date)"
if [[ $package_type != 'libtorch' ]]; then
echo "delocating wheel dependencies"

View File

@ -71,7 +71,14 @@ export PYTORCH_BUILD_NUMBER=1
# Set triton version as part of PYTORCH_EXTRA_INSTALL_REQUIREMENTS
TRITON_VERSION=$(cat $PYTORCH_ROOT/.ci/docker/triton_version.txt)
TRITON_CONSTRAINT="platform_system == 'Linux'"
# Here PYTORCH_EXTRA_INSTALL_REQUIREMENTS is already set for the all the wheel builds hence append TRITON_CONSTRAINT
TRITON_CONSTRAINT="platform_system == 'Linux' and platform_machine == 'x86_64'"
# CUDA 12.9/13.0 builds have triton for Linux and Linux aarch64 binaries.
if [[ "$DESIRED_CUDA" == "cu129" ]] || [[ "$DESIRED_CUDA" == "cu130" ]]; then
TRITON_CONSTRAINT="platform_system == 'Linux'"
fi
if [[ "$PACKAGE_TYPE" =~ .*wheel.* && -n "${PYTORCH_EXTRA_INSTALL_REQUIREMENTS:-}" && ! "$PYTORCH_BUILD_VERSION" =~ .*xpu.* ]]; then
TRITON_REQUIREMENT="triton==${TRITON_VERSION}; ${TRITON_CONSTRAINT}"

View File

@ -0,0 +1,47 @@
#!/bin/bash
# =================== The following code **should** be executed inside Docker container ===================
# Install dependencies
sudo apt-get -y update
sudo apt-get -y install expect-dev
# This is where the local pytorch install in the docker image is located
pt_checkout="/var/lib/jenkins/workspace"
source "$pt_checkout/.ci/pytorch/common_utils.sh"
echo "functorch_doc_push_script.sh: Invoked with $*"
set -ex
version=${DOCS_VERSION:-nightly}
echo "version: $version"
# Build functorch docs
pushd $pt_checkout/functorch/docs
pip -q install -r requirements.txt
make html
popd
git clone https://github.com/pytorch/functorch -b gh-pages --depth 1 functorch_ghpages
pushd functorch_ghpages
if [ $version == "main" ]; then
version=nightly
fi
git rm -rf "$version" || true
mv "$pt_checkout/functorch/docs/build/html" "$version"
git add "$version" || true
git status
git config user.email "soumith+bot@pytorch.org"
git config user.name "pytorchbot"
# If there aren't changes, don't make a commit; push is no-op
git commit -m "Generate Python docs from pytorch/pytorch@${GITHUB_SHA}" || true
git status
if [[ "${WITH_PUSH:-}" == true ]]; then
git push -u origin gh-pages
fi
popd
# =================== The above code **should** be executed inside Docker container ===================

View File

@ -59,19 +59,16 @@ performance-*,
-performance-enum-size,
readability-container-size-empty,
readability-delete-null-pointer,
readability-duplicate-include,
readability-duplicate-include
readability-misplaced-array-index,
readability-redundant*,
readability-redundant*
readability-simplify-subscript-expr,
readability-string-compare,
-readability-redundant-access-specifiers,
-readability-redundant-control-flow,
-readability-redundant-inline-specifier,
'
HeaderFilterRegex: '^(aten/|c10/|torch/).*$'
WarningsAsErrors: '*'
LineFilter:
- name: '/usr/include/.*'
CheckOptions:
cppcoreguidelines-special-member-functions.AllowSoleDefaultDtor: true
cppcoreguidelines-special-member-functions.AllowImplicitlyDeletedCopyOrMove: true

View File

@ -12,7 +12,7 @@ ignore =
# to line this up with executable bit
EXE001,
# these ignores are from flake8-bugbear; please fix!
B007,B008,B017,B019,B023,B028,B903,B905,B906,B907,B908,B910
B007,B008,B017,B019,B023,B028,B903,B904,B905,B906,B907,B908,B910
# these ignores are from flake8-comprehensions; please fix!
C407,
# these ignores are from flake8-logging-format; please fix!

View File

@ -1,10 +1,6 @@
---
name: "⚠️ CI SEV"
about: Tracking incidents for PyTorch's CI infra.
title: ''
labels: ''
assignees: ''
---
> NOTE: Remember to label this issue with "`ci: sev`"

View File

@ -1,18 +0,0 @@
---
name: DISABLE AUTOREVERT
about: Disables autorevert when open
title: "❌​\U0001F519 [DISABLE AUTOREVERT]"
labels: 'ci: disable-autorevert'
assignees: ''
---
This issue, while open, disables the autorevert functionality.
More details can be found [here](https://github.com/pytorch/test-infra/blob/main/aws/lambda/pytorch-auto-revert/README.md)
## Why are you disabling autorevert?
## Links to any issues/commits/errors that shows the source of problem

View File

@ -1,10 +1,8 @@
---
name: Disable CI jobs (PyTorch Dev Infra only)
about: Use this template to disable CI jobs
title: DISABLED [WORKFLOW_NAME] / [PLATFORM_NAME] / [JOB_NAME]
labels: 'module: ci'
assignees: ''
title: "DISABLED [WORKFLOW_NAME] / [PLATFORM_NAME] / [JOB_NAME]"
labels: "module: ci"
---
> For example, DISABLED pull / win-vs2022-cpu-py3 / test (default). Once

View File

@ -22,9 +22,6 @@ self-hosted-runner:
- linux.arm64.m7g.4xlarge
- linux.arm64.m7g.4xlarge.ephemeral
- linux.arm64.r7g.12xlarge.memory
- linux.aws.h100
- linux.aws.h100.4
- linux.aws.h100.8
- linux.4xlarge.nvidia.gpu
- linux.8xlarge.nvidia.gpu
- linux.16xlarge.nvidia.gpu

View File

@ -274,6 +274,8 @@ runs:
-w /var/lib/jenkins/workspace \
"${DOCKER_IMAGE}"
)
# Propagate download.pytorch.org IP to container
grep download.pytorch.org /etc/hosts | docker exec -i "${container_name}" sudo bash -c "/bin/cat >> /etc/hosts"
echo "DOCKER_CONTAINER_ID=${container_name}" >> "${GITHUB_ENV}"
docker exec -t "${container_name}" sh -c "pip install $(echo dist/*.whl)[opt-einsum] && ${TEST_COMMAND}"

View File

@ -28,10 +28,6 @@ runs:
echo "instance-type: $(get_ec2_metadata instance-type)"
echo "system info $(uname -a)"
- name: Print GPU info (if present)
shell: bash
run: if [ -f /usr/bin/nvidia-smi ]; then nvidia-smi; fi
- name: Check if in a container runner
shell: bash
id: check_container_runner
@ -86,6 +82,37 @@ runs:
# Prune all of the docker images
docker system prune -af
- name: Manually resolve download.pytorch.org
shell: bash
continue-on-error: true
run: |
set +e
set -x
PT_DOMAIN=download.pytorch.org
# TODO: Flaky access to download.pytorch.org https://github.com/pytorch/pytorch/issues/100400,
# cleaning this up once the issue is fixed. There are more than one resolved IP here, the last
# one is returned at random
RESOLVED_IP=$(dig -4 +short "${PT_DOMAIN}" | tail -n1)
if [ -z "${RESOLVED_IP}" ]; then
echo "Couldn't resolve ${PT_DOMAIN}, retrying with Google DNS..."
RESOLVED_IP=$(dig -4 +short "${PT_DOMAIN}" @8.8.8.8 | tail -n1)
if [ -z "${RESOLVED_IP}" ]; then
echo "Couldn't resolve ${PT_DOMAIN}, exiting..."
exit 1
fi
fi
if grep -r "${PT_DOMAIN}" /etc/hosts; then
# Clean up any old records first
sudo sed -i "/${PT_DOMAIN}/d" /etc/hosts
fi
echo "${RESOLVED_IP} ${PT_DOMAIN}" | sudo tee -a /etc/hosts
cat /etc/hosts
- name: Check that the docker daemon is running
shell: bash
continue-on-error: true

View File

@ -111,16 +111,3 @@ runs:
# This video group ID maps to subgid 1 inside the docker image due to the /etc/subgid entries.
# The group name corresponding to group ID 1 can change depending on the OS, so both are necessary.
echo "GPU_FLAG=--device=/dev/mem --device=/dev/kfd $DEVICE_FLAG --group-add video --group-add $render_gid --group-add daemon --group-add bin --cap-add=SYS_PTRACE --security-opt seccomp=unconfined --network=host" >> "${GITHUB_ENV}"
- name: configure aws credentials
id: aws_creds
uses: aws-actions/configure-aws-credentials@ececac1a45f3b08a01d2dd070d28d111c5fe6722 # v4.1.0
with:
role-to-assume: arn:aws:iam::308535385114:role/gha_workflow_s3_and_ecr_read_only
aws-region: us-east-1
role-duration-seconds: 18000
- name: Login to Amazon ECR
id: login-ecr
continue-on-error: true
uses: aws-actions/amazon-ecr-login@062b18b96a7aff071d4dc91bc00c4c1a7945b076 # v2.0.1

View File

@ -23,6 +23,9 @@ runs:
run: |
.github\scripts\kill_active_ssh_sessions.ps1
- name: Clean up leftover processes on non-ephemeral Windows runner
uses: pytorch/test-infra/.github/actions/cleanup-runner@main
# Cleaning up Windows workspace sometimes fails flakily with device or resource busy
# error, meaning one or more processes haven't stopped completely yet. So trying to
# retry this step several time similar to how checkout-pytorch GHA does

View File

@ -33,6 +33,10 @@ runs:
)
echo "CONTAINER_NAME=${container_name}" >> "$GITHUB_ENV"
if [[ "${GPU_ARCH_TYPE}" != "rocm" && "${BUILD_ENVIRONMENT}" != "linux-aarch64-binary-manywheel" && "${BUILD_ENVIRONMENT}" != "linux-s390x-binary-manywheel" && "${GPU_ARCH_TYPE}" != "xpu" ]]; then
# Propagate download.pytorch.org IP to container. This is only needed on Linux non aarch64 runner
grep download.pytorch.org /etc/hosts | docker exec -i "${container_name}" bash -c "/bin/cat >> /etc/hosts"
fi
docker exec -t -w "${PYTORCH_ROOT}" "${container_name}" bash -c "bash .circleci/scripts/binary_populate_env.sh"
# Generate test script

View File

@ -1 +1 @@
0ad9951c416d33c5da4f7a504fb162cbe62386f5
1983609239caaab24ab1ed2bfa2aa92e8c76c1b1

View File

@ -1 +1 @@
2a9138a26ee257fef05310ad3fecf7c55fe80d73
c77852e117bdf056c8e9a087e51d6f65cf6ba53d

View File

@ -202,7 +202,7 @@ ARG max_jobs=16
ENV MAX_JOBS=${max_jobs}
ARG nvcc_threads=4
ENV NVCC_THREADS=$nvcc_threads
ARG torch_cuda_arch_list='8.0 8.6 8.9 9.0'
ARG torch_cuda_arch_list='8.0;8.6;8.9;9.0'
ENV TORCH_CUDA_ARCH_LIST=${torch_cuda_arch_list}
ARG USE_SCCACHE
@ -297,28 +297,16 @@ RUN echo "[INFO] Listing current directory before torch install step:" && \
echo "[INFO] Showing torch_build_versions.txt content:" && \
cat torch_build_versions.txt
# Install build and runtime dependencies, this is needed for flashinfer install
COPY requirements/build.txt requirements/build.txt
COPY use_existing_torch.py use_existing_torch.py
RUN python3 use_existing_torch.py
RUN cat requirements/build.txt
# Install uv for faster pip installs if not existed
RUN --mount=type=cache,target=/root/.cache/uv \
if ! python3 -m uv --version > /dev/null 2>&1; then \
python3 -m pip install uv==0.8.4; \
fi
ENV UV_HTTP_TIMEOUT=500
ENV UV_INDEX_STRATEGY="unsafe-best-match"
# Use copy mode to avoid hardlink failures with Docker cache mounts
ENV UV_LINK_MODE=copy
RUN --mount=type=cache,target=/root/.cache/uv \
uv pip install --system -r requirements/build.txt
# Default mount file as placeholder, this just avoid the mount error
ARG TORCH_WHEELS_PATH="./requirements"
# Install torch, torchaudio and torchvision
@ -344,11 +332,13 @@ RUN --mount=type=cache,target=/root/.cache/uv \
# Install xformers wheel from previous stage
RUN --mount=type=cache,target=/root/.cache/uv \
uv pip install --system /wheels/xformers/*.whl --verbose
# Build flashinfer from source.
ARG torch_cuda_arch_list='8.0;8.9;9.0a;10.0a;12.0'
# install package for build flashinfer
# see issue: https://github.com/flashinfer-ai/flashinfer/issues/738
RUN pip install build==1.3.0
RUN pip freeze | grep -E 'setuptools|packaging|build'
ENV TORCH_CUDA_ARCH_LIST=${torch_cuda_arch_list}

View File

@ -1,14 +1,9 @@
import glob
import os
requires_files = glob.glob("requirements/*.txt")
requires_files += ["pyproject.toml"]
for file in requires_files:
if not os.path.exists(file):
print(f"!!! skipping missing {file}")
continue
print(f">>> cleaning {file}")
with open(file) as f:
lines = f.readlines()

View File

@ -1,45 +1,43 @@
tracking_issue: 24422
ciflow_tracking_issue: 64124
ciflow_push_tags:
- ciflow/b200
- ciflow/b200-symm-mem
- ciflow/binaries
- ciflow/binaries_libtorch
- ciflow/binaries_wheel
- ciflow/h100
- ciflow/h100-cutlass-backend
- ciflow/h100-distributed
- ciflow/h100-symm-mem
- ciflow/triton_binaries
- ciflow/inductor
- ciflow/inductor-cu126
- ciflow/inductor-micro-benchmark
- ciflow/inductor-micro-benchmark-cpu-x86
- ciflow/inductor-perf-compare
- ciflow/inductor-perf-test-nightly-rocm
- ciflow/inductor-perf-test-nightly-x86-zen
- ciflow/inductor-periodic
- ciflow/inductor-rocm
- ciflow/inductor-perf-test-nightly-rocm
- ciflow/inductor-perf-compare
- ciflow/inductor-micro-benchmark
- ciflow/inductor-micro-benchmark-cpu-x86
- ciflow/inductor-perf-test-nightly-x86-zen
- ciflow/inductor-cu126
- ciflow/linux-aarch64
- ciflow/mps
- ciflow/nightly
- ciflow/op-benchmark
- ciflow/periodic
- ciflow/periodic-rocm-mi300
- ciflow/pull
- ciflow/quantization-periodic
- ciflow/riscv64
- ciflow/rocm
- ciflow/rocm-mi300
- ciflow/rocm-mi355
- ciflow/s390
- ciflow/riscv64
- ciflow/slow
- ciflow/torchbench
- ciflow/triton_binaries
- ciflow/trunk
- ciflow/unstable
- ciflow/vllm
- ciflow/win-arm64
- ciflow/xpu
- ciflow/vllm
- ciflow/torchbench
- ciflow/op-benchmark
- ciflow/pull
- ciflow/h100
- ciflow/h100-distributed
- ciflow/win-arm64
- ciflow/h100-symm-mem
- ciflow/h100-cutlass-backend
- ciflow/b200
retryable_workflows:
- pull
- trunk
@ -48,4 +46,4 @@ retryable_workflows:
- inductor-A100-perf-nightly
labeler_config: labeler.yml
label_to_label_config: label_to_label.yml
mergebot: true
mergebot: True

View File

@ -0,0 +1,36 @@
boto3==1.35.42
cmake==3.27.*
expecttest==0.3.0
fbscribelogger==0.1.7
filelock==3.18.0
hypothesis==6.56.4
librosa>=0.6.2
mpmath==1.3.0
networkx==2.8.7
ninja==1.10.2.4
numba==0.59.0
numpy==1.26.4
opt-einsum>=3.3
optree==0.13.0
packaging==23.1
parameterized==0.8.1
pillow==10.3.0
protobuf==5.29.5
psutil==5.9.8
pygments==2.15.0
pytest-cpp==2.3.0
pytest-flakefinder==1.1.0
pytest-rerunfailures==10.3
pytest-subtests==0.13.1
pytest-xdist==3.3.1
pytest==7.3.2
pyyaml==6.0.2
scipy==1.12.0
setuptools==78.1.1
sympy==1.13.3
tlparse==0.4.0
tensorboard==2.13.0
typing-extensions==4.12.2
unittest-xml-reporting<=3.2.0,>=2.0.0
xdoctest==1.1.0
z3-solver==4.15.1.0

Binary file not shown.

View File

@ -502,7 +502,6 @@ def perform_misc_tasks(
job_name: str,
pr_body: str,
branch: Optional[str] = None,
tag: Optional[str] = None,
) -> None:
"""
In addition to apply the filter logic, the script also does the following
@ -510,9 +509,7 @@ def perform_misc_tasks(
"""
set_output(
"keep-going",
branch == MAIN_BRANCH
or bool(tag and re.match(r"^trunk/[a-f0-9]{40}$", tag))
or check_for_setting(labels, pr_body, "keep-going"),
branch == MAIN_BRANCH or check_for_setting(labels, pr_body, "keep-going"),
)
set_output(
"ci-verbose-test-logs",
@ -637,7 +634,6 @@ def main() -> None:
job_name=args.job_name,
pr_body=pr_body if pr_body else "",
branch=args.branch,
tag=tag,
)
# Set the filtered test matrix as the output

View File

@ -30,7 +30,7 @@ CUDA_ARCHES_CUDNN_VERSION = {
}
# NOTE: Please also update the ROCm sources in `PIP_SOURCES` in tools/nightly.py when changing this
ROCM_ARCHES = ["6.4", "7.0"]
ROCM_ARCHES = ["6.3", "6.4"]
XPU_ARCHES = ["xpu"]

View File

@ -127,6 +127,53 @@ LINUX_BINARY_BUILD_WORFKLOWS = [
),
]
ROCM_SMOKE_WORKFLOWS = [
BinaryBuildWorkflow(
os=OperatingSystem.LINUX,
package_type="manywheel",
build_variant="rocm",
build_configs=generate_binary_build_matrix.generate_wheels_matrix(
OperatingSystem.LINUX,
arches=["6.4"],
python_versions=["3.10"],
),
ciflow_config=CIFlowConfig(
labels={
LABEL_CIFLOW_BINARIES,
LABEL_CIFLOW_BINARIES_WHEEL,
LABEL_CIFLOW_ROCM,
},
isolated_workflow=True,
),
branches="main",
),
]
LINUX_BINARY_SMOKE_WORKFLOWS = [
BinaryBuildWorkflow(
os=OperatingSystem.LINUX,
package_type="manywheel",
build_configs=generate_binary_build_matrix.generate_wheels_matrix(
OperatingSystem.LINUX,
arches=["13.0"],
python_versions=["3.12"],
),
branches="main",
),
BinaryBuildWorkflow(
os=OperatingSystem.LINUX,
package_type="libtorch",
build_variant=generate_binary_build_matrix.RELEASE,
build_configs=generate_binary_build_matrix.generate_libtorch_matrix(
OperatingSystem.LINUX,
generate_binary_build_matrix.RELEASE,
arches=["cpu"],
libtorch_variants=["shared-with-deps"],
),
branches="main",
),
]
WINDOWS_BINARY_BUILD_WORKFLOWS = [
BinaryBuildWorkflow(
os=OperatingSystem.WINDOWS,
@ -212,6 +259,39 @@ WINDOWS_BINARY_BUILD_WORKFLOWS = [
),
]
WINDOWS_BINARY_SMOKE_WORKFLOWS = [
BinaryBuildWorkflow(
os=OperatingSystem.WINDOWS,
package_type="libtorch",
build_variant=generate_binary_build_matrix.RELEASE,
build_configs=generate_binary_build_matrix.generate_libtorch_matrix(
OperatingSystem.WINDOWS,
generate_binary_build_matrix.RELEASE,
arches=["cpu"],
libtorch_variants=["shared-with-deps"],
),
branches="main",
ciflow_config=CIFlowConfig(
isolated_workflow=True,
),
),
BinaryBuildWorkflow(
os=OperatingSystem.WINDOWS,
package_type="libtorch",
build_variant=generate_binary_build_matrix.DEBUG,
build_configs=generate_binary_build_matrix.generate_libtorch_matrix(
OperatingSystem.WINDOWS,
generate_binary_build_matrix.DEBUG,
arches=["cpu"],
libtorch_variants=["shared-with-deps"],
),
branches="main",
ciflow_config=CIFlowConfig(
isolated_workflow=True,
),
),
]
MACOS_BINARY_BUILD_WORKFLOWS = [
BinaryBuildWorkflow(
os=OperatingSystem.MACOS_ARM64,
@ -292,10 +372,23 @@ def main() -> None:
jinja_env.get_template("linux_binary_build_workflow.yml.j2"),
S390X_BINARY_BUILD_WORKFLOWS,
),
(
# Give rocm it's own workflow file
jinja_env.get_template("linux_binary_build_workflow.yml.j2"),
ROCM_SMOKE_WORKFLOWS,
),
(
jinja_env.get_template("linux_binary_build_workflow.yml.j2"),
LINUX_BINARY_SMOKE_WORKFLOWS,
),
(
jinja_env.get_template("windows_binary_build_workflow.yml.j2"),
WINDOWS_BINARY_BUILD_WORKFLOWS,
),
(
jinja_env.get_template("windows_binary_build_workflow.yml.j2"),
WINDOWS_BINARY_SMOKE_WORKFLOWS,
),
(
jinja_env.get_template("macos_binary_build_workflow.yml.j2"),
MACOS_BINARY_BUILD_WORKFLOWS,

View File

@ -18,7 +18,6 @@ class GitHubComment:
body_text: str
created_at: str
author_login: str
author_url: Optional[str]
author_association: str
editor_login: Optional[str]
database_id: int

Binary file not shown.

View File

@ -38,7 +38,6 @@ def mock_get_comments() -> list[GitHubComment]:
body_text="mock_body_text",
created_at="",
author_login="",
author_url=None,
author_association="",
editor_login=None,
database_id=1,
@ -49,7 +48,6 @@ def mock_get_comments() -> list[GitHubComment]:
body_text=" #" + LABEL_ERR_MSG_TITLE.replace("`", ""),
created_at="",
author_login=BOT_AUTHORS[1],
author_url=None,
author_association="",
editor_login=None,
database_id=2,

View File

@ -32,7 +32,6 @@ from trymerge import (
main as trymerge_main,
MandatoryChecksMissingError,
MergeRule,
PostCommentError,
RE_GHSTACK_DESC,
read_merge_rules,
remove_job_name_suffix,
@ -589,23 +588,6 @@ class TestTryMerge(TestCase):
self.assertEqual(mock_merge_base, pr.get_merge_base())
mocked_gh_fetch_merge_base.assert_called_once()
def test_app_can_revert(self, *args: Any) -> None:
pr = GitHubPR("pytorch", "pytorch", 164660)
repo = DummyGitRepo()
app_comment_id, impostor_comment_id = 3375785595, 3377647892
# Check that app can revert
self.assertIsNotNone(validate_revert(repo, pr, comment_id=app_comment_id))
# But impostor can not
self.assertRaises(
PostCommentError,
lambda: validate_revert(repo, pr, comment_id=impostor_comment_id),
)
# Despite it's name being the name of the bot
self.assertEqual(
pr.get_comment_by_id(impostor_comment_id).author_login,
"pytorch-auto-revert",
)
@mock.patch("trymerge.gh_graphql", side_effect=mocked_gh_graphql)
@mock.patch("trymerge.gh_fetch_merge_base", return_value="")

View File

@ -234,7 +234,6 @@ query ($owner: String!, $name: String!, $number: Int!) {
createdAt
author {
login
url
}
authorAssociation
editor {
@ -1094,7 +1093,6 @@ class GitHubPR:
body_text=node["bodyText"],
created_at=node["createdAt"] if "createdAt" in node else "",
author_login=node["author"]["login"],
author_url=node["author"].get("url", None),
author_association=node["authorAssociation"],
editor_login=editor["login"] if editor else None,
database_id=node["databaseId"],
@ -2031,11 +2029,6 @@ def validate_revert(
# For some reason, one can not be a member of private repo, only CONTRIBUTOR
if pr.is_base_repo_private():
allowed_reverters.append("CONTRIBUTOR")
# Special case the pytorch-auto-revert app, whose does not have association
# But should be able to issue revert command
if comment.author_url == "https://github.com/apps/pytorch-auto-revert":
allowed_reverters.append("NONE")
if author_association not in allowed_reverters:
raise PostCommentError(
f"Will not revert as @{author_login} is not one of "

View File

@ -177,9 +177,6 @@ jobs:
runs-on: linux.rocm.gpu.mi250
timeout-minutes: !{{ common.timeout_minutes }}
!{{ upload.binary_env(config) }}
permissions:
id-token: write
contents: read
steps:
- name: Setup ROCm
uses: ./.github/actions/setup-rocm

View File

@ -67,7 +67,7 @@ jobs:
# an OOM issue when running the job, so this upgrades the runner from 4xlarge
# to the next available tier of 12xlarge. So much memory just to generate cpp
# doc
runner: ${{ inputs.runner_prefix }}linux.12xlarge.memory
runner: ${{ inputs.runner_prefix }}linux.12xlarge
# TODO: Nightly cpp docs take longer and longer to finish (more than 3h now)
# Let's try to figure out how this can be improved
timeout-minutes: 360

View File

@ -40,15 +40,6 @@ jobs:
# Use gh CLI to get changed files in the PR with explicit repo
CHANGED_FILES=$(gh api repos/${{ github.repository }}/pulls/$PR_NUMBER/files --paginate --jq '.[] | select(.status != "removed") | .filename' | tr '\n' ' ' | sed 's/ $//')
# See https://github.com/pytorch/pytorch/pull/134215#issuecomment-2332128790
PYI_FILES_TO_ADD=""
for file in ${CHANGED_FILES}; do
if [[ "${file}" == *".pyi.in" ]]; then
PYI_FILES_TO_ADD="${PYI_FILES_TO_ADD} ${file//.in/}"
fi
done
CHANGED_FILES="${CHANGED_FILES}${PYI_FILES_TO_ADD}"
if [ -z "$CHANGED_FILES" ]; then
echo "No changed files found, setting to '*'"
CHANGED_FILES="*"

View File

@ -1,255 +0,0 @@
# The point of this workflow is to test that a FA3 wheel that was built based off the
# stable ABI as of torch nightly 20250830 can still run on the newer torch.
#
# This workflow is very similar to the _linux-test.yml workflow, with the following
# differences:
# 1. It is simpler (there is no test matrix)
# 2. It pulls flash-attention as a secondary repository in order to access the tests.
# Note that it does not BUILD anything from flash-attention, as we have a prebuilt
# wheel. We pull flash-attention only to run a few tests.
# 3. It runs only FA3 tests. No PyTorch tests are run.
name: linux-test-stable-fa3
on:
workflow_call:
inputs:
build-environment:
required: true
type: string
description: Top-level label for what's being built/tested.
docker-image:
required: true
type: string
description: Docker image to run in.
timeout-minutes:
required: false
type: number
default: 30
description: |
Set the maximum (in minutes) how long the workflow should take to finish
s3-bucket:
description: S3 bucket to download artifact
required: false
type: string
default: "gha-artifacts"
secrets:
HUGGING_FACE_HUB_TOKEN:
required: false
description: |
HF Auth token to avoid rate limits when downloading models or datasets from hub
VLLM_TEST_HUGGING_FACE_TOKEN:
required: false
description: |
HF Auth token to test vllm
SCRIBE_GRAPHQL_ACCESS_TOKEN:
required: false
description: |
FB app token to write to scribe endpoint
env:
GIT_DEFAULT_BRANCH: ${{ github.event.repository.default_branch }}
jobs:
test:
# Don't run on forked repos
if: github.repository_owner == 'pytorch'
runs-on: linux.aws.h100
timeout-minutes: ${{ inputs.timeout-minutes || 30 }}
permissions:
id-token: write
contents: read
steps:
- name: Checkout PyTorch
uses: pytorch/pytorch/.github/actions/checkout-pytorch@main
with:
no-sudo: true
- name: Checkout flash-attention as a secondary repository
uses: actions/checkout@v4
with:
repository: Dao-AILab/flash-attention
path: flash-attention
- name: Setup Linux
uses: ./.github/actions/setup-linux
- name: Calculate docker image
id: calculate-docker-image
uses: pytorch/test-infra/.github/actions/calculate-docker-image@main
with:
docker-image-name: ${{ inputs.docker-image }}
- name: Use following to pull public copy of the image
id: print-ghcr-mirror
env:
ECR_DOCKER_IMAGE: ${{ steps.calculate-docker-image.outputs.docker-image }}
shell: bash
run: |
tag=${ECR_DOCKER_IMAGE##*:}
echo "docker pull ghcr.io/pytorch/ci-image:${tag/:/-}"
- name: Pull docker image
uses: pytorch/test-infra/.github/actions/pull-docker-image@main
with:
docker-image: ${{ steps.calculate-docker-image.outputs.docker-image }}
- name: Check if in a container runner
shell: bash
id: check_container_runner
run: echo "IN_CONTAINER_RUNNER=$(if [ -f /.inarc ] || [ -f /.incontainer ]; then echo true ; else echo false; fi)" >> "$GITHUB_OUTPUT"
- name: Setup GPU_FLAG for docker run
id: setup-gpu-flag
run: echo "GPU_FLAG=--gpus all -e NVIDIA_DRIVER_CAPABILITIES=all" >> "${GITHUB_ENV}"
- name: Setup SCCACHE_SERVER_PORT environment for docker run when on container
id: setup-sscache-port-flag
run: echo "SCCACHE_SERVER_PORT_DOCKER_FLAG=-e SCCACHE_SERVER_PORT=$((RUNNER_UID + 4226))" >> "${GITHUB_ENV}"
if: ${{ steps.check_container_runner.outputs.IN_CONTAINER_RUNNER == 'true' }}
- name: Get workflow job id
id: get-job-id
uses: ./.github/actions/get-workflow-job-id
if: always()
with:
github-token: ${{ secrets.GITHUB_TOKEN }}
- name: Download build artifacts
uses: ./.github/actions/download-build-artifacts
with:
name: ${{ inputs.build-environment }}
s3-bucket: ${{ inputs.s3-bucket }}
- name: Parse ref
id: parse-ref
run: .github/scripts/parse_ref.py
- name: Set Test step time
id: test-timeout
shell: bash
env:
JOB_TIMEOUT: ${{ inputs.timeout-minutes }}
run: |
echo "timeout=$((JOB_TIMEOUT-30))" >> "${GITHUB_OUTPUT}"
- name: Preserve github env variables for use in docker
shell: bash
run: |
env | grep '^GITHUB' >> "/tmp/github_env_${GITHUB_RUN_ID}"
env | grep '^CI' >> "/tmp/github_env_${GITHUB_RUN_ID}"
- name: Test
id: test
timeout-minutes: ${{ fromJson(steps.test-timeout.outputs.timeout) }}
env:
BUILD_ENVIRONMENT: ${{ inputs.build-environment }}
PR_NUMBER: ${{ github.event.pull_request.number }}
GITHUB_REPOSITORY: ${{ github.repository }}
GITHUB_WORKFLOW: ${{ github.workflow }}
GITHUB_JOB: ${{ github.job }}
GITHUB_RUN_ID: ${{ github.run_id }}
GITHUB_RUN_NUMBER: ${{ github.run_number }}
GITHUB_RUN_ATTEMPT: ${{ github.run_attempt }}
JOB_ID: ${{ steps.get-job-id.outputs.job-id }}
JOB_NAME: ${{ steps.get-job-id.outputs.job-name }}
BRANCH: ${{ steps.parse-ref.outputs.branch }}
SHA1: ${{ github.event.pull_request.head.sha || github.sha }}
BASE_SHA: ${{ github.event.pull_request.base.sha || github.sha }}
SHM_SIZE: '2g'
DOCKER_IMAGE: ${{ inputs.docker-image }}
VLLM_TEST_HUGGING_FACE_TOKEN: ${{ secrets.VLLM_TEST_HUGGING_FACE_TOKEN }}
HUGGING_FACE_HUB_TOKEN: ${{ secrets.HUGGING_FACE_HUB_TOKEN }}
SCRIBE_GRAPHQL_ACCESS_TOKEN: ${{ secrets.SCRIBE_GRAPHQL_ACCESS_TOKEN }}
ARTIFACTS_FILE_SUFFIX: ${{ github.job }}-${{ steps.get-job-id.outputs.job-id }}
run: |
set -x
TEST_COMMAND=.ci/pytorch/test_fa3_abi_stable.sh
# Leaving 1GB for the runner and other things
TOTAL_AVAILABLE_MEMORY_IN_GB=$(awk '/MemTotal/ { printf "%.3f \n", $2/1024/1024 - 1 }' /proc/meminfo)
# https://docs.docker.com/engine/containers/resource_constraints/#--memory-swap-details, the 3GB swap
# comes from https://github.com/pytorch/test-infra/pull/6058
TOTAL_MEMORY_WITH_SWAP=$(("${TOTAL_AVAILABLE_MEMORY_IN_GB%.*}" + 3))
SHM_OPTS="--shm-size=${SHM_SIZE}"
JENKINS_USER="--user jenkins"
DOCKER_SHELL_CMD=
# detached container should get cleaned up by teardown_ec2_linux
# TODO: Stop building test binaries as part of the build phase
# Used for GPU_FLAG, SHM_OPTS, JENKINS_USER and DOCKER_SHELL_CMD since that doesn't play nice
# shellcheck disable=SC2086,SC2090
container_name=$(docker run \
${GPU_FLAG:-} \
${SCCACHE_SERVER_PORT_DOCKER_FLAG:-} \
-e BUILD_ENVIRONMENT \
-e PR_NUMBER \
-e GITHUB_ACTIONS \
-e GITHUB_REPOSITORY \
-e GITHUB_WORKFLOW \
-e GITHUB_JOB \
-e GITHUB_RUN_ID \
-e GITHUB_RUN_NUMBER \
-e GITHUB_RUN_ATTEMPT \
-e JOB_ID \
-e JOB_NAME \
-e BASE_SHA \
-e BRANCH \
-e SHA1 \
-e MAX_JOBS="$(nproc --ignore=2)" \
-e HUGGING_FACE_HUB_TOKEN \
-e VLLM_TEST_HUGGING_FACE_TOKEN \
-e SCRIBE_GRAPHQL_ACCESS_TOKEN \
-e ARTIFACTS_FILE_SUFFIX \
--memory="${TOTAL_AVAILABLE_MEMORY_IN_GB%.*}g" \
--memory-swap="${TOTAL_MEMORY_WITH_SWAP}g" \
--env-file="/tmp/github_env_${GITHUB_RUN_ID}" \
--security-opt seccomp=unconfined \
--cap-add=SYS_PTRACE \
--ipc=host \
${SHM_OPTS} \
--tty \
--detach \
--name="${container_name}" \
${JENKINS_USER} \
-v "${GITHUB_WORKSPACE}:/var/lib/jenkins/workspace" \
-w /var/lib/jenkins/workspace \
"${DOCKER_IMAGE}" \
${DOCKER_SHELL_CMD}
)
echo "DOCKER_CONTAINER_ID=${container_name}" >> "${GITHUB_ENV}"
docker exec -t "${container_name}" sh -c "python3 -m pip install $(echo dist/*.whl)[opt-einsum] && ${TEST_COMMAND}"
- name: Collect backtraces from coredumps (if any)
if: always()
run: |
# shellcheck disable=SC2156
find . -iname "core.[1-9]*" -exec docker exec "${DOCKER_CONTAINER_ID}" sh -c "gdb python {} -ex 'bt' -ex 'q'" \;
- name: Store Core dumps on S3
uses: seemethere/upload-artifact-s3@baba72d0712b404f646cebe0730933554ebce96a # v5.1.0
if: failure()
with:
name: coredumps-fa3-stable-abi-smoke-tests
retention-days: 14
if-no-files-found: ignore
path: ./**/core.[1-9]*
- name: Upload utilization stats
if: ${{ always() && steps.test.conclusion && steps.test.conclusion != 'skipped' }}
continue-on-error: true
uses: ./.github/actions/upload-utilization-stats
with:
job_id: ${{ steps.get-job-id.outputs.job-id }}
job_name: ${{ steps.get-job-id.outputs.job-name }}
workflow_name: ${{ github.workflow }}
workflow_run_id: ${{github.run_id}}
workflow_attempt: ${{github.run_attempt}}
- name: Teardown Linux
uses: pytorch/test-infra/.github/actions/teardown-linux@main
if: always() && steps.check_container_runner.outputs.IN_CONTAINER_RUNNER == 'false'

View File

@ -273,8 +273,6 @@ jobs:
TEST_CONFIG: ${{ matrix.config }}
SHARD_NUMBER: ${{ matrix.shard }}
NUM_TEST_SHARDS: ${{ matrix.num_shards }}
EXTRA_FLAGS: ${{ matrix.extra_flags || '' }}
OP_BENCHMARK_TESTS: ${{ matrix.op_benchmark_tests }}
REENABLED_ISSUES: ${{ steps.keep-going.outputs.reenabled-issues }}
CONTINUE_THROUGH_ERROR: ${{ steps.keep-going.outputs.keep-going }}
VERBOSE_TEST_LOGS: ${{ steps.keep-going.outputs.ci-verbose-test-logs }}
@ -389,6 +387,8 @@ jobs:
"${DOCKER_IMAGE}" \
${DOCKER_SHELL_CMD}
)
# Propagate download.pytorch.org IP to container
grep download.pytorch.org /etc/hosts | docker exec -i "${container_name}" sudo bash -c "/bin/cat >> /etc/hosts"
echo "DOCKER_CONTAINER_ID=${container_name}" >> "${GITHUB_ENV}"
if [[ ${BUILD_ENVIRONMENT} == *"s390x"* ]]; then

View File

@ -85,7 +85,7 @@ jobs:
uses: pytorch/test-infra/.github/actions/setup-python@main
with:
python-version: ${{ inputs.python-version }}
pip-requirements-file: .ci/docker/requirements-ci.txt
pip-requirements-file: .github/requirements/pip-requirements-macOS.txt
- name: Install sccache (only for non-forked PRs, and pushes to trunk)
uses: nick-fields/retry@7152eba30c6575329ac0576536151aca5a72780e # v3.0.0

View File

@ -122,7 +122,7 @@ jobs:
uses: pytorch/test-infra/.github/actions/setup-python@main
with:
python-version: ${{ inputs.python-version }}
pip-requirements-file: .ci/docker/requirements-ci.txt
pip-requirements-file: .github/requirements/pip-requirements-macOS.txt
- name: Start monitoring script
id: monitor-script

View File

@ -102,6 +102,19 @@ jobs:
exit 1
fi
- name: configure aws credentials
id: aws_creds
uses: aws-actions/configure-aws-credentials@ececac1a45f3b08a01d2dd070d28d111c5fe6722 # v4.1.0
with:
role-to-assume: arn:aws:iam::308535385114:role/gha_workflow_s3_and_ecr_read_only
aws-region: us-east-1
role-duration-seconds: 18000
- name: Login to Amazon ECR
id: login-ecr
continue-on-error: true
uses: aws-actions/amazon-ecr-login@062b18b96a7aff071d4dc91bc00c4c1a7945b076 # v2.0.1
- name: Calculate docker image
id: calculate-docker-image
uses: pytorch/test-infra/.github/actions/calculate-docker-image@main

View File

@ -84,6 +84,9 @@ jobs:
# in https://github.com/actions/checkout/issues/1018
git config --global core.fsmonitor false
- name: Clean up leftover processes on non-ephemeral Windows runner
uses: pytorch/test-infra/.github/actions/cleanup-runner@main
- name: Setup SSH (Click me for login details)
uses: pytorch/test-infra/.github/actions/setup-ssh@main
with:

View File

@ -77,6 +77,9 @@ jobs:
# in https://github.com/actions/checkout/issues/1018
git config --global core.fsmonitor false
- name: Clean up leftover processes on non-ephemeral Windows runner
uses: pytorch/test-infra/.github/actions/cleanup-runner@main
- name: Setup SSH (Click me for login details)
uses: pytorch/test-infra/.github/actions/setup-ssh@main
with:
@ -103,6 +106,18 @@ jobs:
with:
cuda-version: ${{ inputs.cuda-version }}
# TODO: Move to a requirements.txt file for windows
- name: Install pip dependencies
uses: nick-fields/retry@7152eba30c6575329ac0576536151aca5a72780e # v3.0.0
with:
shell: bash
timeout_minutes: 5
max_attempts: 5
retry_wait_seconds: 30
command: |
set -eu
python3 -m pip install 'xdoctest>=1.1.0'
- name: Get workflow job id
id: get-job-id
uses: ./.github/actions/get-workflow-job-id
@ -257,6 +272,15 @@ jobs:
shell: bash
run: python3 .github/scripts/parse_ref.py
- name: Uninstall PyTorch
if: always()
continue-on-error: true
shell: bash
run: |
# This step removes PyTorch installed by the test to give a clean slate
# to the next job
python3 -mpip uninstall -y torch
- name: Teardown Windows
uses: ./.github/actions/teardown-win
if: always()

View File

@ -1,60 +0,0 @@
name: Limited CI for symmetric memory tests on B200
on:
pull_request:
paths:
- .github/workflows/b200-symm-mem.yml
workflow_dispatch:
push:
tags:
- ciflow/b200-symm-mem/*
schedule:
- cron: 22 8 * * * # about 1:22am PDT
concurrency:
group: ${{ github.workflow }}-${{ github.event.pull_request.number || github.sha }}-${{ github.event_name == 'workflow_dispatch' }}-${{ github.event_name == 'schedule' }}
cancel-in-progress: true
permissions:
id-token: write
contents: read
jobs:
get-label-type:
if: github.repository_owner == 'pytorch'
name: get-label-type
uses: pytorch/pytorch/.github/workflows/_runner-determinator.yml@main
with:
triggering_actor: ${{ github.triggering_actor }}
issue_owner: ${{ github.event.pull_request.user.login || github.event.issue.user.login }}
curr_branch: ${{ github.head_ref || github.ref_name }}
curr_ref_type: ${{ github.ref_type }}
linux-jammy-cuda12_8-py3_10-gcc11-sm100-build-symm:
name: linux-jammy-cuda12.8-py3.10-gcc11-sm100-symm
uses: ./.github/workflows/_linux-build.yml
needs: get-label-type
with:
runner_prefix: "${{ needs.get-label-type.outputs.label-type }}"
runner: linux.12xlarge.memory
build-environment: linux-jammy-cuda12.8-py3.10-gcc11-sm100-symm
docker-image-name: ci-image:pytorch-linux-jammy-cuda12.8-cudnn9-py3-gcc11
cuda-arch-list: '10.0'
test-matrix: |
{ include: [
{ config: "b200-symm-mem", shard: 1, num_shards: 1, runner: "linux.dgx.b200.8" },
]}
secrets: inherit
linux-jammy-cuda12_8-py3_10-gcc11-sm100-test:
name: linux-jammy-cuda12.8-py3.10-gcc11-sm100-symm
uses: ./.github/workflows/_linux-test.yml
needs:
- linux-jammy-cuda12_8-py3_10-gcc11-sm100-build-symm
with:
build-environment: linux-jammy-cuda12.8-py3.10-gcc11-sm100-symm
docker-image: ${{ needs.linux-jammy-cuda12_8-py3_10-gcc11-sm100-build-symm.outputs.docker-image }}
test-matrix: ${{ needs.linux-jammy-cuda12_8-py3_10-gcc11-sm100-build-symm.outputs.test-matrix }}
aws-role-to-assume: arn:aws:iam::308535385114:role/gha_workflow_s3_and_ecr_read_only
secrets: inherit

View File

@ -36,7 +36,7 @@ jobs:
runs-on: linux.9xlarge.ephemeral
strategy:
matrix:
tag: ["cuda12.6", "cuda12.8", "cuda12.9", "cuda13.0", "rocm6.4", "rocm7.0", "cpu"]
tag: ["cuda12.6", "cuda12.8", "cuda12.9", "cuda13.0", "rocm6.3", "rocm6.4", "cpu"]
steps:
- name: Build docker image
uses: pytorch/pytorch/.github/actions/binary-docker-build@main

View File

@ -52,8 +52,8 @@ jobs:
{ tag: "cuda12.9" },
{ tag: "cuda12.8" },
{ tag: "cuda12.6" },
{ tag: "rocm6.3" },
{ tag: "rocm6.4" },
{ tag: "rocm7.0" },
{ tag: "cpu" },
]
steps:

View File

@ -34,7 +34,7 @@ jobs:
id-token: write
strategy:
matrix:
rocm_version: ["70", "64"]
rocm_version: ["64", "63"]
steps:
- name: Checkout PyTorch
uses: actions/checkout@11bd71901bbe5b1630ceea73d27597364c9af683 # v4.2.2

View File

@ -52,10 +52,11 @@ jobs:
{ name: "manylinuxaarch64-builder", tag: "cuda13.0", runner: "linux.arm64.2xlarge.ephemeral" },
{ name: "manylinuxaarch64-builder", tag: "cuda12.8", runner: "linux.arm64.2xlarge.ephemeral" },
{ name: "manylinuxaarch64-builder", tag: "cuda12.6", runner: "linux.arm64.2xlarge.ephemeral" },
{ name: "manylinux2_28-builder", tag: "rocm6.3", runner: "linux.9xlarge.ephemeral" },
{ name: "manylinux2_28-builder", tag: "rocm6.4", runner: "linux.9xlarge.ephemeral" },
{ name: "manylinux2_28-builder", tag: "rocm7.0", runner: "linux.9xlarge.ephemeral" },
{ name: "manylinux2_28-builder", tag: "cpu", runner: "linux.9xlarge.ephemeral" },
{ name: "manylinux2_28_aarch64-builder", tag: "cpu-aarch64", runner: "linux.arm64.2xlarge.ephemeral" },
{ name: "manylinuxcxx11-abi-builder", tag: "cpu-cxx11-abi", runner: "linux.9xlarge.ephemeral" },
{ name: "manylinux2_28-builder", tag: "xpu", runner: "linux.9xlarge.ephemeral" },
]
runs-on: ${{ needs.get-label-type.outputs.label-type }}${{ matrix.runner }}

View File

@ -55,7 +55,7 @@ jobs:
docker-image: ["pytorch/manylinux2_28-builder:cpu"]
include:
- device: "rocm"
rocm_version: "7.0"
rocm_version: "6.4"
runs_on: "${{ needs.get-label-type.outputs.label-type }}linux.4xlarge"
- device: "cuda"
rocm_version: ""

View File

@ -59,6 +59,7 @@ jobs:
pytorch-linux-jammy-py3.13-clang12,
pytorch-linux-jammy-rocm-n-py3,
pytorch-linux-noble-rocm-n-py3,
pytorch-linux-noble-rocm-alpha-py3,
pytorch-linux-jammy-rocm-n-py3-benchmarks,
pytorch-linux-jammy-cuda12.8-cudnn9-py3.10-clang12,
pytorch-linux-jammy-py3.10-gcc11,

View File

@ -316,6 +316,121 @@ jobs:
github-token: ${{ secrets.GITHUB_TOKEN }}
uses: ./.github/workflows/_binary-upload.yml
libtorch-rocm6_3-shared-with-deps-release-build:
if: ${{ github.repository_owner == 'pytorch' }}
uses: ./.github/workflows/_binary-build-linux.yml
needs: get-label-type
with:
PYTORCH_ROOT: /pytorch
PACKAGE_TYPE: libtorch
# TODO: This is a legacy variable that we eventually want to get rid of in
# favor of GPU_ARCH_VERSION
DESIRED_CUDA: rocm6.3
GPU_ARCH_VERSION: "6.3"
GPU_ARCH_TYPE: rocm
DOCKER_IMAGE: libtorch-cxx11-builder
DOCKER_IMAGE_TAG_PREFIX: rocm6.3
LIBTORCH_CONFIG: release
LIBTORCH_VARIANT: shared-with-deps
runner_prefix: "${{ needs.get-label-type.outputs.label-type }}"
timeout-minutes: 300
build_name: libtorch-rocm6_3-shared-with-deps-release
build_environment: linux-binary-libtorch
secrets:
github-token: ${{ secrets.GITHUB_TOKEN }}
libtorch-rocm6_3-shared-with-deps-release-test: # Testing
if: ${{ github.repository_owner == 'pytorch' }}
needs:
- libtorch-rocm6_3-shared-with-deps-release-build
- get-label-type
runs-on: linux.rocm.gpu.mi250
timeout-minutes: 240
env:
PYTORCH_ROOT: /pytorch
PACKAGE_TYPE: libtorch
# TODO: This is a legacy variable that we eventually want to get rid of in
# favor of GPU_ARCH_VERSION
DESIRED_CUDA: rocm6.3
GPU_ARCH_VERSION: "6.3"
GPU_ARCH_TYPE: rocm
SKIP_ALL_TESTS: 1
DOCKER_IMAGE: libtorch-cxx11-builder
DOCKER_IMAGE_TAG_PREFIX: rocm6.3
LIBTORCH_CONFIG: release
LIBTORCH_VARIANT: shared-with-deps
steps:
- name: Setup ROCm
uses: ./.github/actions/setup-rocm
- uses: actions/download-artifact@v4.1.7
name: Download Build Artifacts
with:
name: libtorch-rocm6_3-shared-with-deps-release
path: "${{ runner.temp }}/artifacts/"
- name: Checkout PyTorch
uses: actions/checkout@v4
with:
ref: ${{ github.event_name == 'pull_request' && github.event.pull_request.head.sha || github.sha }}
submodules: recursive
path: pytorch
show-progress: false
- name: Clean PyTorch checkout
run: |
# Remove any artifacts from the previous checkouts
git clean -fxd
working-directory: pytorch
- name: ROCm set GPU_FLAG
run: |
echo "GPU_FLAG=--device=/dev/mem --device=/dev/kfd --device=/dev/dri --group-add video --group-add daemon" >> "${GITHUB_ENV}"
- name: configure aws credentials
id: aws_creds
if: ${{ startsWith(github.event.ref, 'refs/tags/ciflow/') }}
uses: aws-actions/configure-aws-credentials@v4
with:
role-to-assume: arn:aws:iam::308535385114:role/gha_workflow_s3_and_ecr_read_only
aws-region: us-east-1
role-duration-seconds: 18000
- name: Calculate docker image
id: calculate-docker-image
uses: pytorch/test-infra/.github/actions/calculate-docker-image@main
with:
docker-registry: ${{ startsWith(github.event.ref, 'refs/tags/ciflow/') && '308535385114.dkr.ecr.us-east-1.amazonaws.com' || 'docker.io' }}
docker-image-name: libtorch-cxx11-builder
custom-tag-prefix: rocm6.3
docker-build-dir: .ci/docker
working-directory: pytorch
- name: Pull Docker image
uses: pytorch/test-infra/.github/actions/pull-docker-image@main
with:
docker-image: ${{ steps.calculate-docker-image.outputs.docker-image }}
- name: Test Pytorch binary
uses: ./pytorch/.github/actions/test-pytorch-binary
env:
DOCKER_IMAGE: ${{ steps.calculate-docker-image.outputs.docker-image }}
- name: Teardown ROCm
uses: ./.github/actions/teardown-rocm
libtorch-rocm6_3-shared-with-deps-release-upload: # Uploading
if: ${{ github.repository_owner == 'pytorch' }}
permissions:
id-token: write
contents: read
needs: libtorch-rocm6_3-shared-with-deps-release-test
with:
PYTORCH_ROOT: /pytorch
PACKAGE_TYPE: libtorch
# TODO: This is a legacy variable that we eventually want to get rid of in
# favor of GPU_ARCH_VERSION
DESIRED_CUDA: rocm6.3
GPU_ARCH_VERSION: "6.3"
GPU_ARCH_TYPE: rocm
DOCKER_IMAGE: libtorch-cxx11-builder
DOCKER_IMAGE_TAG_PREFIX: rocm6.3
LIBTORCH_CONFIG: release
LIBTORCH_VARIANT: shared-with-deps
build_name: libtorch-rocm6_3-shared-with-deps-release
secrets:
github-token: ${{ secrets.GITHUB_TOKEN }}
uses: ./.github/workflows/_binary-upload.yml
libtorch-rocm6_4-shared-with-deps-release-build:
if: ${{ github.repository_owner == 'pytorch' }}
uses: ./.github/workflows/_binary-build-linux.yml
@ -358,9 +473,6 @@ jobs:
DOCKER_IMAGE_TAG_PREFIX: rocm6.4
LIBTORCH_CONFIG: release
LIBTORCH_VARIANT: shared-with-deps
permissions:
id-token: write
contents: read
steps:
- name: Setup ROCm
uses: ./.github/actions/setup-rocm
@ -433,121 +545,3 @@ jobs:
secrets:
github-token: ${{ secrets.GITHUB_TOKEN }}
uses: ./.github/workflows/_binary-upload.yml
libtorch-rocm7_0-shared-with-deps-release-build:
if: ${{ github.repository_owner == 'pytorch' }}
uses: ./.github/workflows/_binary-build-linux.yml
needs: get-label-type
with:
PYTORCH_ROOT: /pytorch
PACKAGE_TYPE: libtorch
# TODO: This is a legacy variable that we eventually want to get rid of in
# favor of GPU_ARCH_VERSION
DESIRED_CUDA: rocm7.0
GPU_ARCH_VERSION: "7.0"
GPU_ARCH_TYPE: rocm
DOCKER_IMAGE: libtorch-cxx11-builder
DOCKER_IMAGE_TAG_PREFIX: rocm7.0
LIBTORCH_CONFIG: release
LIBTORCH_VARIANT: shared-with-deps
runner_prefix: "${{ needs.get-label-type.outputs.label-type }}"
timeout-minutes: 300
build_name: libtorch-rocm7_0-shared-with-deps-release
build_environment: linux-binary-libtorch
secrets:
github-token: ${{ secrets.GITHUB_TOKEN }}
libtorch-rocm7_0-shared-with-deps-release-test: # Testing
if: ${{ github.repository_owner == 'pytorch' }}
needs:
- libtorch-rocm7_0-shared-with-deps-release-build
- get-label-type
runs-on: linux.rocm.gpu.mi250
timeout-minutes: 240
env:
PYTORCH_ROOT: /pytorch
PACKAGE_TYPE: libtorch
# TODO: This is a legacy variable that we eventually want to get rid of in
# favor of GPU_ARCH_VERSION
DESIRED_CUDA: rocm7.0
GPU_ARCH_VERSION: "7.0"
GPU_ARCH_TYPE: rocm
SKIP_ALL_TESTS: 1
DOCKER_IMAGE: libtorch-cxx11-builder
DOCKER_IMAGE_TAG_PREFIX: rocm7.0
LIBTORCH_CONFIG: release
LIBTORCH_VARIANT: shared-with-deps
permissions:
id-token: write
contents: read
steps:
- name: Setup ROCm
uses: ./.github/actions/setup-rocm
- uses: actions/download-artifact@v4.1.7
name: Download Build Artifacts
with:
name: libtorch-rocm7_0-shared-with-deps-release
path: "${{ runner.temp }}/artifacts/"
- name: Checkout PyTorch
uses: actions/checkout@v4
with:
ref: ${{ github.event_name == 'pull_request' && github.event.pull_request.head.sha || github.sha }}
submodules: recursive
path: pytorch
show-progress: false
- name: Clean PyTorch checkout
run: |
# Remove any artifacts from the previous checkouts
git clean -fxd
working-directory: pytorch
- name: ROCm set GPU_FLAG
run: |
echo "GPU_FLAG=--device=/dev/mem --device=/dev/kfd --device=/dev/dri --group-add video --group-add daemon" >> "${GITHUB_ENV}"
- name: configure aws credentials
id: aws_creds
if: ${{ startsWith(github.event.ref, 'refs/tags/ciflow/') }}
uses: aws-actions/configure-aws-credentials@v4
with:
role-to-assume: arn:aws:iam::308535385114:role/gha_workflow_s3_and_ecr_read_only
aws-region: us-east-1
role-duration-seconds: 18000
- name: Calculate docker image
id: calculate-docker-image
uses: pytorch/test-infra/.github/actions/calculate-docker-image@main
with:
docker-registry: ${{ startsWith(github.event.ref, 'refs/tags/ciflow/') && '308535385114.dkr.ecr.us-east-1.amazonaws.com' || 'docker.io' }}
docker-image-name: libtorch-cxx11-builder
custom-tag-prefix: rocm7.0
docker-build-dir: .ci/docker
working-directory: pytorch
- name: Pull Docker image
uses: pytorch/test-infra/.github/actions/pull-docker-image@main
with:
docker-image: ${{ steps.calculate-docker-image.outputs.docker-image }}
- name: Test Pytorch binary
uses: ./pytorch/.github/actions/test-pytorch-binary
env:
DOCKER_IMAGE: ${{ steps.calculate-docker-image.outputs.docker-image }}
- name: Teardown ROCm
uses: ./.github/actions/teardown-rocm
libtorch-rocm7_0-shared-with-deps-release-upload: # Uploading
if: ${{ github.repository_owner == 'pytorch' }}
permissions:
id-token: write
contents: read
needs: libtorch-rocm7_0-shared-with-deps-release-test
with:
PYTORCH_ROOT: /pytorch
PACKAGE_TYPE: libtorch
# TODO: This is a legacy variable that we eventually want to get rid of in
# favor of GPU_ARCH_VERSION
DESIRED_CUDA: rocm7.0
GPU_ARCH_VERSION: "7.0"
GPU_ARCH_TYPE: rocm
DOCKER_IMAGE: libtorch-cxx11-builder
DOCKER_IMAGE_TAG_PREFIX: rocm7.0
LIBTORCH_CONFIG: release
LIBTORCH_VARIANT: shared-with-deps
build_name: libtorch-rocm7_0-shared-with-deps-release
secrets:
github-token: ${{ secrets.GITHUB_TOKEN }}
uses: ./.github/workflows/_binary-upload.yml

View File

@ -0,0 +1,87 @@
# @generated DO NOT EDIT MANUALLY
# Template is at: .github/templates/linux_binary_build_workflow.yml.j2
# Generation script: .github/scripts/generate_ci_workflows.py
name: linux-binary-libtorch-release
on:
push:
branches:
- main
tags:
- 'ciflow/trunk/*'
workflow_dispatch:
permissions:
id-token: write
env:
# Needed for conda builds
ALPINE_IMAGE: "308535385114.dkr.ecr.us-east-1.amazonaws.com/tool/alpine"
AWS_DEFAULT_REGION: us-east-1
BINARY_ENV_FILE: /tmp/env
BUILD_ENVIRONMENT: linux-binary-libtorch-release
GITHUB_TOKEN: ${{ secrets.GITHUB_TOKEN }}
PR_NUMBER: ${{ github.event.pull_request.number }}
PYTORCH_FINAL_PACKAGE_DIR: /artifacts
PYTORCH_ROOT: /pytorch
SHA1: ${{ github.event.pull_request.head.sha || github.sha }}
SKIP_ALL_TESTS: 0
concurrency:
group: linux-binary-libtorch-release-${{ github.event.pull_request.number || github.ref_name }}-${{ github.ref_type == 'branch' && github.sha }}-${{ github.event_name == 'workflow_dispatch' }}
cancel-in-progress: true
jobs:
get-label-type:
if: github.repository_owner == 'pytorch'
name: get-label-type
uses: pytorch/pytorch/.github/workflows/_runner-determinator.yml@main
with:
triggering_actor: ${{ github.triggering_actor }}
issue_owner: ${{ github.event.pull_request.user.login || github.event.issue.user.login }}
curr_branch: ${{ github.head_ref || github.ref_name }}
curr_ref_type: ${{ github.ref_type }}
libtorch-cpu-shared-with-deps-release-build:
if: ${{ github.repository_owner == 'pytorch' }}
uses: ./.github/workflows/_binary-build-linux.yml
needs: get-label-type
with:
PYTORCH_ROOT: /pytorch
PACKAGE_TYPE: libtorch
# TODO: This is a legacy variable that we eventually want to get rid of in
# favor of GPU_ARCH_VERSION
DESIRED_CUDA: cpu
GPU_ARCH_TYPE: cpu
DOCKER_IMAGE: libtorch-cxx11-builder
DOCKER_IMAGE_TAG_PREFIX: cpu
LIBTORCH_CONFIG: release
LIBTORCH_VARIANT: shared-with-deps
runner_prefix: "${{ needs.get-label-type.outputs.label-type }}"
build_name: libtorch-cpu-shared-with-deps-release
build_environment: linux-binary-libtorch-release
secrets:
github-token: ${{ secrets.GITHUB_TOKEN }}
libtorch-cpu-shared-with-deps-release-test: # Testing
if: ${{ github.repository_owner == 'pytorch' }}
needs:
- libtorch-cpu-shared-with-deps-release-build
- get-label-type
uses: ./.github/workflows/_binary-test-linux.yml
with:
PYTORCH_ROOT: /pytorch
PACKAGE_TYPE: libtorch
# TODO: This is a legacy variable that we eventually want to get rid of in
# favor of GPU_ARCH_VERSION
DESIRED_CUDA: cpu
GPU_ARCH_TYPE: cpu
DOCKER_IMAGE: libtorch-cxx11-builder
DOCKER_IMAGE_TAG_PREFIX: cpu
LIBTORCH_CONFIG: release
LIBTORCH_VARIANT: shared-with-deps
build_name: libtorch-cpu-shared-with-deps-release
build_environment: linux-binary-libtorch-release
runner_prefix: "${{ needs.get-label-type.outputs.label-type }}"
runs_on: linux.4xlarge
secrets:
github-token: ${{ secrets.GITHUB_TOKEN }}

View File

@ -0,0 +1,88 @@
# @generated DO NOT EDIT MANUALLY
# Template is at: .github/templates/linux_binary_build_workflow.yml.j2
# Generation script: .github/scripts/generate_ci_workflows.py
name: linux-binary-manywheel
on:
push:
branches:
- main
tags:
- 'ciflow/trunk/*'
workflow_dispatch:
permissions:
id-token: write
env:
# Needed for conda builds
ALPINE_IMAGE: "308535385114.dkr.ecr.us-east-1.amazonaws.com/tool/alpine"
AWS_DEFAULT_REGION: us-east-1
BINARY_ENV_FILE: /tmp/env
BUILD_ENVIRONMENT: linux-binary-manywheel
GITHUB_TOKEN: ${{ secrets.GITHUB_TOKEN }}
PR_NUMBER: ${{ github.event.pull_request.number }}
PYTORCH_FINAL_PACKAGE_DIR: /artifacts
PYTORCH_ROOT: /pytorch
SHA1: ${{ github.event.pull_request.head.sha || github.sha }}
SKIP_ALL_TESTS: 0
concurrency:
group: linux-binary-manywheel-${{ github.event.pull_request.number || github.ref_name }}-${{ github.ref_type == 'branch' && github.sha }}-${{ github.event_name == 'workflow_dispatch' }}
cancel-in-progress: true
jobs:
get-label-type:
if: github.repository_owner == 'pytorch'
name: get-label-type
uses: pytorch/pytorch/.github/workflows/_runner-determinator.yml@main
with:
triggering_actor: ${{ github.triggering_actor }}
issue_owner: ${{ github.event.pull_request.user.login || github.event.issue.user.login }}
curr_branch: ${{ github.head_ref || github.ref_name }}
curr_ref_type: ${{ github.ref_type }}
manywheel-py3_12-cuda13_0-build:
if: ${{ github.repository_owner == 'pytorch' }}
uses: ./.github/workflows/_binary-build-linux.yml
needs: get-label-type
with:
PYTORCH_ROOT: /pytorch
PACKAGE_TYPE: manywheel
# TODO: This is a legacy variable that we eventually want to get rid of in
# favor of GPU_ARCH_VERSION
DESIRED_CUDA: cu130
GPU_ARCH_VERSION: "13.0"
GPU_ARCH_TYPE: cuda
DOCKER_IMAGE: manylinux2_28-builder
DOCKER_IMAGE_TAG_PREFIX: cuda13.0
DESIRED_PYTHON: "3.12"
runner_prefix: "${{ needs.get-label-type.outputs.label-type }}"
build_name: manywheel-py3_12-cuda13_0
build_environment: linux-binary-manywheel
PYTORCH_EXTRA_INSTALL_REQUIREMENTS: nvidia-cuda-nvrtc==13.0.48; platform_system == 'Linux' | nvidia-cuda-runtime==13.0.48; platform_system == 'Linux' | nvidia-cuda-cupti==13.0.48; platform_system == 'Linux' | nvidia-cudnn-cu13==9.13.0.50; platform_system == 'Linux' | nvidia-cublas==13.0.0.19; platform_system == 'Linux' | nvidia-cufft==12.0.0.15; platform_system == 'Linux' | nvidia-curand==10.4.0.35; platform_system == 'Linux' | nvidia-cusolver==12.0.3.29; platform_system == 'Linux' | nvidia-cusparse==12.6.2.49; platform_system == 'Linux' | nvidia-cusparselt-cu13==0.8.0; platform_system == 'Linux' | nvidia-nccl-cu13==2.27.7; platform_system == 'Linux' | nvidia-nvshmem-cu13==3.3.24; platform_system == 'Linux' | nvidia-nvtx==13.0.39; platform_system == 'Linux' | nvidia-nvjitlink==13.0.39; platform_system == 'Linux' | nvidia-cufile==1.15.0.42; platform_system == 'Linux'
secrets:
github-token: ${{ secrets.GITHUB_TOKEN }}
manywheel-py3_12-cuda13_0-test: # Testing
if: ${{ github.repository_owner == 'pytorch' }}
needs:
- manywheel-py3_12-cuda13_0-build
- get-label-type
uses: ./.github/workflows/_binary-test-linux.yml
with:
PYTORCH_ROOT: /pytorch
PACKAGE_TYPE: manywheel
# TODO: This is a legacy variable that we eventually want to get rid of in
# favor of GPU_ARCH_VERSION
DESIRED_CUDA: cu130
GPU_ARCH_VERSION: "13.0"
GPU_ARCH_TYPE: cuda
DOCKER_IMAGE: manylinux2_28-builder
DOCKER_IMAGE_TAG_PREFIX: cuda13.0
DESIRED_PYTHON: "3.12"
build_name: manywheel-py3_12-cuda13_0
build_environment: linux-binary-manywheel
runner_prefix: "${{ needs.get-label-type.outputs.label-type }}"
runs_on: linux.g4dn.4xlarge.nvidia.gpu # 12.8+ builds need sm_70+ runner
secrets:
github-token: ${{ secrets.GITHUB_TOKEN }}

File diff suppressed because it is too large Load Diff

View File

@ -0,0 +1,136 @@
# @generated DO NOT EDIT MANUALLY
# Template is at: .github/templates/linux_binary_build_workflow.yml.j2
# Generation script: .github/scripts/generate_ci_workflows.py
name: linux-binary-manywheel-rocm
on:
push:
branches:
- main
tags:
- 'ciflow/binaries/*'
- 'ciflow/binaries_wheel/*'
- 'ciflow/rocm/*'
workflow_dispatch:
permissions:
id-token: write
env:
# Needed for conda builds
ALPINE_IMAGE: "308535385114.dkr.ecr.us-east-1.amazonaws.com/tool/alpine"
AWS_DEFAULT_REGION: us-east-1
BINARY_ENV_FILE: /tmp/env
BUILD_ENVIRONMENT: linux-binary-manywheel-rocm
GITHUB_TOKEN: ${{ secrets.GITHUB_TOKEN }}
PR_NUMBER: ${{ github.event.pull_request.number }}
PYTORCH_FINAL_PACKAGE_DIR: /artifacts
PYTORCH_ROOT: /pytorch
SHA1: ${{ github.event.pull_request.head.sha || github.sha }}
SKIP_ALL_TESTS: 0
concurrency:
group: linux-binary-manywheel-rocm-${{ github.event.pull_request.number || github.ref_name }}-${{ github.ref_type == 'branch' && github.sha }}-${{ github.event_name == 'workflow_dispatch' }}
cancel-in-progress: true
jobs:
get-label-type:
if: github.repository_owner == 'pytorch'
name: get-label-type
uses: pytorch/pytorch/.github/workflows/_runner-determinator.yml@main
with:
triggering_actor: ${{ github.triggering_actor }}
issue_owner: ${{ github.event.pull_request.user.login || github.event.issue.user.login }}
curr_branch: ${{ github.head_ref || github.ref_name }}
curr_ref_type: ${{ github.ref_type }}
manywheel-py3_10-rocm6_4-build:
if: ${{ github.repository_owner == 'pytorch' }}
uses: ./.github/workflows/_binary-build-linux.yml
needs: get-label-type
with:
PYTORCH_ROOT: /pytorch
PACKAGE_TYPE: manywheel
# TODO: This is a legacy variable that we eventually want to get rid of in
# favor of GPU_ARCH_VERSION
DESIRED_CUDA: rocm6.4
GPU_ARCH_VERSION: "6.4"
GPU_ARCH_TYPE: rocm
DOCKER_IMAGE: manylinux2_28-builder
DOCKER_IMAGE_TAG_PREFIX: rocm6.4
DESIRED_PYTHON: "3.10"
runner_prefix: "${{ needs.get-label-type.outputs.label-type }}"
timeout-minutes: 300
build_name: manywheel-py3_10-rocm6_4
build_environment: linux-binary-manywheel-rocm
secrets:
github-token: ${{ secrets.GITHUB_TOKEN }}
manywheel-py3_10-rocm6_4-test: # Testing
if: ${{ github.repository_owner == 'pytorch' }}
needs:
- manywheel-py3_10-rocm6_4-build
- get-label-type
runs-on: linux.rocm.gpu.mi250
timeout-minutes: 240
env:
PYTORCH_ROOT: /pytorch
PACKAGE_TYPE: manywheel
# TODO: This is a legacy variable that we eventually want to get rid of in
# favor of GPU_ARCH_VERSION
DESIRED_CUDA: rocm6.4
GPU_ARCH_VERSION: "6.4"
GPU_ARCH_TYPE: rocm
SKIP_ALL_TESTS: 1
DOCKER_IMAGE: manylinux2_28-builder
DOCKER_IMAGE_TAG_PREFIX: rocm6.4
DESIRED_PYTHON: "3.10"
steps:
- name: Setup ROCm
uses: ./.github/actions/setup-rocm
- uses: actions/download-artifact@v4.1.7
name: Download Build Artifacts
with:
name: manywheel-py3_10-rocm6_4
path: "${{ runner.temp }}/artifacts/"
- name: Checkout PyTorch
uses: actions/checkout@v4
with:
ref: ${{ github.event_name == 'pull_request' && github.event.pull_request.head.sha || github.sha }}
submodules: recursive
path: pytorch
show-progress: false
- name: Clean PyTorch checkout
run: |
# Remove any artifacts from the previous checkouts
git clean -fxd
working-directory: pytorch
- name: ROCm set GPU_FLAG
run: |
echo "GPU_FLAG=--device=/dev/mem --device=/dev/kfd --device=/dev/dri --group-add video --group-add daemon" >> "${GITHUB_ENV}"
- name: configure aws credentials
id: aws_creds
if: ${{ startsWith(github.event.ref, 'refs/tags/ciflow/') }}
uses: aws-actions/configure-aws-credentials@v4
with:
role-to-assume: arn:aws:iam::308535385114:role/gha_workflow_s3_and_ecr_read_only
aws-region: us-east-1
role-duration-seconds: 18000
- name: Calculate docker image
id: calculate-docker-image
uses: pytorch/test-infra/.github/actions/calculate-docker-image@main
with:
docker-registry: ${{ startsWith(github.event.ref, 'refs/tags/ciflow/') && '308535385114.dkr.ecr.us-east-1.amazonaws.com' || 'docker.io' }}
docker-image-name: manylinux2_28-builder
custom-tag-prefix: rocm6.4
docker-build-dir: .ci/docker
working-directory: pytorch
- name: Pull Docker image
uses: pytorch/test-infra/.github/actions/pull-docker-image@main
with:
docker-image: ${{ steps.calculate-docker-image.outputs.docker-image }}
- name: Test Pytorch binary
uses: ./pytorch/.github/actions/test-pytorch-binary
env:
DOCKER_IMAGE: ${{ steps.calculate-docker-image.outputs.docker-image }}
- name: Teardown ROCm
uses: ./.github/actions/teardown-rocm

Some files were not shown because too many files have changed in this diff Show More