Mirror of https://github.com/pytorch/pytorch.git, synced 2025-10-29 19:24:55 +08:00.
Compare commits: 3 commits, dev/joona/… -> mlazos/hc7.
Commits: b862ec0bb0, b9291698c3, 8e81416492.
@@ -1,60 +1,82 @@
 #!/usr/bin/env bash
 # Script used only in CD pipeline

-set -exou pipefail
+set -eou pipefail

 image="$1"
 shift

 if [ -z "${image}" ]; then
-  echo "Usage: $0 IMAGENAME:ARCHTAG"
+  echo "Usage: $0 IMAGE"
   exit 1
 fi

-# Go from imagename:tag to tag
-DOCKER_TAG_PREFIX=$(echo "${image}" | awk -F':' '{print $2}')
-
-CUDA_VERSION=""
-if [[ "${DOCKER_TAG_PREFIX}" == cuda* ]]; then
-    # extract cuda version from image name and tag. e.g. manylinux2_28-builder:cuda12.8 returns 12.8
-    CUDA_VERSION=$(echo "${DOCKER_TAG_PREFIX}" | awk -F'cuda' '{print $2}')
-fi
-
-case ${DOCKER_TAG_PREFIX} in
-  cpu)
-    BASE_TARGET=base
-    ;;
-  cuda*)
-    BASE_TARGET=cuda${CUDA_VERSION}
-    ;;
-  *)
-    echo "ERROR: Unknown docker tag ${DOCKER_TAG_PREFIX}"
-    exit 1
-    ;;
-esac
-
-# TODO: Remove LimitNOFILE=1048576 patch once https://github.com/pytorch/test-infra/issues/5712
-# is resolved. This patch is required in order to fix timing out of Docker build on Amazon Linux 2023.
-sudo sed -i s/LimitNOFILE=infinity/LimitNOFILE=1048576/ /usr/lib/systemd/system/docker.service
-sudo systemctl daemon-reload
-sudo systemctl restart docker
+DOCKER_IMAGE_NAME="pytorch/${image}"

 export DOCKER_BUILDKIT=1
 TOPDIR=$(git rev-parse --show-toplevel)
-tmp_tag=$(basename "$(mktemp -u)" | tr '[:upper:]' '[:lower:]')

-docker build \
-  --target final \
-  --progress plain \
-  --build-arg "BASE_TARGET=${BASE_TARGET}" \
-  --build-arg "CUDA_VERSION=${CUDA_VERSION}" \
-  --build-arg "DEVTOOLSET_VERSION=11" \
-  -t ${tmp_tag} \
-  $@ \
-  -f "${TOPDIR}/.ci/docker/almalinux/Dockerfile" \
-  ${TOPDIR}/.ci/docker/
+CUDA_VERSION=${CUDA_VERSION:-12.1}

-if [ -n "${CUDA_VERSION}" ]; then
+case ${CUDA_VERSION} in
+  cpu)
+    BASE_TARGET=base
+    DOCKER_TAG=cpu
+    ;;
+  all)
+    BASE_TARGET=all_cuda
+    DOCKER_TAG=latest
+    ;;
+  *)
+    BASE_TARGET=cuda${CUDA_VERSION}
+    DOCKER_TAG=cuda${CUDA_VERSION}
+    ;;
+esac
+
+(
+  set -x
+  # TODO: Remove LimitNOFILE=1048576 patch once https://github.com/pytorch/test-infra/issues/5712
+  # is resolved. This patch is required in order to fix timing out of Docker build on Amazon Linux 2023.
+  sudo sed -i s/LimitNOFILE=infinity/LimitNOFILE=1048576/ /usr/lib/systemd/system/docker.service
+  sudo systemctl daemon-reload
+  sudo systemctl restart docker
+
+  docker build \
+    --target final \
+    --progress plain \
+    --build-arg "BASE_TARGET=${BASE_TARGET}" \
+    --build-arg "CUDA_VERSION=${CUDA_VERSION}" \
+    --build-arg "DEVTOOLSET_VERSION=11" \
+    -t ${DOCKER_IMAGE_NAME} \
+    $@ \
+    -f "${TOPDIR}/.ci/docker/almalinux/Dockerfile" \
+    ${TOPDIR}/.ci/docker/
+)
+
+if [[ "${DOCKER_TAG}" =~ ^cuda* ]]; then
   # Test that we're using the right CUDA compiler
-  docker run --rm "${tmp_tag}" nvcc --version | grep "cuda_${CUDA_VERSION}"
+  (
+    set -x
+    docker run --rm "${DOCKER_IMAGE_NAME}" nvcc --version | grep "cuda_${CUDA_VERSION}"
+  )
+fi
+
+GITHUB_REF=${GITHUB_REF:-$(git symbolic-ref -q HEAD || git describe --tags --exact-match)}
+GIT_BRANCH_NAME=${GITHUB_REF##*/}
+GIT_COMMIT_SHA=${GITHUB_SHA:-$(git rev-parse HEAD)}
+DOCKER_IMAGE_BRANCH_TAG=${DOCKER_IMAGE_NAME}-${GIT_BRANCH_NAME}
+DOCKER_IMAGE_SHA_TAG=${DOCKER_IMAGE_NAME}-${GIT_COMMIT_SHA}
+if [[ "${WITH_PUSH:-}" == true ]]; then
+  (
+    set -x
+    docker push "${DOCKER_IMAGE_NAME}"
+    if [[ -n ${GITHUB_REF} ]]; then
+      docker tag ${DOCKER_IMAGE_NAME} ${DOCKER_IMAGE_BRANCH_TAG}
+      docker tag ${DOCKER_IMAGE_NAME} ${DOCKER_IMAGE_SHA_TAG}
+      docker push "${DOCKER_IMAGE_BRANCH_TAG}"
+      docker push "${DOCKER_IMAGE_SHA_TAG}"
+    fi
+  )
 fi
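Aside (not part of the diff): a minimal sketch, under a hypothetical image name, of what the tag parsing on the removed side computes. The awk calls split on ':' and on the literal 'cuda'; plain parameter expansion gives the same result:

    image="almalinux-builder:cuda12.8"        # hypothetical IMAGENAME:ARCHTAG
    tag="${image##*:}"                        # cuda12.8, same as awk -F':' '{print $2}'
    if [[ "${tag}" == cuda* ]]; then
        cuda_version="${tag#cuda}"            # 12.8, same as awk -F'cuda' '{print $2}'
        echo "BASE_TARGET=cuda${cuda_version}"
    fi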
@@ -1 +1 @@
-381ae5d57d35c165d98df728380b20fbde350392
+7e487c24e1c20c3f4606c2d8aca2778873b00b4c
@@ -19,13 +19,6 @@ install_ubuntu() {
     apt-get install -y libc++1
     apt-get install -y libc++abi1

-    # Make sure rocm packages from repo.radeon.com have highest priority
-    cat << EOF > /etc/apt/preferences.d/rocm-pin-600
-Package: *
-Pin: release o=repo.radeon.com
-Pin-Priority: 600
-EOF
-
     # Add amdgpu repository
     UBUNTU_VERSION_NAME=`cat /etc/os-release | grep UBUNTU_CODENAME | awk -F= '{print $2}'`
     echo "deb [arch=amd64] https://repo.radeon.com/amdgpu/${ROCM_VERSION}/ubuntu ${UBUNTU_VERSION_NAME} main" > /etc/apt/sources.list.d/amdgpu.list
@@ -25,7 +25,9 @@ python3 -m pip install meson ninja
 ###########################
 ### clone repo
 ###########################
-GIT_SSL_NO_VERIFY=true git clone https://gitlab.freedesktop.org/mesa/drm.git
+# TEMPORARY FIX: https://gitlab.freedesktop.org/mesa/drm.git is down until 2025/03/22
+# GIT_SSL_NO_VERIFY=true git clone https://gitlab.freedesktop.org/mesa/drm.git
+GIT_SSL_NO_VERIFY=true git clone git://anongit.freedesktop.org/mesa/drm
 pushd drm

 ###########################
@@ -17,14 +17,10 @@ function do_install() {
     tmp_dir=$(mktemp -d)
     pushd ${tmp_dir}
     curl -OLs https://ossci-linux.s3.us-east-1.amazonaws.com/${magma_archive}
-    if tar -xvf "${magma_archive}"
-    then
-        mkdir -p "${rocm_dir}/magma"
-        mv include "${rocm_dir}/magma/include"
-        mv lib "${rocm_dir}/magma/lib"
-    else
-        echo "${magma_archive} not found, skipping magma install"
-    fi
+    tar -xvf "${magma_archive}"
+    mkdir -p "${rocm_dir}/magma"
+    mv include "${rocm_dir}/magma/include"
+    mv lib "${rocm_dir}/magma/lib"
     popd
     )
 }
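Aside (not part of the diff): the removed side wraps the extraction in if/else so a missing magma archive only skips the install instead of failing under set -e. A minimal sketch of that guard, with a placeholder archive name:

    magma_archive="magma-rocm-placeholder.tar.bz2"   # placeholder name for illustration
    if tar -xvf "${magma_archive}"; then
        echo "extracted ${magma_archive}"
    else
        echo "${magma_archive} not found, skipping magma install"
    fi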
@@ -89,7 +89,7 @@ ADD ./common/install_rocm_magma.sh install_rocm_magma.sh
 # gfortran and python needed for building magma from source for ROCm
 RUN apt-get update -y && \
     apt-get install gfortran -y && \
-    apt-get install python3 python-is-python3 -y && \
+    apt-get install python -y && \
     apt-get clean

 RUN bash ./install_rocm_drm.sh && rm install_rocm_drm.sh
@@ -1,63 +1,83 @@
 #!/usr/bin/env bash
 # Script used only in CD pipeline

-set -eoux pipefail
+set -eou pipefail

 image="$1"
 shift

 if [ -z "${image}" ]; then
-  echo "Usage: $0 IMAGENAME:ARCHTAG"
+  echo "Usage: $0 IMAGE"
   exit 1
 fi

+DOCKER_IMAGE="pytorch/${image}"
+
 TOPDIR=$(git rev-parse --show-toplevel)

+GPU_ARCH_TYPE=${GPU_ARCH_TYPE:-cpu}
+GPU_ARCH_VERSION=${GPU_ARCH_VERSION:-}
+
+WITH_PUSH=${WITH_PUSH:-}
+
 DOCKER=${DOCKER:-docker}

-# Go from imagename:tag to tag
-DOCKER_TAG_PREFIX=$(echo "${image}" | awk -F':' '{print $2}')
-
-GPU_ARCH_VERSION=""
-if [[ "${DOCKER_TAG_PREFIX}" == cuda* ]]; then
-    # extract cuda version from image name. e.g. manylinux2_28-builder:cuda12.8 returns 12.8
-    GPU_ARCH_VERSION=$(echo "${DOCKER_TAG_PREFIX}" | awk -F'cuda' '{print $2}')
-elif [[ "${DOCKER_TAG_PREFIX}" == rocm* ]]; then
-    # extract rocm version from image name. e.g. manylinux2_28-builder:rocm6.2.4 returns 6.2.4
-    GPU_ARCH_VERSION=$(echo "${DOCKER_TAG_PREFIX}" | awk -F'rocm' '{print $2}')
-fi
-
-case ${DOCKER_TAG_PREFIX} in
+case ${GPU_ARCH_TYPE} in
   cpu)
     BASE_TARGET=cpu
+    DOCKER_TAG=cpu
     GPU_IMAGE=ubuntu:20.04
     DOCKER_GPU_BUILD_ARG=""
     ;;
-  cuda*)
+  cuda)
     BASE_TARGET=cuda${GPU_ARCH_VERSION}
+    DOCKER_TAG=cuda${GPU_ARCH_VERSION}
     GPU_IMAGE=ubuntu:20.04
     DOCKER_GPU_BUILD_ARG=""
     ;;
-  rocm*)
+  rocm)
     BASE_TARGET=rocm
-    GPU_IMAGE=rocm/dev-ubuntu-22.04:${GPU_ARCH_VERSION}-complete
+    DOCKER_TAG=rocm${GPU_ARCH_VERSION}
+    GPU_IMAGE=rocm/dev-ubuntu-20.04:${GPU_ARCH_VERSION}-complete
     PYTORCH_ROCM_ARCH="gfx900;gfx906;gfx908;gfx90a;gfx942;gfx1030;gfx1100;gfx1101;gfx1102;gfx1200;gfx1201"
     DOCKER_GPU_BUILD_ARG="--build-arg PYTORCH_ROCM_ARCH=${PYTORCH_ROCM_ARCH} --build-arg ROCM_VERSION=${GPU_ARCH_VERSION}"
     ;;
   *)
-    echo "ERROR: Unrecognized DOCKER_TAG_PREFIX: ${DOCKER_TAG_PREFIX}"
+    echo "ERROR: Unrecognized GPU_ARCH_TYPE: ${GPU_ARCH_TYPE}"
     exit 1
     ;;
 esac

-tmp_tag=$(basename "$(mktemp -u)" | tr '[:upper:]' '[:lower:]')
-
-DOCKER_BUILDKIT=1 ${DOCKER} build \
-  --target final \
-  ${DOCKER_GPU_BUILD_ARG} \
-  --build-arg "GPU_IMAGE=${GPU_IMAGE}" \
-  --build-arg "BASE_TARGET=${BASE_TARGET}" \
-  -t "${tmp_tag}" \
-  $@ \
-  -f "${TOPDIR}/.ci/docker/libtorch/Dockerfile" \
-  "${TOPDIR}/.ci/docker/"
+(
+  set -x
+  DOCKER_BUILDKIT=1 ${DOCKER} build \
+    --target final \
+    ${DOCKER_GPU_BUILD_ARG} \
+    --build-arg "GPU_IMAGE=${GPU_IMAGE}" \
+    --build-arg "BASE_TARGET=${BASE_TARGET}" \
+    -t "${DOCKER_IMAGE}" \
+    $@ \
+    -f "${TOPDIR}/.ci/docker/libtorch/Dockerfile" \
+    "${TOPDIR}/.ci/docker/"
+)
+
+GITHUB_REF=${GITHUB_REF:-$(git symbolic-ref -q HEAD || git describe --tags --exact-match)}
+GIT_BRANCH_NAME=${GITHUB_REF##*/}
+GIT_COMMIT_SHA=${GITHUB_SHA:-$(git rev-parse HEAD)}
+DOCKER_IMAGE_BRANCH_TAG=${DOCKER_IMAGE}-${GIT_BRANCH_NAME}
+DOCKER_IMAGE_SHA_TAG=${DOCKER_IMAGE}-${GIT_COMMIT_SHA}
+
+if [[ "${WITH_PUSH}" == true ]]; then
+  (
+    set -x
+    ${DOCKER} push "${DOCKER_IMAGE}"
+    if [[ -n ${GITHUB_REF} ]]; then
+      ${DOCKER} tag ${DOCKER_IMAGE} ${DOCKER_IMAGE_BRANCH_TAG}
+      ${DOCKER} tag ${DOCKER_IMAGE} ${DOCKER_IMAGE_SHA_TAG}
+      ${DOCKER} push "${DOCKER_IMAGE_BRANCH_TAG}"
+      ${DOCKER} push "${DOCKER_IMAGE_SHA_TAG}"
+    fi
+  )
+fi
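Aside (not part of the diff): the added side tags the freshly built image with branch- and commit-derived suffixes before pushing. A minimal sketch of that flow with a hypothetical image name; the pushes only happen when WITH_PUSH=true:

    DOCKER_IMAGE="pytorch/libtorch-example"                     # hypothetical name
    GIT_BRANCH_NAME=$(git rev-parse --abbrev-ref HEAD)
    GIT_COMMIT_SHA=$(git rev-parse HEAD)
    docker tag "${DOCKER_IMAGE}" "${DOCKER_IMAGE}-${GIT_BRANCH_NAME}"
    docker tag "${DOCKER_IMAGE}" "${DOCKER_IMAGE}-${GIT_COMMIT_SHA}"
    # docker push "${DOCKER_IMAGE}-${GIT_BRANCH_NAME}"          # gated on WITH_PUSH=true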
@@ -1,7 +1,7 @@
 #!/usr/bin/env bash
 # Script used only in CD pipeline

-set -exou pipefail
+set -eou pipefail

 TOPDIR=$(git rev-parse --show-toplevel)

@@ -9,110 +9,152 @@ image="$1"
 shift

 if [ -z "${image}" ]; then
-  echo "Usage: $0 IMAGE:ARCHTAG"
+  echo "Usage: $0 IMAGE"
   exit 1
 fi

-# Go from imagename:tag to tag
-DOCKER_TAG_PREFIX=$(echo "${image}" | awk -F':' '{print $2}')
-
-GPU_ARCH_VERSION=""
-if [[ "${DOCKER_TAG_PREFIX}" == cuda* ]]; then
-    # extract cuda version from image name. e.g. manylinux2_28-builder:cuda12.8 returns 12.8
-    GPU_ARCH_VERSION=$(echo "${DOCKER_TAG_PREFIX}" | awk -F'cuda' '{print $2}')
-elif [[ "${DOCKER_TAG_PREFIX}" == rocm* ]]; then
-    # extract rocm version from image name. e.g. manylinux2_28-builder:rocm6.2.4 returns 6.2.4
-    GPU_ARCH_VERSION=$(echo "${DOCKER_TAG_PREFIX}" | awk -F'rocm' '{print $2}')
-fi
-
+DOCKER_IMAGE="pytorch/${image}"
+
+DOCKER_REGISTRY="${DOCKER_REGISTRY:-docker.io}"
+
+GPU_ARCH_TYPE=${GPU_ARCH_TYPE:-cpu}
+GPU_ARCH_VERSION=${GPU_ARCH_VERSION:-}
 MANY_LINUX_VERSION=${MANY_LINUX_VERSION:-}
 DOCKERFILE_SUFFIX=${DOCKERFILE_SUFFIX:-}
+WITH_PUSH=${WITH_PUSH:-}

-case ${image} in
-  manylinux2_28-builder:cpu)
+case ${GPU_ARCH_TYPE} in
+  cpu)
     TARGET=cpu_final
+    DOCKER_TAG=cpu
+    GPU_IMAGE=centos:7
+    DOCKER_GPU_BUILD_ARG=" --build-arg DEVTOOLSET_VERSION=9"
+    ;;
+  cpu-manylinux_2_28)
+    TARGET=cpu_final
+    DOCKER_TAG=cpu
     GPU_IMAGE=amd64/almalinux:8
     DOCKER_GPU_BUILD_ARG=" --build-arg DEVTOOLSET_VERSION=11"
     MANY_LINUX_VERSION="2_28"
     ;;
-  manylinuxaarch64-builder:cpu-aarch64)
+  cpu-aarch64)
     TARGET=final
+    DOCKER_TAG=cpu-aarch64
     GPU_IMAGE=arm64v8/centos:7
     DOCKER_GPU_BUILD_ARG=" --build-arg DEVTOOLSET_VERSION=10"
     MANY_LINUX_VERSION="aarch64"
     ;;
-  manylinux2_28_aarch64-builder:cpu-aarch64)
+  cpu-aarch64-2_28)
     TARGET=final
+    DOCKER_TAG=cpu-aarch64
     GPU_IMAGE=arm64v8/almalinux:8
     DOCKER_GPU_BUILD_ARG=" --build-arg DEVTOOLSET_VERSION=11 --build-arg NINJA_VERSION=1.12.1"
     MANY_LINUX_VERSION="2_28_aarch64"
     ;;
-  manylinuxcxx11-abi-builder:cpu-cxx11-abi)
+  cpu-cxx11-abi)
     TARGET=final
+    DOCKER_TAG=cpu-cxx11-abi
     GPU_IMAGE=""
     DOCKER_GPU_BUILD_ARG=" --build-arg DEVTOOLSET_VERSION=9"
     MANY_LINUX_VERSION="cxx11-abi"
     ;;
-  manylinuxs390x-builder:cpu-s390x)
+  cpu-s390x)
     TARGET=final
+    DOCKER_TAG=cpu-s390x
     GPU_IMAGE=s390x/almalinux:8
     DOCKER_GPU_BUILD_ARG=""
     MANY_LINUX_VERSION="s390x"
     ;;
-  manylinux2_28-builder:cuda*)
+  cuda)
     TARGET=cuda_final
+    DOCKER_TAG=cuda${GPU_ARCH_VERSION}
+    # Keep this up to date with the minimum version of CUDA we currently support
+    GPU_IMAGE=centos:7
+    DOCKER_GPU_BUILD_ARG="--build-arg BASE_CUDA_VERSION=${GPU_ARCH_VERSION} --build-arg DEVTOOLSET_VERSION=9"
+    ;;
+  cuda-manylinux_2_28)
+    TARGET=cuda_final
+    DOCKER_TAG=cuda${GPU_ARCH_VERSION}
     GPU_IMAGE=amd64/almalinux:8
     DOCKER_GPU_BUILD_ARG="--build-arg BASE_CUDA_VERSION=${GPU_ARCH_VERSION} --build-arg DEVTOOLSET_VERSION=11"
     MANY_LINUX_VERSION="2_28"
     ;;
-  manylinuxaarch64-builder:cuda*)
+  cuda-aarch64)
     TARGET=cuda_final
+    DOCKER_TAG=cuda${GPU_ARCH_VERSION}
     GPU_IMAGE=arm64v8/centos:7
     DOCKER_GPU_BUILD_ARG="--build-arg BASE_CUDA_VERSION=${GPU_ARCH_VERSION} --build-arg DEVTOOLSET_VERSION=11"
     MANY_LINUX_VERSION="aarch64"
     DOCKERFILE_SUFFIX="_cuda_aarch64"
     ;;
-  manylinux2_28-builder:rocm*)
+  rocm|rocm-manylinux_2_28)
     TARGET=rocm_final
+    DOCKER_TAG=rocm${GPU_ARCH_VERSION}
     GPU_IMAGE=rocm/dev-centos-7:${GPU_ARCH_VERSION}-complete
     DEVTOOLSET_VERSION="9"
-    MANY_LINUX_VERSION="2_28"
-    DEVTOOLSET_VERSION="11"
-    GPU_IMAGE=rocm/dev-almalinux-8:${GPU_ARCH_VERSION}-complete
+    if [ ${GPU_ARCH_TYPE} == "rocm-manylinux_2_28" ]; then
+      MANY_LINUX_VERSION="2_28"
+      DEVTOOLSET_VERSION="11"
+      GPU_IMAGE=rocm/dev-almalinux-8:${GPU_ARCH_VERSION}-complete
+    fi
     PYTORCH_ROCM_ARCH="gfx900;gfx906;gfx908;gfx90a;gfx942;gfx1030;gfx1100;gfx1101;gfx1102;gfx1200;gfx1201"
     DOCKER_GPU_BUILD_ARG="--build-arg ROCM_VERSION=${GPU_ARCH_VERSION} --build-arg PYTORCH_ROCM_ARCH=${PYTORCH_ROCM_ARCH} --build-arg DEVTOOLSET_VERSION=${DEVTOOLSET_VERSION}"
     ;;
-  manylinux2_28-builder:xpu)
+  xpu)
     TARGET=xpu_final
+    DOCKER_TAG=xpu
     GPU_IMAGE=amd64/almalinux:8
     DOCKER_GPU_BUILD_ARG=" --build-arg DEVTOOLSET_VERSION=11"
     MANY_LINUX_VERSION="2_28"
     ;;
   *)
-    echo "ERROR: Unrecognized image name: ${image}"
+    echo "ERROR: Unrecognized GPU_ARCH_TYPE: ${GPU_ARCH_TYPE}"
     exit 1
     ;;
 esac

+IMAGES=''
+
 if [[ -n ${MANY_LINUX_VERSION} && -z ${DOCKERFILE_SUFFIX} ]]; then
     DOCKERFILE_SUFFIX=_${MANY_LINUX_VERSION}
 fi
-# Only activate this if in CI
-if [ "$(uname -m)" != "s390x" ] && [ -v CI ]; then
-  # TODO: Remove LimitNOFILE=1048576 patch once https://github.com/pytorch/test-infra/issues/5712
-  # is resolved. This patch is required in order to fix timing out of Docker build on Amazon Linux 2023.
-  sudo sed -i s/LimitNOFILE=infinity/LimitNOFILE=1048576/ /usr/lib/systemd/system/docker.service
-  sudo systemctl daemon-reload
-  sudo systemctl restart docker
+(
+  set -x
+
+  # Only activate this if in CI
+  if [ "$(uname -m)" != "s390x" ] && [ -v CI ]; then
+    # TODO: Remove LimitNOFILE=1048576 patch once https://github.com/pytorch/test-infra/issues/5712
+    # is resolved. This patch is required in order to fix timing out of Docker build on Amazon Linux 2023.
+    sudo sed -i s/LimitNOFILE=infinity/LimitNOFILE=1048576/ /usr/lib/systemd/system/docker.service
+    sudo systemctl daemon-reload
+    sudo systemctl restart docker
+  fi
+
+  DOCKER_BUILDKIT=1 docker build \
+    ${DOCKER_GPU_BUILD_ARG} \
+    --build-arg "GPU_IMAGE=${GPU_IMAGE}" \
+    --target "${TARGET}" \
+    -t "${DOCKER_IMAGE}" \
+    $@ \
+    -f "${TOPDIR}/.ci/docker/manywheel/Dockerfile${DOCKERFILE_SUFFIX}" \
+    "${TOPDIR}/.ci/docker/"
+)
+
+GITHUB_REF=${GITHUB_REF:-"dev")}
+GIT_BRANCH_NAME=${GITHUB_REF##*/}
+GIT_COMMIT_SHA=${GITHUB_SHA:-$(git rev-parse HEAD)}
+DOCKER_IMAGE_BRANCH_TAG=${DOCKER_IMAGE}-${GIT_BRANCH_NAME}
+DOCKER_IMAGE_SHA_TAG=${DOCKER_IMAGE}-${GIT_COMMIT_SHA}
+
+if [[ "${WITH_PUSH}" == true ]]; then
+  (
+    set -x
+    docker push "${DOCKER_IMAGE}"
+    if [[ -n ${GITHUB_REF} ]]; then
+      docker tag ${DOCKER_IMAGE} ${DOCKER_IMAGE_BRANCH_TAG}
+      docker tag ${DOCKER_IMAGE} ${DOCKER_IMAGE_SHA_TAG}
+      docker push "${DOCKER_IMAGE_BRANCH_TAG}"
+      docker push "${DOCKER_IMAGE_SHA_TAG}"
+    fi
+  )
 fi
-
-tmp_tag=$(basename "$(mktemp -u)" | tr '[:upper:]' '[:lower:]')
-
-DOCKER_BUILDKIT=1 docker build \
-  ${DOCKER_GPU_BUILD_ARG} \
-  --build-arg "GPU_IMAGE=${GPU_IMAGE}" \
-  --target "${TARGET}" \
-  -t "${tmp_tag}" \
-  $@ \
-  -f "${TOPDIR}/.ci/docker/manywheel/Dockerfile${DOCKERFILE_SUFFIX}" \
-  "${TOPDIR}/.ci/docker/"
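Aside (not part of the diff): one way to confirm the LimitNOFILE patch took effect after the docker daemon restart; a sketch of a manual check, not something the script itself runs:

    systemctl show docker --property=LimitNOFILE   # expect LimitNOFILE=1048576 after the sed patch
    ulimit -n                                      # per-shell descriptor limit, for comparison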
@@ -1,20 +1,15 @@
 sphinx==5.3.0
 #Description: This is used to generate PyTorch docs
 #Pinned versions: 5.3.0
--e git+https://github.com/pytorch/pytorch_sphinx_theme.git@a98ffecb792d50df495be401becbf5c414421423#egg=pytorch_sphinx_theme2
+-e git+https://github.com/pytorch/pytorch_sphinx_theme.git#egg=pytorch_sphinx_theme

 # TODO: sphinxcontrib.katex 0.9.0 adds a local KaTeX server to speed up pre-rendering
 # but it doesn't seem to work and hangs around idly. The initial thought is probably
 # something related to Docker setup. We can investigate this later

 sphinxcontrib.katex==0.8.6
 #Description: This is used to generate PyTorch docs
 #Pinned versions: 0.8.6

-sphinxext-opengraph==0.9.1
-#Description: This is used to generate PyTorch docs
-#Pinned versions: 0.9.1
-
 matplotlib==3.5.3
 #Description: This is used to generate PyTorch docs
 #Pinned versions: 3.5.3
@@ -51,6 +46,5 @@ myst-nb==0.17.2
 # The following are required to build torch.distributed.elastic.rendezvous.etcd* docs
 python-etcd==0.4.5
 sphinx-copybutton==0.5.0
-sphinx-design==0.4.0
-sphinxcontrib-mermaid==1.0.0
+sphinx-panels==0.4.1
 myst-parser==0.18.1
@@ -1,7 +1,7 @@
 SHELL=/usr/bin/env bash

 DOCKER_CMD ?= docker
-DESIRED_ROCM ?= 6.4
+DESIRED_ROCM ?= 6.3
 DESIRED_ROCM_SHORT = $(subst .,,$(DESIRED_ROCM))
 PACKAGE_NAME = magma-rocm
 # inherit this from underlying docker image, do not pass this env var to docker
@@ -16,7 +16,6 @@ DOCKER_RUN = set -eou pipefail; ${DOCKER_CMD} run --rm -i \
 	magma-rocm/build_magma.sh

 .PHONY: all
-all: magma-rocm64
 all: magma-rocm63
 all: magma-rocm624

@@ -25,11 +24,6 @@ clean:
 	$(RM) -r magma-*
 	$(RM) -r output

-.PHONY: magma-rocm64
-magma-rocm64: DESIRED_ROCM := 6.4
-magma-rocm64:
-	$(DOCKER_RUN)
-
 .PHONY: magma-rocm63
 magma-rocm63: DESIRED_ROCM := 6.3
 magma-rocm63:
@@ -301,18 +301,6 @@ else
 fi
 pip_install_whl "$(echo dist/*.whl)"

-if [[ "$BUILD_ENVIRONMENT" == *xpu* ]]; then
-  echo "Checking that xpu is compiled"
-  pushd dist/
-  if python -c 'import torch; exit(0 if torch.xpu._is_compiled() else 1)'; then
-    echo "XPU support is compiled in."
-  else
-    echo "XPU support is NOT compiled in."
-    exit 1
-  fi
-  popd
-fi
-
 # TODO: I'm not sure why, but somehow we lose verbose commands
 set -x

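Aside (not part of the diff): the removed check relies on the probe exiting 0 when XPU support is compiled in and non-zero otherwise, so it can gate a shell branch or fail a CI step directly. A minimal usage sketch:

    python -c 'import torch; exit(0 if torch.xpu._is_compiled() else 1)' \
        && echo "XPU support is compiled in." \
        || echo "XPU support is NOT compiled in."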
@@ -216,14 +216,6 @@ else
   fi
 fi

-###############################################################################
-# Check XPU configured correctly
-###############################################################################
-if [[ "$DESIRED_CUDA" == 'xpu' && "$PACKAGE_TYPE" != 'libtorch' ]]; then
-  echo "Checking that xpu is compiled"
-  python -c 'import torch; exit(0 if torch.xpu._is_compiled() else 1)'
-fi
-
 ###############################################################################
 # Check CUDA configured correctly
 ###############################################################################
@@ -34,14 +34,11 @@ if which sccache > /dev/null; then
 fi

 print_cmake_info
-if [[ ${BUILD_ENVIRONMENT} == *"distributed"* ]]; then
-  # Needed for inductor benchmarks, as lots of HF networks make `torch.distribtued` calls
-  USE_DISTRIBUTED=1 USE_OPENMP=1 WERROR=1 python setup.py bdist_wheel
-else
-  # Explicitly set USE_DISTRIBUTED=0 to align with the default build config on mac. This also serves as the sole CI config that tests
-  # that building with USE_DISTRIBUTED=0 works at all. See https://github.com/pytorch/pytorch/issues/86448
-  USE_DISTRIBUTED=0 USE_OPENMP=1 MACOSX_DEPLOYMENT_TARGET=11.0 WERROR=1 BUILD_TEST=OFF USE_PYTORCH_METAL=1 python setup.py bdist_wheel
-fi
+# Explicitly set USE_DISTRIBUTED=0 to align with the default build config on mac. This also serves as the sole CI config that tests
+# that building with USE_DISTRIBUTED=0 works at all. See https://github.com/pytorch/pytorch/issues/86448
+USE_DISTRIBUTED=0 USE_OPENMP=1 MACOSX_DEPLOYMENT_TARGET=11.0 WERROR=1 BUILD_TEST=OFF USE_PYTORCH_METAL=1 python setup.py bdist_wheel
 if which sccache > /dev/null; then
   print_sccache_stats
 fi
@@ -221,39 +221,25 @@ test_torchbench_smoketest() {
   TEST_REPORTS_DIR=$(pwd)/test/test-reports
   mkdir -p "$TEST_REPORTS_DIR"

+  local backend=eager
+  local dtype=notset
   local device=mps
-  local models=(hf_T5 llama BERT_pytorch dcgan hf_GPT2 yolov3 resnet152 sam pytorch_unet stable_diffusion_text_encoder moco speech_transformer)

-  for backend in eager inductor; do
-    for dtype in notset float16 bfloat16; do
-      echo "Launching torchbench inference performance run for backend ${backend} and dtype ${dtype}"
-      local dtype_arg="--${dtype}"
-      if [ "$dtype" == notset ]; then
-        dtype_arg="--float32"
-      fi
-      touch "$TEST_REPORTS_DIR/inductor_${backend}_torchbench_${dtype}_inference_${device}_performance.csv"
-      for model in "${models[@]}"; do
-        PYTHONPATH="$(pwd)"/torchbench python benchmarks/dynamo/torchbench.py \
-          --performance --only "$model" --backend "$backend" --inference --devices "$device" "$dtype_arg" \
-          --output "$TEST_REPORTS_DIR/inductor_${backend}_torchbench_${dtype}_inference_${device}_performance.csv" || true
-      done
-    done
-
-    for dtype in notset amp; do
-      echo "Launching torchbench training performance run for backend ${backend} and dtype ${dtype}"
-      touch "$TEST_REPORTS_DIR/inductor_${backend}_torchbench_${dtype}_training_${device}_performance.csv"
-      local dtype_arg="--${dtype}"
-      if [ "$dtype" == notset ]; then
-        dtype_arg="--float32"
-      fi
-      for model in "${models[@]}"; do
-        PYTHONPATH="$(pwd)"/torchbench python benchmarks/dynamo/torchbench.py \
-          --performance --only "$model" --backend "$backend" --training --devices "$device" "$dtype_arg" \
-          --output "$TEST_REPORTS_DIR/inductor_${backend}_torchbench_${dtype}_training_${device}_performance.csv" || true
-      done
-    done
+  touch "$TEST_REPORTS_DIR/inductor_${backend}_torchbench_${dtype}_training_${device}_performance.csv"
+  touch "$TEST_REPORTS_DIR/inductor_${backend}_torchbench_${dtype}_inference_${device}_performance.csv"
+
+  echo "Setup complete, launching torchbench training performance run"
+  for model in hf_T5 llama BERT_pytorch dcgan hf_GPT2 yolov3 resnet152; do
+    PYTHONPATH="$(pwd)"/torchbench python benchmarks/dynamo/torchbench.py \
+      --performance --only "$model" --backend "$backend" --training --devices "$device" \
+      --output "$TEST_REPORTS_DIR/inductor_${backend}_torchbench_${dtype}_training_${device}_performance.csv"
+  done

+  echo "Launching torchbench inference performance run"
+  for model in hf_T5 llama BERT_pytorch dcgan hf_GPT2 yolov3 resnet152; do
+    PYTHONPATH="$(pwd)"/torchbench python benchmarks/dynamo/torchbench.py \
+      --performance --only "$model" --backend "$backend" --inference --devices "$device" \
+      --output "$TEST_REPORTS_DIR/inductor_${backend}_torchbench_${dtype}_inference_${device}_performance.csv"
   done

   echo "Pytorch benchmark on mps device completed"
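Aside (not part of the diff): the removed side sweeps backend x dtype x model and writes one CSV per combination, tolerating per-model failures with || true. A trimmed, echo-only sketch of that loop shape, with a shortened model list:

    models=(hf_T5 llama)                     # shortened list for illustration
    for backend in eager inductor; do
        for dtype in notset float16; do
            dtype_arg="--${dtype}"
            [ "$dtype" == notset ] && dtype_arg="--float32"
            for model in "${models[@]}"; do
                echo "would run: torchbench.py --only $model --backend $backend $dtype_arg"
            done
        done
    done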
@@ -119,6 +119,12 @@ popd
 git rm -rf "$install_path" || true
 mv "$pt_checkout/docs/build/html" "$install_path"

+# Prevent Google from indexing $install_path/_modules. This folder contains
+# generated source files.
+# NB: the following only works on gnu sed. The sed shipped with mac os is different.
+# One can `brew install gnu-sed` on a mac and then use "gsed" instead of "sed".
+find "$install_path/_modules" -name "*.html" -print0 | xargs -0 sed -i '/<head>/a \ \ <meta name="robots" content="noindex">'
+
 git add "$install_path" || true
 git status
 git config user.email "soumith+bot@pytorch.org"
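Aside (not part of the diff): as the added comments note, the in-place append relies on GNU sed; on macOS the equivalent edit is usually done with gsed after brew install gnu-sed. A sketch against a single hypothetical file:

    gsed -i '/<head>/a \ \ <meta name="robots" content="noindex">' _modules/example.html   # example.html is hypothetical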
.circleci/scripts/binary_windows_arm64_build.sh (new file, 22 lines)
@@ -0,0 +1,22 @@
+#!/bin/bash
+set -eux -o pipefail
+
+source "${BINARY_ENV_FILE:-/c/w/env}"
+mkdir -p "$PYTORCH_FINAL_PACKAGE_DIR"
+
+export USE_SCCACHE=1
+export SCCACHE_IGNORE_SERVER_IO_ERROR=1
+
+echo "Free space on filesystem before build:"
+df -h
+
+export NIGHTLIES_PYTORCH_ROOT="$PYTORCH_ROOT"
+
+if [[ "$PACKAGE_TYPE" == 'libtorch' ]]; then
+    pytorch/.ci/pytorch/windows/arm64/build_libtorch.bat
+elif [[ "$PACKAGE_TYPE" == 'wheel' ]]; then
+    pytorch/.ci/pytorch/windows/arm64/build_pytorch.bat
+fi
+
+echo "Free space on filesystem after build:"
+df -h
.circleci/scripts/binary_windows_arm64_test.sh (new file, 6 lines)
@@ -0,0 +1,6 @@
+#!/bin/bash
+set -eux -o pipefail
+
+source "${BINARY_ENV_FILE:-/c/w/env}"
+
+pytorch/.ci/pytorch/windows/arm64/smoke_test.bat
@@ -4,13 +4,11 @@ set -eux -o pipefail
 source "${BINARY_ENV_FILE:-/c/w/env}"
 mkdir -p "$PYTORCH_FINAL_PACKAGE_DIR"

-if [[ "$OS" != "windows-arm64" ]]; then
-  export CUDA_VERSION="${DESIRED_CUDA/cu/}"
-  export USE_SCCACHE=1
-  export SCCACHE_BUCKET=ossci-compiler-cache
-  export SCCACHE_IGNORE_SERVER_IO_ERROR=1
-  export VC_YEAR=2022
-fi
+export CUDA_VERSION="${DESIRED_CUDA/cu/}"
+export USE_SCCACHE=1
+export SCCACHE_BUCKET=ossci-compiler-cache
+export SCCACHE_IGNORE_SERVER_IO_ERROR=1
+export VC_YEAR=2022

 if [[ "$DESIRED_CUDA" == 'xpu' ]]; then
     export USE_SCCACHE=0
@@ -23,16 +21,7 @@ df -h

 pushd "$PYTORCH_ROOT/.ci/pytorch/"
 export NIGHTLIES_PYTORCH_ROOT="$PYTORCH_ROOT"

-if [[ "$OS" == "windows-arm64" ]]; then
-  if [[ "$PACKAGE_TYPE" == 'libtorch' ]]; then
-    ./windows/arm64/build_libtorch.bat
-  elif [[ "$PACKAGE_TYPE" == 'wheel' ]]; then
-    ./windows/arm64/build_pytorch.bat
-  fi
-else
-  ./windows/internal/build_wheels.bat
-fi
+./windows/internal/build_wheels.bat

 echo "Free space on filesystem after build:"
 df -h
@@ -11,11 +11,6 @@ if [[ "$DESIRED_CUDA" == 'xpu' ]]; then
 fi

 pushd "$PYTORCH_ROOT/.ci/pytorch/"
-if [[ "$OS" == "windows-arm64" ]]; then
-  ./windows/arm64/smoke_test.bat
-else
-  ./windows/internal/smoke_test.bat
-fi
+./windows/internal/smoke_test.bat

 popd
.github/actionlint.yaml (6 lines changed)
@@ -45,14 +45,10 @@ self-hosted-runner:
   - windows.g5.4xlarge.nvidia.gpu
   # Windows ARM64 runners
   - windows-11-arm64
-  # Organization-wide AMD-hosted runners
-  # MI2xx runners
+  # Organization-wide AMD hosted runners
   - linux.rocm.gpu
   - linux.rocm.gpu.2
   - linux.rocm.gpu.4
-  # MI300 runners
-  - linux.rocm.gpu.mi300.2
-  - linux.rocm.gpu.mi300.4
   - rocm-docker
   # Repo-specific Apple hosted runners
   - macos-m1-ultra
.github/actions/binary-docker-build/action.yml (file removed, 70 lines)
@@ -1,70 +0,0 @@
-name: Binary docker build
-
-description: Build docker image for binary builds
-
-inputs:
-  docker-image-name:
-    description: Docker image name for PR builds
-    required: true
-  docker-build-dir:
-    description: Location of the build.sh relative to .ci/docker
-    required: true
-  custom-tag-prefix:
-    description: Custom tag prefix for the docker image
-    required: false
-  DOCKER_TOKEN:
-    description: Docker token for authentication
-    required: true
-  DOCKER_ID:
-    description: Docker ID for authentication
-    required: true
-
-runs:
-  using: composite
-  steps:
-    - name: Checkout PyTorch
-      uses: pytorch/pytorch/.github/actions/checkout-pytorch@main
-
-    - name: Calculate docker image
-      id: calculate-docker-image
-      uses: pytorch/test-infra/.github/actions/calculate-docker-image@main
-      with:
-        docker-image-name: ${{ inputs.docker-image-name }}
-        docker-build-dir: .ci/docker
-        custom-tag-prefix: ${{ inputs.custom-tag-prefix }}
-        docker-build-script: ${{ inputs.docker-build-dir }}/build.sh
-        always-rebuild: true
-        push: true
-
-    - name: Tag and (if WITH_PUSH) push docker image to docker.io
-      env:
-        DOCKER_TOKEN: ${{ inputs.DOCKER_TOKEN }}
-        DOCKER_ID: ${{ inputs.DOCKER_ID }}
-        DOCKER_IMAGE_NAME: ${{ inputs.docker-image-name }}
-        DOCKER_IMAGE_PREFIX: ${{ inputs.custom-tag-prefix }}
-        CREATED_FULL_DOCKER_IMAGE_NAME: ${{ steps.calculate-docker-image.outputs.docker-image }}
-      shell: bash
-      run: |
-        set -euox pipefail
-        GITHUB_REF=${GITHUB_REF:-$(git symbolic-ref -q HEAD || git describe --tags --exact-match)}
-        GIT_BRANCH_NAME=${GITHUB_REF##*/}
-        GIT_COMMIT_SHA=${GITHUB_SHA:-$(git rev-parse HEAD)}
-        CI_FOLDER_SHA=$(git rev-parse HEAD:.ci/docker)
-
-        DOCKER_IMAGE_NAME_PREFIX=docker.io/pytorch/${DOCKER_IMAGE_NAME}:${DOCKER_IMAGE_PREFIX}
-
-        docker tag ${CREATED_FULL_DOCKER_IMAGE_NAME} ${DOCKER_IMAGE_NAME_PREFIX}
-        docker tag ${CREATED_FULL_DOCKER_IMAGE_NAME} ${DOCKER_IMAGE_NAME_PREFIX}-${GIT_BRANCH_NAME}
-        docker tag ${CREATED_FULL_DOCKER_IMAGE_NAME} ${DOCKER_IMAGE_NAME_PREFIX}-${GIT_COMMIT_SHA}
-        docker tag ${CREATED_FULL_DOCKER_IMAGE_NAME} ${DOCKER_IMAGE_NAME_PREFIX}-${CI_FOLDER_SHA}
-
-        # Pretty sure Github will mask tokens and I'm not sure if it will even be
-        # printed due to pipe, but just in case
-        set +x
-        if [[ ${WITH_PUSH:-false} == "true" ]]; then
-          echo "${DOCKER_TOKEN}" | docker login -u "${DOCKER_ID}" --password-stdin
-          docker push ${DOCKER_IMAGE_NAME_PREFIX}
-          docker push ${DOCKER_IMAGE_NAME_PREFIX}-${GIT_BRANCH_NAME}
-          docker push ${DOCKER_IMAGE_NAME_PREFIX}-${GIT_COMMIT_SHA}
-          docker push ${DOCKER_IMAGE_NAME_PREFIX}-${CI_FOLDER_SHA}
-        fi
.github/labeler.yml (19 lines changed)
@@ -112,22 +112,3 @@
 - torch/csrc/inductor/aoti_include/xpu.h
 - torch/csrc/inductor/cpp_wrapper/device_internal/xpu.h
 - torch/csrc/inductor/cpp_wrapper/xpu.h
-
-"release notes: inductor (aoti)":
-- torch/_C/_aoti.pyi
-- torch/_dynamo/repro/aoti.py
-- torch/_export/serde/aoti_schema.py
-- torch/_higher_order_ops/aoti_call_delegate.py
-- torch/_inductor/codegen/aoti_runtime/**
-- torch/_inductor/codegen/aoti_hipify_utils.py
-- torch/_inductor/codegen/cpp_wrapper_cpu.py
-- torch/_inductor/codegen/cpp_wrapper_gpu.py
-- torch/_inductor/aoti_eager.py
-- torch/csrc/inductor/aoti_runtime/**
-- torch/csrc/inductor/aoti_torch/**
-- torch/csrc/inductor/aoti_runner/**
-- torch/csrc/inductor/aoti_eager/**
-- torch/csrc/inductor/aoti_package/**
-- torch/csrc/inductor/aoti_include/**
-- torchgen/aoti/**
-- torchgen/gen_aoti_c_shim.py
.github/pytorch-probot.yml (1 line changed)
@@ -16,7 +16,6 @@ ciflow_push_tags:
 - ciflow/mps
 - ciflow/nightly
 - ciflow/periodic
-- ciflow/periodic-rocm-mi300
 - ciflow/rocm
 - ciflow/rocm-mi300
 - ciflow/s390
@@ -30,7 +30,7 @@ CUDA_ARCHES_CUDNN_VERSION = {
 }

 # NOTE: Also update the ROCm sources in tools/nightly.py when changing this list
-ROCM_ARCHES = ["6.3", "6.4"]
+ROCM_ARCHES = ["6.2.4", "6.3"]

 XPU_ARCHES = ["xpu"]

@@ -173,7 +173,7 @@ WHEEL_CONTAINER_IMAGES = {
     "xpu": f"pytorch/manylinux2_28-builder:xpu-{DEFAULT_TAG}",
     "cpu": f"pytorch/manylinux2_28-builder:cpu-{DEFAULT_TAG}",
     "cpu-aarch64": f"pytorch/manylinux2_28_aarch64-builder:cpu-aarch64-{DEFAULT_TAG}",
-    "cpu-s390x": "pytorch/manylinuxs390x-builder:cpu-s390x",
+    "cpu-s390x": f"pytorch/manylinuxs390x-builder:cpu-s390x-{DEFAULT_TAG}",
 }

 RELEASE = "release"
.github/scripts/generate_ci_workflows.py (73 lines changed)
@@ -227,6 +227,42 @@ WINDOWS_BINARY_BUILD_WORKFLOWS = [
             isolated_workflow=True,
         ),
     ),
+]
+
+WINDOWS_BINARY_SMOKE_WORKFLOWS = [
+    BinaryBuildWorkflow(
+        os=OperatingSystem.WINDOWS,
+        package_type="libtorch",
+        build_variant=generate_binary_build_matrix.RELEASE,
+        build_configs=generate_binary_build_matrix.generate_libtorch_matrix(
+            OperatingSystem.WINDOWS,
+            generate_binary_build_matrix.RELEASE,
+            arches=["cpu"],
+            libtorch_variants=["shared-with-deps"],
+        ),
+        branches="main",
+        ciflow_config=CIFlowConfig(
+            isolated_workflow=True,
+        ),
+    ),
+    BinaryBuildWorkflow(
+        os=OperatingSystem.WINDOWS,
+        package_type="libtorch",
+        build_variant=generate_binary_build_matrix.DEBUG,
+        build_configs=generate_binary_build_matrix.generate_libtorch_matrix(
+            OperatingSystem.WINDOWS,
+            generate_binary_build_matrix.DEBUG,
+            arches=["cpu"],
+            libtorch_variants=["shared-with-deps"],
+        ),
+        branches="main",
+        ciflow_config=CIFlowConfig(
+            isolated_workflow=True,
+        ),
+    ),
+]
+
+WINDOWS_ARM64_BINARY_BUILD_WORKFLOWS = [
     BinaryBuildWorkflow(
         os=OperatingSystem.WINDOWS_ARM64,
         package_type="wheel",
@@ -272,39 +308,6 @@ WINDOWS_BINARY_BUILD_WORKFLOWS = [
     ),
 ]

-WINDOWS_BINARY_SMOKE_WORKFLOWS = [
-    BinaryBuildWorkflow(
-        os=OperatingSystem.WINDOWS,
-        package_type="libtorch",
-        build_variant=generate_binary_build_matrix.RELEASE,
-        build_configs=generate_binary_build_matrix.generate_libtorch_matrix(
-            OperatingSystem.WINDOWS,
-            generate_binary_build_matrix.RELEASE,
-            arches=["cpu"],
-            libtorch_variants=["shared-with-deps"],
-        ),
-        branches="main",
-        ciflow_config=CIFlowConfig(
-            isolated_workflow=True,
-        ),
-    ),
-    BinaryBuildWorkflow(
-        os=OperatingSystem.WINDOWS,
-        package_type="libtorch",
-        build_variant=generate_binary_build_matrix.DEBUG,
-        build_configs=generate_binary_build_matrix.generate_libtorch_matrix(
-            OperatingSystem.WINDOWS,
-            generate_binary_build_matrix.DEBUG,
-            arches=["cpu"],
-            libtorch_variants=["shared-with-deps"],
-        ),
-        branches="main",
-        ciflow_config=CIFlowConfig(
-            isolated_workflow=True,
-        ),
-    ),
-]
-
 MACOS_BINARY_BUILD_WORKFLOWS = [
     BinaryBuildWorkflow(
         os=OperatingSystem.MACOS_ARM64,
@@ -399,6 +402,10 @@ def main() -> None:
             jinja_env.get_template("windows_binary_build_workflow.yml.j2"),
             WINDOWS_BINARY_SMOKE_WORKFLOWS,
         ),
+        (
+            jinja_env.get_template("windows_arm64_binary_build_workflow.yml.j2"),
+            WINDOWS_ARM64_BINARY_BUILD_WORKFLOWS,
+        ),
         (
             jinja_env.get_template("macos_binary_build_workflow.yml.j2"),
             MACOS_BINARY_BUILD_WORKFLOWS,
.github/scripts/trymerge.py (2 lines changed)
@@ -434,7 +434,7 @@ query ($owner: String!, $name: String!) {
 RE_GHSTACK_HEAD_REF = re.compile(r"^(gh/[^/]+/[0-9]+/)head$")
 RE_GHSTACK_DESC = re.compile(r"Stack.*:\r?\n(\* [^\r\n]+\r?\n)+", re.MULTILINE)
 RE_PULL_REQUEST_RESOLVED = re.compile(
-    r"(Pull Request resolved|Pull-Request-resolved): "
+    r"Pull Request resolved: "
     r"https://github.com/(?P<owner>[^/]+)/(?P<repo>[^/]+)/pull/(?P<number>[0-9]+)",
     re.MULTILINE,
 )
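Aside (not part of the diff): the removed pattern accepts either commit-message prefix. A rough grep -E approximation (simplified: no named capture groups, hypothetical PR URL):

    printf '%s\n' \
        'Pull Request resolved: https://github.com/pytorch/pytorch/pull/12345' \
        'Pull-Request-resolved: https://github.com/pytorch/pytorch/pull/12345' \
      | grep -E '(Pull Request resolved|Pull-Request-resolved): https://github\.com/[^/]+/[^/]+/pull/[0-9]+'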
.github/templates/windows_arm64_binary_build_workflow.yml.j2 (new file, 197 lines)
@@ -0,0 +1,197 @@
+{% import 'common.yml.j2' as common %}
+{% import 'upload.yml.j2' as upload %}
+
+{%- block name -%}
+# Template is at:    .github/templates/windows_arm64_binary_build_workflow.yml.j2
+# Generation script: .github/scripts/generate_ci_workflows.py
+name: !{{ build_environment }}
+{%- endblock %}
+
+{%- macro set_runner_specific_vars() -%}
+      # NOTE: These environment variables are put here so that they can be applied on every job equally
+      #       They are also here because setting them at a workflow level doesn't give us access to the
+      #       runner.temp variable, which we need.
+      - name: Populate binary env
+        shell: cmd
+        run: |
+          echo BINARY_ENV_FILE=%RUNNER_TEMP%/env>> %GITHUB_ENV%
+          echo PYTORCH_FINAL_PACKAGE_DIR=%RUNNER_TEMP%/artifacts>> %GITHUB_ENV%
+          echo WIN_PACKAGE_WORK_DIR=%RUNNER_TEMP%>> %GITHUB_ENV%
+{%- endmacro %}
+
+on:
+  push:
+    branches:
+      - !{{ branches }}
+{%- if branches == "nightly" %}
+    tags:
+      # NOTE: Binary build pipelines should only get triggered on release candidate builds
+      # Release candidate tags look like: v1.11.0-rc1
+      - v[0-9]+.[0-9]+.[0-9]+-rc[0-9]+
+{%- endif %}
+{%- for label in ciflow_config.labels | sort %}
+  {%- if loop.first and branches != "nightly" %}
+    tags:
+  {%- endif %}
+      - '!{{ label }}/*'
+{%- endfor %}
+  workflow_dispatch:
+
+env:
+  BUILD_ENVIRONMENT: !{{ build_environment }}
+  GITHUB_TOKEN: ${{ secrets.GITHUB_TOKEN }}
+  PR_NUMBER: ${{ github.event.pull_request.number }}
+  SHA1: ${{ github.event.pull_request.head.sha || github.sha }}
+  SKIP_ALL_TESTS: 1
+  PYTORCH_ROOT: /pytorch
+  DOWNLOADS_DIR: c:\temp\downloads
+  DEPENDENCIES_DIR: c:\temp\dependencies
+  ENABLE_APL: 1
+  ENABLE_OPENBLAS: 0
+  MSVC_VERSION : 14.42
+  AWS_DEFAULT_REGION: us-east-1
+
+jobs:
+  get-label-type:
+    if: github.repository_owner == 'pytorch'
+    name: get-label-type
+    uses: pytorch/pytorch/.github/workflows/_runner-determinator.yml@main
+    with:
+      triggering_actor: ${{ github.triggering_actor }}
+      issue_owner: ${{ github.event.pull_request.user.login || github.event.issue.user.login }}
+      curr_branch: ${{ github.head_ref || github.ref_name }}
+      curr_ref_type: ${{ github.ref_type }}
+
+{%- for config in build_configs %}
+  !{{ config["build_name"] }}-build:
+    if: ${{ github.repository_owner == 'pytorch' }}
+    needs: get-label-type
+    runs-on: "windows-11-arm64"
+    timeout-minutes: !{{ common.timeout_minutes }}
+    !{{ upload.binary_env(config, True) }}
+    {%- if config.pytorch_extra_install_requirements is defined and config.pytorch_extra_install_requirements|d('')|length > 0 %}
+      PYTORCH_EXTRA_INSTALL_REQUIREMENTS: !{{ config.pytorch_extra_install_requirements }}
+    {%- endif %}
+    steps:
+!{{ set_runner_specific_vars() }}
+      - name: Bootstrap folders
+        shell: cmd
+        run: |
+          mkdir "%NIGHTLIES_PYTORCH_ROOT%"
+          mkdir "%PYTORCH_FINAL_PACKAGE_DIR%"
+      - name: Git checkout PyTorch
+        uses: actions/checkout@v4
+        with:
+          path: "pytorch"
+      - name: Bootstrap Build Tools
+        shell: cmd
+        run: |
+          "pytorch/.ci/pytorch/windows/arm64/bootstrap_buildtools.bat"
+      - name: Bootstrap Git
+        shell: cmd
+        run: |
+          "pytorch/.ci/pytorch/windows/arm64/bootstrap_git.bat"
+      - name: Remove Pytorch folder
+        shell: cmd
+        run: |
+          rmdir /s /q "pytorch"
+      - name: Git checkout PyTorch - recursive
+        uses: actions/checkout@v4
+        with:
+          path: "pytorch"
+          submodules: recursive
+      - name: Bootstrap Python
+        shell: cmd
+        run: |
+          "pytorch/.ci/pytorch/windows/arm64/bootstrap_python.bat"
+      - name: Bootstrap APL
+        shell: cmd
+        run: |
+          "pytorch/.ci/pytorch/windows/arm64/bootstrap_apl.bat"
+      - name: Bootstrap Rust
+        shell: cmd
+        run: |
+          "pytorch/.ci/pytorch/windows/arm64/bootstrap_rust.bat"
+      - name: Bootstrap sccache
+        shell: cmd
+        run: |
+          "pytorch/.ci/pytorch/windows/arm64/bootstrap_sccache.bat"
+      - name: Bootstrap Libuv
+        shell: cmd
+        run: |
+          "pytorch/.ci/pytorch/windows/arm64/bootstrap_libuv.bat"
+      - name: Populate binary env
+        shell: bash
+        run: |
+          "pytorch/.circleci/scripts/binary_populate_env.sh"
+      - name: Build PyTorch binary
+        shell: bash
+        run: |
+          "pytorch/.circleci/scripts/binary_windows_arm64_build.sh"
+      - uses: !{{ common.upload_artifact_action }}
+        if: always()
+        with:
+          name: !{{ config["build_name"] }}
+          retention-days: 14
+          if-no-files-found: error
+          path: "${{ env.PYTORCH_FINAL_PACKAGE_DIR }}"
+  !{{ config["build_name"] }}-test:  # Testing
+    if: ${{ github.repository_owner == 'pytorch' }}
+    needs:
+      - !{{ config["build_name"] }}-build
+      - get-label-type
+    runs-on: "windows-11-arm64"
+    timeout-minutes: !{{ common.timeout_minutes }}
+    !{{ upload.binary_env(config, True) }}
+    steps:
+!{{ set_runner_specific_vars() }}
+      - uses: !{{ common.download_artifact_action }}
+        name: Download Build Artifacts
+        with:
+          name: !{{ config["build_name"] }}
+          path: "${{ env.PYTORCH_FINAL_PACKAGE_DIR }}"
+      - name: Git checkout PyTorch
+        uses: actions/checkout@v4
+        with:
+          path: "pytorch"
+      - name: Bootstrap Git
+        shell: cmd
+        run: |
+          "pytorch/.ci/pytorch/windows/arm64/bootstrap_git.bat"
+      - name: Remove Pytorch folder
+        shell: cmd
+        run: |
+          rmdir /s /q "pytorch"
+      - name: Git checkout PyTorch
+        uses: actions/checkout@v4
+        with:
+          path: "pytorch"
+          submodules: recursive
+      - name: Bootstrap APL
+        shell: cmd
+        run: |
+          "pytorch/.ci/pytorch/windows/arm64/bootstrap_apl.bat"
+      - name: Bootstrap Python
+        shell: cmd
+        run: |
+          "pytorch/.ci/pytorch/windows/arm64/bootstrap_python.bat"
+      - name: Bootstrap Build Tools
+        shell: cmd
+        run: |
+          "pytorch/.ci/pytorch/windows/arm64/bootstrap_buildtools.bat"
+      - name: Bootstrap Rust
+        shell: cmd
+        run: |
+          "pytorch/.ci/pytorch/windows/arm64/bootstrap_rust.bat"
+      - name: Populate binary env
+        shell: bash
+        run: |
+          "pytorch/.circleci/scripts/binary_populate_env.sh"
+      - name: Test PyTorch binary
+        shell: bash
+        run: |
+          "pytorch/.circleci/scripts/binary_windows_arm64_test.sh"
+{%- if branches == "nightly" %}
+!{{ upload.upload_binaries(config, True) }}
+{%- endif %}
+{%- endfor %}
@@ -49,15 +49,6 @@ env:
   PR_NUMBER: ${{ github.event.pull_request.number }}
   SHA1: ${{ github.event.pull_request.head.sha || github.sha }}
   SKIP_ALL_TESTS: 1
-  OS: !{{ os }}
-{%- if os == "windows-arm64" %}
-  PYTORCH_ROOT: /pytorch
-  DOWNLOADS_DIR: c:\temp\downloads
-  DEPENDENCIES_DIR: c:\temp\dependencies
-  ENABLE_APL: 1
-  ENABLE_OPENBLAS: 0
-  MSVC_VERSION : 14.42
-{%- endif %}
 !{{ common.concurrency(build_environment) }}

 jobs:
@@ -75,79 +66,20 @@ jobs:
   !{{ config["build_name"] }}-build:
     if: ${{ github.repository_owner == 'pytorch' }}
    needs: get-label-type
-{%- if os == "windows-arm64" %}
-    runs-on: "windows-11-arm64"
-{%- else %}
 {%- if branches == "nightly" %}
     runs-on: "${{ needs.get-label-type.outputs.label-type }}windows.4xlarge"
 {%- else %}
     runs-on: "${{ needs.get-label-type.outputs.label-type }}windows.4xlarge.nonephemeral"
 {%- endif %}
-{%- endif %}
     timeout-minutes: !{{ common.timeout_minutes_windows_binary }}
     !{{ upload.binary_env(config, True) }}
     {%- if config.pytorch_extra_install_requirements is defined and config.pytorch_extra_install_requirements|d('')|length > 0 %}
       PYTORCH_EXTRA_INSTALL_REQUIREMENTS: !{{ config.pytorch_extra_install_requirements }}
     {%- endif %}
     steps:
-{%- if os == "windows-arm64" %}
-      - name: Populate binary env
-        shell: cmd
-        run: |
-          echo BINARY_ENV_FILE=%RUNNER_TEMP%/env>> %GITHUB_ENV%
-          echo PYTORCH_FINAL_PACKAGE_DIR=%RUNNER_TEMP%/artifacts>> %GITHUB_ENV%
-          echo WIN_PACKAGE_WORK_DIR=%RUNNER_TEMP%>> %GITHUB_ENV%
-      - name: Bootstrap folders
-        shell: cmd
-        run: |
-          mkdir "%NIGHTLIES_PYTORCH_ROOT%"
-          mkdir "%PYTORCH_FINAL_PACKAGE_DIR%"
-      - name: Git checkout PyTorch
-        uses: actions/checkout@v4
-        with:
-          path: "pytorch"
-      - name: Bootstrap Build Tools
-        shell: cmd
-        run: |
-          "pytorch/.ci/pytorch/windows/arm64/bootstrap_buildtools.bat"
-      - name: Bootstrap Git
-        shell: cmd
-        run: |
-          "pytorch/.ci/pytorch/windows/arm64/bootstrap_git.bat"
-      - name: Remove Pytorch folder
-        shell: cmd
-        run: |
-          rmdir /s /q "pytorch"
-      - name: Git checkout PyTorch - recursive
-        uses: actions/checkout@v4
-        with:
-          path: "pytorch"
-          submodules: recursive
-      - name: Bootstrap Python
-        shell: cmd
-        run: |
-          "pytorch/.ci/pytorch/windows/arm64/bootstrap_python.bat"
|
|
||||||
- name: Bootstrap APL
|
|
||||||
shell: cmd
|
|
||||||
run: |
|
|
||||||
"pytorch/.ci/pytorch/windows/arm64/bootstrap_apl.bat"
|
|
||||||
- name: Bootstrap Rust
|
|
||||||
shell: cmd
|
|
||||||
run: |
|
|
||||||
"pytorch/.ci/pytorch/windows/arm64/bootstrap_rust.bat"
|
|
||||||
- name: Bootstrap sccache
|
|
||||||
shell: cmd
|
|
||||||
run: |
|
|
||||||
"pytorch/.ci/pytorch/windows/arm64/bootstrap_sccache.bat"
|
|
||||||
- name: Bootstrap Libuv
|
|
||||||
shell: cmd
|
|
||||||
run: |
|
|
||||||
"pytorch/.ci/pytorch/windows/arm64/bootstrap_libuv.bat"
|
|
||||||
{%- else %}
|
|
||||||
!{{ set_runner_specific_vars() }}
|
|
||||||
!{{ common.setup_ec2_windows() }}
|
!{{ common.setup_ec2_windows() }}
|
||||||
|
!{{ set_runner_specific_vars() }}
|
||||||
!{{ common.checkout(deep_clone=False, directory="pytorch") }}
|
!{{ common.checkout(deep_clone=False, directory="pytorch") }}
|
||||||
{%- endif %}
|
|
||||||
- name: Populate binary env
|
- name: Populate binary env
|
||||||
shell: bash
|
shell: bash
|
||||||
run: |
|
run: |
|
||||||
@ -163,17 +95,12 @@ jobs:
|
|||||||
retention-days: 14
|
retention-days: 14
|
||||||
if-no-files-found: error
|
if-no-files-found: error
|
||||||
path: "${{ env.PYTORCH_FINAL_PACKAGE_DIR }}"
|
path: "${{ env.PYTORCH_FINAL_PACKAGE_DIR }}"
|
||||||
{%- if os != "windows-arm64" %}
|
|
||||||
!{{ common.wait_and_kill_ssh_windows('pytorch') }}
|
!{{ common.wait_and_kill_ssh_windows('pytorch') }}
|
||||||
{% endif %}
|
|
||||||
!{{ config["build_name"] }}-test: # Testing
|
!{{ config["build_name"] }}-test: # Testing
|
||||||
if: ${{ github.repository_owner == 'pytorch' }}
|
if: ${{ github.repository_owner == 'pytorch' }}
|
||||||
needs:
|
needs:
|
||||||
- !{{ config["build_name"] }}-build
|
- !{{ config["build_name"] }}-build
|
||||||
- get-label-type
|
- get-label-type
|
||||||
{%- if os == "windows-arm64" %}
|
|
||||||
runs-on: "windows-11-arm64"
|
|
||||||
{%- else %}
|
|
||||||
{%- if config["gpu_arch_type"] == "cuda" %}
|
{%- if config["gpu_arch_type"] == "cuda" %}
|
||||||
{%- if branches == "nightly" %}
|
{%- if branches == "nightly" %}
|
||||||
runs-on: "${{ needs.get-label-type.outputs.label-type }}windows.g4dn.xlarge"
|
runs-on: "${{ needs.get-label-type.outputs.label-type }}windows.g4dn.xlarge"
|
||||||
@ -186,61 +113,18 @@ jobs:
|
|||||||
{%- else %}
|
{%- else %}
|
||||||
runs-on: "${{ needs.get-label-type.outputs.label-type }}windows.4xlarge.nonephemeral"
|
runs-on: "${{ needs.get-label-type.outputs.label-type }}windows.4xlarge.nonephemeral"
|
||||||
{%- endif %}
|
{%- endif %}
|
||||||
{%- endif %}
|
|
||||||
{%- endif %}
|
{%- endif %}
|
||||||
timeout-minutes: !{{ common.timeout_minutes_windows_binary }}
|
timeout-minutes: !{{ common.timeout_minutes_windows_binary }}
|
||||||
!{{ upload.binary_env(config, True) }}
|
!{{ upload.binary_env(config, True) }}
|
||||||
steps:
|
steps:
|
||||||
{%- if os == "windows-arm64" %}
|
|
||||||
- name: Populate binary env
|
|
||||||
shell: cmd
|
|
||||||
run: |
|
|
||||||
echo BINARY_ENV_FILE=%RUNNER_TEMP%/env>> %GITHUB_ENV%
|
|
||||||
echo PYTORCH_FINAL_PACKAGE_DIR=%RUNNER_TEMP%/artifacts>> %GITHUB_ENV%
|
|
||||||
echo WIN_PACKAGE_WORK_DIR=%RUNNER_TEMP%>> %GITHUB_ENV%
|
|
||||||
- name: Git checkout PyTorch
|
|
||||||
uses: actions/checkout@v4
|
|
||||||
with:
|
|
||||||
path: "pytorch"
|
|
||||||
- name: Populate binary env
|
|
||||||
shell: cmd
|
|
||||||
run: |
|
|
||||||
"pytorch/.ci/pytorch/windows/arm64/bootstrap_git.bat"
|
|
||||||
- name: Remove Pytorch folder
|
|
||||||
shell: cmd
|
|
||||||
run: |
|
|
||||||
rmdir /s /q "pytorch"
|
|
||||||
- name: Git checkout PyTorch
|
|
||||||
uses: actions/checkout@v4
|
|
||||||
with:
|
|
||||||
path: "pytorch"
|
|
||||||
submodules: recursive
|
|
||||||
- name: Bootstrap APL
|
|
||||||
shell: cmd
|
|
||||||
run: |
|
|
||||||
"pytorch/.ci/pytorch/windows/arm64/bootstrap_apl.bat"
|
|
||||||
- name: Bootstrap Python
|
|
||||||
shell: cmd
|
|
||||||
run: |
|
|
||||||
"pytorch/.ci/pytorch/windows/arm64/bootstrap_python.bat"
|
|
||||||
- name: Bootstrap Build Tools
|
|
||||||
shell: cmd
|
|
||||||
run: |
|
|
||||||
"pytorch/.ci/pytorch/windows/arm64/bootstrap_buildtools.bat"
|
|
||||||
- name: Bootstrap Rust
|
|
||||||
shell: cmd
|
|
||||||
run: |
|
|
||||||
"pytorch/.ci/pytorch/windows/arm64/bootstrap_rust.bat"
|
|
||||||
{%- else %}
|
|
||||||
!{{ common.setup_ec2_windows() }}
|
!{{ common.setup_ec2_windows() }}
|
||||||
!{{ common.checkout(deep_clone=False, directory="pytorch") }}
|
|
||||||
!{{ set_runner_specific_vars() }}
|
!{{ set_runner_specific_vars() }}
|
||||||
{%- endif %}
|
|
||||||
- uses: !{{ common.download_artifact_action }}
|
- uses: !{{ common.download_artifact_action }}
|
||||||
name: Download Build Artifacts
|
name: Download Build Artifacts
|
||||||
with:
|
with:
|
||||||
name: !{{ config["build_name"] }}
|
name: !{{ config["build_name"] }}
|
||||||
path: "${{ env.PYTORCH_FINAL_PACKAGE_DIR }}"
|
path: "${{ env.PYTORCH_FINAL_PACKAGE_DIR }}"
|
||||||
|
!{{ common.checkout(deep_clone=False, directory="pytorch") }}
|
||||||
- name: Populate binary env
|
- name: Populate binary env
|
||||||
shell: bash
|
shell: bash
|
||||||
run: |
|
run: |
|
||||||
@ -249,10 +133,8 @@ jobs:
|
|||||||
shell: bash
|
shell: bash
|
||||||
run: |
|
run: |
|
||||||
"${PYTORCH_ROOT}/.circleci/scripts/binary_windows_test.sh"
|
"${PYTORCH_ROOT}/.circleci/scripts/binary_windows_test.sh"
|
||||||
{%- if os != "windows-arm64" %}
|
|
||||||
!{{ common.wait_and_kill_ssh_windows('pytorch') }}
|
!{{ common.wait_and_kill_ssh_windows('pytorch') }}
|
||||||
{%- endif %}
|
|
||||||
{%- if branches == "nightly" %}
|
{%- if branches == "nightly" %}
|
||||||
!{{ upload.upload_binaries(config, True) }}
|
!{{ upload.upload_binaries(config, True) }}
|
||||||
{%- endif %}
|
{%- endif %}
|
||||||
{%- endfor %}
|
{%- endfor %}
|
||||||
|
|||||||
.github/workflows/_bazel-build-test.yml (vendored, 18 lines changed)

@@ -33,10 +33,6 @@ on:
         default: "linux.large"
         description: Runner type
 
-permissions:
-  id-token: write
-  contents: read
-
 env:
   GIT_DEFAULT_BRANCH: ${{ github.event.repository.default_branch }}
 
@@ -84,13 +80,6 @@ jobs:
       - name: Setup Linux
        uses: ./.github/actions/setup-linux
 
-      - name: Configure AWS Credentials
-        uses: aws-actions/configure-aws-credentials@v4
-        with:
-          role-to-assume: arn:aws:iam::308535385114:role/gha_workflow_s3_and_ecr_read_only
-          role-session-name: gha-bazel-build
-          aws-region: us-east-1
-
      - name: Calculate docker image
        id: calculate-docker-image
        uses: pytorch/test-infra/.github/actions/calculate-docker-image@main
@@ -213,13 +202,6 @@ jobs:
        uses: ./.github/actions/chown-workspace
        if: always()
 
-      - name: Configure AWS Credentials
-        uses: aws-actions/configure-aws-credentials@v4
-        with:
-          role-to-assume: arn:aws:iam::308535385114:role/gha_workflow_pytorch_artifacts
-          role-session-name: gha-bazel-build-upload-artifacts
-          aws-region: us-east-1
-
      - name: Upload test artifacts
        uses: ./.github/actions/upload-test-artifacts
        if: always() && steps.test.conclusion && steps.test.conclusion != 'skipped'
.github/workflows/_mac-test.yml (vendored, 6 lines changed)

@@ -38,11 +38,6 @@ on:
         required: false
         type: boolean
         default: true
-  secrets:
-    HUGGING_FACE_HUB_TOKEN:
-      required: false
-      description: |
-        HF Auth token to avoid rate limits when downloading models or datasets from hub
 
 jobs:
   test:
@@ -171,7 +166,6 @@ jobs:
           JOB_ID: ${{ steps.get-job-id.outputs.job-id }}
           JOB_NAME: ${{ steps.get-job-id.outputs.job-name }}
           REENABLED_ISSUES: ${{ steps.keep-going.outputs.reenabled-issues }}
-          HUGGING_FACE_HUB_TOKEN: ${{ secrets.HUGGING_FACE_HUB_TOKEN }}
         run: |
           # shellcheck disable=SC1090
           set -ex
.github/workflows/_xpu-test.yml (vendored, 4 lines changed)

@@ -47,10 +47,6 @@ on:
       type: boolean
       default: true
 
-permissions:
-  id-token: write
-  contents: read
-
 env:
   GIT_DEFAULT_BRANCH: ${{ github.event.repository.default_branch }}
 
.github/workflows/build-almalinux-images.yml (vendored, 43 lines changed)

@@ -11,14 +11,14 @@ on:
       # Release candidate tags look like: v1.11.0-rc1
       - v[0-9]+.[0-9]+.[0-9]+-rc[0-9]+
     paths:
-      - .ci/docker/**
+      - '.ci/docker/almalinux/*'
+      - '.ci/docker/common/*'
       - .github/workflows/build-almalinux-images.yml
-      - .github/actions/binary-docker-build/**
   pull_request:
     paths:
-      - .ci/docker/**
+      - '.ci/docker/almalinux/*'
+      - '.ci/docker/common/*'
       - .github/workflows/build-almalinux-images.yml
-      - .github/actions/binary-docker-build/**
 
 env:
   DOCKER_REGISTRY: "docker.io"
@@ -37,12 +37,37 @@ jobs:
     strategy:
       matrix:
         cuda_version: ["11.8", "12.4", "12.6", "cpu"]
+    env:
+      CUDA_VERSION: ${{ matrix.cuda_version }}
     steps:
-      - name: Build docker image
-        uses: pytorch/pytorch/.github/actions/binary-docker-build@main
+      - name: Checkout PyTorch
+        uses: pytorch/pytorch/.github/actions/checkout-pytorch@main
         with:
-          docker-image-name: almalinux-builder
-          custom-tag-prefix: ${{ matrix.cuda_version != 'cpu' && 'cuda' || '' }}${{matrix.cuda_version}}
-          docker-build-dir: almalinux
+          submodules: false
+      - name: Calculate docker image
+        if: env.WITH_PUSH == 'false'
+        uses: pytorch/test-infra/.github/actions/calculate-docker-image@main
+        with:
+          docker-image-name: almalinux-builder${{ matrix.cuda_version == 'cpu' && '-' || '-cuda' }}${{matrix.cuda_version}}
+          docker-build-dir: .ci/docker/almalinux
+          always-rebuild: true
+          push: true
+      - name: Authenticate if WITH_PUSH
+        if: env.WITH_PUSH == 'true'
+        env:
           DOCKER_TOKEN: ${{ secrets.DOCKER_TOKEN }}
           DOCKER_ID: ${{ secrets.DOCKER_ID }}
+        run: |
+          if [[ "${WITH_PUSH}" == true ]]; then
+            echo "${DOCKER_TOKEN}" | docker login -u "${DOCKER_ID}" --password-stdin
+          fi
+      - name: Build Docker Image
+        if: env.WITH_PUSH == 'true'
+        uses: nick-fields/retry@v3.0.0
+        with:
+          shell: bash
+          timeout_minutes: 90
+          max_attempts: 3
+          retry_wait_seconds: 90
+          command: |
+            .ci/docker/almalinux/build.sh almalinux-builder${{ matrix.cuda_version == 'cpu' && ':' || ':cuda' }}${{matrix.cuda_version}}
.github/workflows/build-libtorch-images.yml (vendored, 138 lines changed)

@@ -10,14 +10,14 @@ on:
       # Release candidate tags look like: v1.11.0-rc1
       - v[0-9]+.[0-9]+.[0-9]+-rc[0-9]+
     paths:
-      - .ci/docker/**
+      - '.ci/docker/libtorch/*'
+      - '.ci/docker/common/*'
       - .github/workflows/build-libtorch-images.yml
-      - .github/actions/binary-docker-build/**
   pull_request:
     paths:
-      - .ci/docker/**
+      - '.ci/docker/libtorch/*'
+      - '.ci/docker/common/*'
       - .github/workflows/build-libtorch-images.yml
-      - .github/actions/binary-docker-build/**
 
 env:
   DOCKER_REGISTRY: "docker.io"
@@ -39,29 +39,123 @@ jobs:
       curr_branch: ${{ github.head_ref || github.ref_name }}
       curr_ref_type: ${{ github.ref_type }}
 
-  build:
+  build-docker-cuda:
     environment: ${{ (github.ref == 'refs/heads/main' || startsWith(github.event.ref, 'refs/tags/v')) && 'docker-build' || '' }}
     needs: get-label-type
-    runs-on: ${{ needs.get-label-type.outputs.label-type }}linux.9xlarge.ephemeral
-    name: libtorch-cxx11-builder:${{ matrix.tag }}
+    runs-on: "${{ needs.get-label-type.outputs.label-type }}linux.9xlarge.ephemeral"
     strategy:
-      fail-fast: false
       matrix:
-        include: [
-          { tag: "cuda12.8" },
-          { tag: "cuda12.6" },
-          { tag: "cuda12.4" },
-          { tag: "cuda11.8" },
-          { tag: "rocm6.3" },
-          { tag: "rocm6.4" },
-          { tag: "cpu" },
-        ]
+        cuda_version: ["12.8", "12.6", "12.4", "11.8"]
+    env:
+      GPU_ARCH_TYPE: cuda
+      GPU_ARCH_VERSION: ${{ matrix.cuda_version }}
     steps:
-      - name: Build docker image
-        uses: pytorch/pytorch/.github/actions/binary-docker-build@main
+      - name: Checkout PyTorch
+        uses: pytorch/pytorch/.github/actions/checkout-pytorch@main
         with:
-          docker-image-name: libtorch-cxx11-builder
-          custom-tag-prefix: ${{ matrix.tag }}
-          docker-build-dir: libtorch
+          submodules: false
+      - name: Calculate docker image
+        if: env.WITH_PUSH == 'false'
+        uses: pytorch/test-infra/.github/actions/calculate-docker-image@main
+        with:
+          docker-image-name: libtorch-cxx11-builder-cuda${{matrix.cuda_version}}
+          docker-build-dir: .ci/docker/libtorch
+          always-rebuild: true
+          push: true
+      - name: Authenticate if WITH_PUSH
+        if: env.WITH_PUSH == 'true'
+        env:
           DOCKER_TOKEN: ${{ secrets.DOCKER_TOKEN }}
           DOCKER_ID: ${{ secrets.DOCKER_ID }}
+        run: |
+          if [[ "${WITH_PUSH}" == true ]]; then
+            echo "${DOCKER_TOKEN}" | docker login -u "${DOCKER_ID}" --password-stdin
+          fi
+      - name: Build Docker Image
+        if: env.WITH_PUSH == 'true'
+        uses: nick-fields/retry@v3.0.0
+        with:
+          shell: bash
+          timeout_minutes: 90
+          max_attempts: 3
+          retry_wait_seconds: 90
+          command: |
+            .ci/docker/libtorch/build.sh libtorch-cxx11-builder:cuda${{matrix.cuda_version}}
+  build-docker-rocm:
+    environment: ${{ (github.ref == 'refs/heads/main' || startsWith(github.event.ref, 'refs/tags/v')) && 'docker-build' || '' }}
+    needs: get-label-type
+    runs-on: "${{ needs.get-label-type.outputs.label-type }}linux.9xlarge.ephemeral"
+    strategy:
+      matrix:
+        rocm_version: ["6.2.4", "6.3"]
+    env:
+      GPU_ARCH_TYPE: rocm
+      GPU_ARCH_VERSION: ${{ matrix.rocm_version }}
+    steps:
+      - name: Checkout PyTorch
+        uses: pytorch/pytorch/.github/actions/checkout-pytorch@main
+        with:
+          submodules: false
+      - name: Calculate docker image
+        if: env.WITH_PUSH == 'false'
+        uses: pytorch/test-infra/.github/actions/calculate-docker-image@main
+        with:
+          docker-image-name: libtorch-cxx11-builder-rocm${{matrix.rocm_version}}
+          docker-build-dir: .ci/docker/libtorch
+          always-rebuild: true
+          push: true
+      - name: Authenticate if WITH_PUSH
+        if: env.WITH_PUSH == 'true'
+        env:
+          DOCKER_TOKEN: ${{ secrets.DOCKER_TOKEN }}
+          DOCKER_ID: ${{ secrets.DOCKER_ID }}
+        run: |
+          if [[ "${WITH_PUSH}" == true ]]; then
+            echo "${DOCKER_TOKEN}" | docker login -u "${DOCKER_ID}" --password-stdin
+          fi
+      - name: Build Docker Image
+        if: env.WITH_PUSH == 'true'
+        uses: nick-fields/retry@v3.0.0
+        with:
+          shell: bash
+          timeout_minutes: 90
+          max_attempts: 3
+          retry_wait_seconds: 90
+          command: |
+            .ci/docker/libtorch/build.sh libtorch-cxx11-builder:rocm${{matrix.rocm_version}}
+  build-docker-cpu:
+    environment: ${{ (github.ref == 'refs/heads/main' || startsWith(github.event.ref, 'refs/tags/v')) && 'docker-build' || '' }}
+    needs: get-label-type
+    runs-on: "${{ needs.get-label-type.outputs.label-type }}linux.9xlarge.ephemeral"
+    steps:
+      - name: Checkout PyTorch
+        uses: pytorch/pytorch/.github/actions/checkout-pytorch@main
+        with:
+          submodules: false
+      - name: Calculate docker image
+        if: env.WITH_PUSH == 'false'
+        uses: pytorch/test-infra/.github/actions/calculate-docker-image@main
+        with:
+          docker-image-name: libtorch-cxx11-builder-cpu
+          docker-build-dir: .ci/docker/libtorch
+          always-rebuild: true
+          push: true
+      - name: Authenticate if WITH_PUSH
+        if: env.WITH_PUSH == 'true'
+        env:
+          DOCKER_TOKEN: ${{ secrets.DOCKER_TOKEN }}
+          DOCKER_ID: ${{ secrets.DOCKER_ID }}
+        run: |
+          if [[ "${WITH_PUSH}" == true ]]; then
+            echo "${DOCKER_TOKEN}" | docker login -u "${DOCKER_ID}" --password-stdin
+          fi
+      - name: Build Docker Image
+        if: env.WITH_PUSH == 'true'
+        uses: nick-fields/retry@v3.0.0
+        with:
+          shell: bash
+          timeout_minutes: 90
+          max_attempts: 3
+          retry_wait_seconds: 90
+          command: |
+            .ci/docker/libtorch/build.sh libtorch-cxx11-builder:cpu
.github/workflows/build-magma-rocm-linux.yml
vendored
2
.github/workflows/build-magma-rocm-linux.yml
vendored
@ -34,7 +34,7 @@ jobs:
|
|||||||
id-token: write
|
id-token: write
|
||||||
strategy:
|
strategy:
|
||||||
matrix:
|
matrix:
|
||||||
rocm_version: ["64", "63"]
|
rocm_version: ["63", "624"]
|
||||||
steps:
|
steps:
|
||||||
- name: Checkout PyTorch
|
- name: Checkout PyTorch
|
||||||
uses: actions/checkout@v4
|
uses: actions/checkout@v4
|
||||||
.github/workflows/build-manywheel-images-s390x.yml

@@ -11,11 +11,15 @@ on:
       # Release candidate tags look like: v1.11.0-rc1
       - v[0-9]+.[0-9]+.[0-9]+-rc[0-9]+
     paths:
-      - .ci/docker/**
+      - '.ci/docker/manywheel/*'
+      - '.ci/docker/manywheel/build_scripts/*'
+      - '.ci/docker/common/*'
       - .github/workflows/build-manywheel-images-s390x.yml
   pull_request:
     paths:
-      - .ci/docker/**
+      - '.ci/docker/manywheel/*'
+      - '.ci/docker/manywheel/build_scripts/*'
+      - '.ci/docker/common/*'
       - .github/workflows/build-manywheel-images-s390x.yml
 
 
@@ -33,45 +37,26 @@ jobs:
     if: github.repository_owner == 'pytorch'
     environment: ${{ (github.ref == 'refs/heads/main' || startsWith(github.event.ref, 'refs/tags/v')) && 'docker-build' || '' }}
     runs-on: linux.s390x
+    env:
+      GPU_ARCH_TYPE: cpu-s390x
     steps:
       - name: Checkout PyTorch
        uses: pytorch/pytorch/.github/actions/checkout-pytorch@main
        with:
          submodules: false
          no-sudo: true
-      - name: Build Docker Image
+      - name: Authenticate if WITH_PUSH
+        if: env.WITH_PUSH == 'true'
-        run: |
-          .ci/docker/manywheel/build.sh manylinuxs390x-builder:cpu-s390x -t manylinuxs390x-builder:cpu-s390x
-
-      - name: Tag and (if WITH_PUSH) push docker image to docker.io
        env:
          DOCKER_TOKEN: ${{ secrets.DOCKER_TOKEN }}
          DOCKER_ID: ${{ secrets.DOCKER_ID }}
-          CREATED_FULL_DOCKER_IMAGE_NAME: manylinuxs390x-builder:cpu-s390x
-        shell: bash
        run: |
-          set -euox pipefail
+          if [[ "${WITH_PUSH}" == true ]]; then
-          GITHUB_REF="${GITHUB_REF:-$(git symbolic-ref -q HEAD || git describe --tags --exact-match)}"
-          GIT_BRANCH_NAME="${GITHUB_REF##*/}"
-          GIT_COMMIT_SHA="${GITHUB_SHA:-$(git rev-parse HEAD)}"
-          CI_FOLDER_SHA="$(git rev-parse HEAD:.ci/docker)"
-
-          DOCKER_IMAGE_NAME_PREFIX="docker.io/pytorch/${CREATED_FULL_DOCKER_IMAGE_NAME}"
-
-          docker tag "${CREATED_FULL_DOCKER_IMAGE_NAME}" "${DOCKER_IMAGE_NAME_PREFIX}-${GIT_BRANCH_NAME}"
-          docker tag "${CREATED_FULL_DOCKER_IMAGE_NAME}" "${DOCKER_IMAGE_NAME_PREFIX}-${GIT_COMMIT_SHA}"
-          docker tag "${CREATED_FULL_DOCKER_IMAGE_NAME}" "${DOCKER_IMAGE_NAME_PREFIX}-${CI_FOLDER_SHA}"
-
-          # Prety sure Github will mask tokens and I'm not sure if it will even be
-          # printed due to pipe, but just in case
-          set +x
-          if [[ "${WITH_PUSH:-false}" == "true" ]]; then
            echo "${DOCKER_TOKEN}" | docker login -u "${DOCKER_ID}" --password-stdin
-            docker push "${DOCKER_IMAGE_NAME_PREFIX}-${GIT_BRANCH_NAME}"
-            docker push "${DOCKER_IMAGE_NAME_PREFIX}-${GIT_COMMIT_SHA}"
-            docker push "${DOCKER_IMAGE_NAME_PREFIX}-${CI_FOLDER_SHA}"
          fi
+      - name: Build Docker Image
+        run: |
+          .ci/docker/manywheel/build.sh manylinuxs390x-builder:cpu-s390x
 
       - name: Cleanup docker
        if: cancelled()
.github/workflows/build-manywheel-images.yml (vendored, 345 lines changed)

@@ -11,14 +11,17 @@ on:
       # Release candidate tags look like: v1.11.0-rc1
       - v[0-9]+.[0-9]+.[0-9]+-rc[0-9]+
     paths:
-      - .ci/docker/**
+      - '.ci/docker/common/*'
+      - '.ci/docker/manywheel/*'
+      - '.ci/docker/manywheel/build_scripts/*'
       - .github/workflows/build-manywheel-images.yml
-      - .github/actions/binary-docker-build/**
   pull_request:
     paths:
-      - .ci/docker/**
+      - '.ci/docker/common/*'
+      - '.ci/docker/manywheel/*'
+      - '.ci/docker/manywheel/build_scripts/*'
       - .github/workflows/build-manywheel-images.yml
-      - .github/actions/binary-docker-build/**
 
 env:
   DOCKER_REGISTRY: "docker.io"
@@ -40,34 +43,322 @@ jobs:
       curr_branch: ${{ github.head_ref || github.ref_name }}
       curr_ref_type: ${{ github.ref_type }}
 
-  build:
+  build-docker-cuda-manylinux_2_28:
     environment: ${{ (github.ref == 'refs/heads/main' || startsWith(github.event.ref, 'refs/tags/v')) && 'docker-build' || '' }}
     needs: get-label-type
+    runs-on: "${{ needs.get-label-type.outputs.label-type }}linux.9xlarge.ephemeral"
     strategy:
-      fail-fast: false
       matrix:
-        include: [
-          { name: "manylinux2_28-builder", tag: "cuda12.8", runner: "linux.9xlarge.ephemeral" },
-          { name: "manylinux2_28-builder", tag: "cuda12.6", runner: "linux.9xlarge.ephemeral" },
-          { name: "manylinux2_28-builder", tag: "cuda12.4", runner: "linux.9xlarge.ephemeral" },
-          { name: "manylinux2_28-builder", tag: "cuda11.8", runner: "linux.9xlarge.ephemeral" },
-          { name: "manylinuxaarch64-builder", tag: "cuda12.8", runner: "linux.arm64.2xlarge.ephemeral" },
-          { name: "manylinux2_28-builder", tag: "rocm6.3", runner: "linux.9xlarge.ephemeral" },
-          { name: "manylinux2_28-builder", tag: "rocm6.4", runner: "linux.9xlarge.ephemeral" },
-          { name: "manylinux2_28-builder", tag: "cpu", runner: "linux.9xlarge.ephemeral" },
-          { name: "manylinuxaarch64-builder", tag: "cpu-aarch64", runner: "linux.arm64.2xlarge.ephemeral" },
-          { name: "manylinux2_28_aarch64-builder", tag: "cpu-aarch64", runner: "linux.arm64.2xlarge.ephemeral" },
-          { name: "manylinuxcxx11-abi-builder", tag: "cpu-cxx11-abi", runner: "linux.9xlarge.ephemeral" },
-          { name: "manylinux2_28-builder", tag: "xpu", runner: "linux.9xlarge.ephemeral" },
-        ]
-    runs-on: ${{ needs.get-label-type.outputs.label-type }}${{ matrix.runner }}
-    name: ${{ matrix.name }}:${{ matrix.tag }}
+        cuda_version: ["12.8", "12.6", "12.4", "11.8"]
+    env:
+      GPU_ARCH_TYPE: cuda-manylinux_2_28
+      GPU_ARCH_VERSION: ${{ matrix.cuda_version }}
     steps:
-      - name: Build docker image
-        uses: pytorch/pytorch/.github/actions/binary-docker-build@main
+      - name: Purge tools folder (free space for build)
+        run: rm -rf /opt/hostedtoolcache
+      - name: Checkout PyTorch
+        uses: pytorch/pytorch/.github/actions/checkout-pytorch@main
         with:
-          docker-image-name: ${{ matrix.name }}
-          custom-tag-prefix: ${{ matrix.tag }}
-          docker-build-dir: manywheel
+          submodules: false
+      - name: Calculate docker image
+        if: env.WITH_PUSH == 'false'
+        uses: pytorch/test-infra/.github/actions/calculate-docker-image@main
+        with:
+          docker-image-name: manylinux2_28-builder-cuda${{matrix.cuda_version}}
+          docker-build-dir: .ci/docker/manywheel
+          always-rebuild: true
+          push: true
+      - name: Authenticate if WITH_PUSH
+        if: env.WITH_PUSH == 'true'
+        env:
           DOCKER_TOKEN: ${{ secrets.DOCKER_TOKEN }}
           DOCKER_ID: ${{ secrets.DOCKER_ID }}
+        run: |
+          if [[ "${WITH_PUSH}" == true ]]; then
+            echo "${DOCKER_TOKEN}" | docker login -u "${DOCKER_ID}" --password-stdin
+          fi
+      - name: Build Docker Image
+        if: env.WITH_PUSH == 'true'
+        uses: nick-fields/retry@v3.0.0
+        with:
+          shell: bash
+          timeout_minutes: 90
+          max_attempts: 3
+          retry_wait_seconds: 90
+          command: |
+            .ci/docker/manywheel/build.sh manylinux2_28-builder:cuda${{matrix.cuda_version}}
+  build-docker-cuda-aarch64:
+    environment: ${{ (github.ref == 'refs/heads/main' || startsWith(github.event.ref, 'refs/tags/v')) && 'docker-build' || '' }}
+    needs: get-label-type
+    runs-on: "${{ needs.get-label-type.outputs.label-type }}linux.arm64.2xlarge.ephemeral"
+    strategy:
+      matrix:
+        cuda_version: ["12.8"]
+    env:
+      GPU_ARCH_TYPE: cuda-aarch64
+      GPU_ARCH_VERSION: ${{ matrix.cuda_version }}
+    steps:
+      - name: Checkout PyTorch
+        uses: actions/checkout@v4
+      - name: Calculate docker image
+        if: env.WITH_PUSH == 'false'
+        uses: pytorch/test-infra/.github/actions/calculate-docker-image@main
+        with:
+          docker-image-name: manylinuxaarch64-builder-cuda${{matrix.cuda_version}}
+          docker-build-dir: .ci/docker/manywheel
+          always-rebuild: true
+          push: true
+      - name: Authenticate if WITH_PUSH
+        if: env.WITH_PUSH == 'true'
+        env:
+          DOCKER_TOKEN: ${{ secrets.DOCKER_TOKEN }}
+          DOCKER_ID: ${{ secrets.DOCKER_ID }}
+        run: |
+          if [[ "${WITH_PUSH}" == true ]]; then
+            echo "${DOCKER_TOKEN}" | docker login -u "${DOCKER_ID}" --password-stdin
+          fi
+      - name: Build Docker Image
+        if: env.WITH_PUSH == 'true'
+        uses: nick-fields/retry@v3.0.0
+        with:
+          shell: bash
+          timeout_minutes: 90
+          max_attempts: 3
+          retry_wait_seconds: 90
+          command: |
+            .ci/docker/manywheel/build.sh manylinuxaarch64-builder:cuda${{matrix.cuda_version}}
+  build-docker-rocm-manylinux_2_28:
+    environment: ${{ (github.ref == 'refs/heads/main' || startsWith(github.event.ref, 'refs/tags/v')) && 'docker-build' || '' }}
+    needs: get-label-type
+    runs-on: "${{ needs.get-label-type.outputs.label-type }}linux.9xlarge.ephemeral"
+    strategy:
+      matrix:
+        rocm_version: ["6.2.4", "6.3"]
+    env:
+      GPU_ARCH_TYPE: rocm-manylinux_2_28
+      GPU_ARCH_VERSION: ${{ matrix.rocm_version }}
+    steps:
+      - name: Checkout PyTorch
+        uses: pytorch/pytorch/.github/actions/checkout-pytorch@main
+        with:
+          submodules: false
+      - name: Calculate docker image
+        if: env.WITH_PUSH == 'false'
+        uses: pytorch/test-infra/.github/actions/calculate-docker-image@main
+        with:
+          docker-image-name: manylinux2_28-builder-rocm${{matrix.rocm_version}}
+          docker-build-dir: .ci/docker/manywheel
+          always-rebuild: true
+          push: true
+      - name: Authenticate if WITH_PUSH
+        if: env.WITH_PUSH == 'true'
+        env:
+          DOCKER_TOKEN: ${{ secrets.DOCKER_TOKEN }}
+          DOCKER_ID: ${{ secrets.DOCKER_ID }}
+        run: |
+          if [[ "${WITH_PUSH}" == true ]]; then
+            echo "${DOCKER_TOKEN}" | docker login -u "${DOCKER_ID}" --password-stdin
+          fi
+      - name: Build Docker Image
+        if: env.WITH_PUSH == 'true'
+        uses: nick-fields/retry@v3.0.0
+        with:
+          shell: bash
+          timeout_minutes: 90
+          max_attempts: 3
+          retry_wait_seconds: 90
+          command: |
+            .ci/docker/manywheel/build.sh manylinux2_28-builder:rocm${{matrix.rocm_version}}
+  build-docker-cpu-manylinux_2_28:
+    environment: ${{ (github.ref == 'refs/heads/main' || startsWith(github.event.ref, 'refs/tags/v')) && 'docker-build' || '' }}
+    needs: get-label-type
+    runs-on: "${{ needs.get-label-type.outputs.label-type }}linux.9xlarge.ephemeral"
+    env:
+      GPU_ARCH_TYPE: cpu-manylinux_2_28
+    steps:
+      - name: Checkout PyTorch
+        uses: pytorch/pytorch/.github/actions/checkout-pytorch@main
+        with:
+          submodules: false
+      - name: Calculate docker image
+        if: env.WITH_PUSH == 'false'
+        uses: pytorch/test-infra/.github/actions/calculate-docker-image@main
+        with:
+          docker-image-name: manylinux2_28-builder-cpu
+          docker-build-dir: .ci/docker/manywheel
+          always-rebuild: true
+          push: true
+      - name: Authenticate if WITH_PUSH
+        if: env.WITH_PUSH == 'true'
+        env:
+          DOCKER_TOKEN: ${{ secrets.DOCKER_TOKEN }}
+          DOCKER_ID: ${{ secrets.DOCKER_ID }}
+        run: |
+          if [[ "${WITH_PUSH}" == true ]]; then
+            echo "${DOCKER_TOKEN}" | docker login -u "${DOCKER_ID}" --password-stdin
+          fi
+      - name: Build Docker Image
+        if: env.WITH_PUSH == 'true'
+        uses: nick-fields/retry@v3.0.0
+        with:
+          shell: bash
+          timeout_minutes: 90
+          max_attempts: 3
+          retry_wait_seconds: 90
+          command: |
+            .ci/docker/manywheel/build.sh manylinux2_28-builder:cpu
+  build-docker-cpu-aarch64:
+    environment: ${{ (github.ref == 'refs/heads/main' || startsWith(github.event.ref, 'refs/tags/v')) && 'docker-build' || '' }}
+    needs: get-label-type
+    runs-on: "${{ needs.get-label-type.outputs.label-type }}linux.arm64.2xlarge.ephemeral"
+    env:
+      GPU_ARCH_TYPE: cpu-aarch64
+    steps:
+      - name: Checkout PyTorch
+        uses: pytorch/pytorch/.github/actions/checkout-pytorch@main
+        with:
+          submodules: false
+      - name: Calculate docker image
+        if: env.WITH_PUSH == 'false'
+        uses: pytorch/test-infra/.github/actions/calculate-docker-image@main
+        with:
+          docker-image-name: manylinuxaarch64-builder-cpu-aarch64
+          docker-build-dir: .ci/docker/manywheel
+          always-rebuild: true
+          push: true
+      - name: Authenticate if WITH_PUSH
+        if: env.WITH_PUSH == 'true'
+        env:
+          DOCKER_TOKEN: ${{ secrets.DOCKER_TOKEN }}
+          DOCKER_ID: ${{ secrets.DOCKER_ID }}
+        run: |
+          if [[ "${WITH_PUSH}" == true ]]; then
+            echo "${DOCKER_TOKEN}" | docker login -u "${DOCKER_ID}" --password-stdin
+          fi
+      - name: Build Docker Image
+        if: env.WITH_PUSH == 'true'
+        uses: nick-fields/retry@v3.0.0
+        with:
+          shell: bash
+          timeout_minutes: 90
+          max_attempts: 3
+          retry_wait_seconds: 90
+          command: |
+            .ci/docker/manywheel/build.sh manylinuxaarch64-builder:cpu-aarch64
+  build-docker-cpu-aarch64-2_28:
+    environment: ${{ (github.ref == 'refs/heads/main' || startsWith(github.event.ref, 'refs/tags/v')) && 'docker-build' || '' }}
+    needs: get-label-type
+    runs-on: "${{ needs.get-label-type.outputs.label-type }}linux.arm64.2xlarge.ephemeral"
+    env:
+      GPU_ARCH_TYPE: cpu-aarch64-2_28
+    steps:
+      - name: Checkout PyTorch
+        uses: pytorch/pytorch/.github/actions/checkout-pytorch@main
+        with:
+          submodules: false
+      - name: Calculate docker image
+        if: env.WITH_PUSH == 'false'
+        uses: pytorch/test-infra/.github/actions/calculate-docker-image@main
+        with:
+          docker-image-name: manylinux2_28_aarch64-builder-cpu-aarch64
+          docker-build-dir: .ci/docker/manywheel
+          always-rebuild: true
+          push: true
+      - name: Authenticate if WITH_PUSH
+        if: env.WITH_PUSH == 'true'
+        env:
+          DOCKER_TOKEN: ${{ secrets.DOCKER_TOKEN }}
+          DOCKER_ID: ${{ secrets.DOCKER_ID }}
+        run: |
+          if [[ "${WITH_PUSH}" == true ]]; then
+            echo "${DOCKER_TOKEN}" | docker login -u "${DOCKER_ID}" --password-stdin
+          fi
+      - name: Build Docker Image
+        if: env.WITH_PUSH == 'true'
+        env:
+          DOCKER_TOKEN: ${{ secrets.DOCKER_TOKEN }}
+          DOCKER_ID: ${{ secrets.DOCKER_ID }}
+        uses: nick-fields/retry@v3.0.0
+        with:
+          shell: bash
+          timeout_minutes: 90
+          max_attempts: 3
+          retry_wait_seconds: 90
+          command: |
+            .ci/docker/manywheel/build.sh manylinux2_28_aarch64-builder:cpu-aarch64
+  build-docker-cpu-cxx11-abi:
+    environment: ${{ (github.ref == 'refs/heads/main' || startsWith(github.event.ref, 'refs/tags/v')) && 'docker-build' || '' }}
+    needs: get-label-type
+    runs-on: "${{ needs.get-label-type.outputs.label-type }}linux.9xlarge.ephemeral"
+    env:
+      GPU_ARCH_TYPE: cpu-cxx11-abi
+    steps:
+      - name: Checkout PyTorch
+        uses: pytorch/pytorch/.github/actions/checkout-pytorch@main
+        with:
+          submodules: false
+      - name: Calculate docker image
+        if: env.WITH_PUSH == 'false'
+        uses: pytorch/test-infra/.github/actions/calculate-docker-image@main
+        with:
+          docker-image-name: manylinuxcxx11-abi-builder-cpu-cxx11-abi
+          docker-build-dir: .ci/docker/manywheel
+          always-rebuild: true
+          push: true
+      - name: Authenticate if WITH_PUSH
+        if: env.WITH_PUSH == 'true'
+        env:
+          DOCKER_TOKEN: ${{ secrets.DOCKER_TOKEN }}
+          DOCKER_ID: ${{ secrets.DOCKER_ID }}
+        run: |
+          if [[ "${WITH_PUSH}" == true ]]; then
+            echo "${DOCKER_TOKEN}" | docker login -u "${DOCKER_ID}" --password-stdin
+          fi
+      - name: Build Docker Image
+        if: env.WITH_PUSH == 'true'
+        uses: nick-fields/retry@v3.0.0
+        with:
+          shell: bash
+          timeout_minutes: 90
+          max_attempts: 3
+          retry_wait_seconds: 90
+          command: |
+            .ci/docker/manywheel/build.sh manylinuxcxx11-abi-builder:cpu-cxx11-abi
+  build-docker-xpu:
+    environment: ${{ (github.ref == 'refs/heads/main' || startsWith(github.event.ref, 'refs/tags/v')) && 'docker-build' || '' }}
+    needs: get-label-type
+    runs-on: "${{ needs.get-label-type.outputs.label-type }}linux.9xlarge.ephemeral"
+    env:
+      GPU_ARCH_TYPE: xpu
+    steps:
+      - name: Checkout PyTorch
+        uses: pytorch/pytorch/.github/actions/checkout-pytorch@main
+        with:
+          submodules: false
+      - name: Calculate docker image
+        if: env.WITH_PUSH == 'false'
+        uses: pytorch/test-infra/.github/actions/calculate-docker-image@main
+        with:
+          docker-image-name: manylinux2_28-builder-xpu
+          docker-build-dir: .ci/docker/manywheel
+          always-rebuild: true
+          push: true
+      - name: Authenticate if WITH_PUSH
+        if: env.WITH_PUSH == 'true'
+        env:
+          DOCKER_TOKEN: ${{ secrets.DOCKER_TOKEN }}
+          DOCKER_ID: ${{ secrets.DOCKER_ID }}
+        run: |
+          if [[ "${WITH_PUSH}" == true ]]; then
+            echo "${DOCKER_TOKEN}" | docker login -u "${DOCKER_ID}" --password-stdin
+          fi
+      - name: Build Docker Image
+        if: env.WITH_PUSH == 'true'
+        uses: nick-fields/retry@v3.0.0
+        with:
+          shell: bash
+          timeout_minutes: 90
+          max_attempts: 3
+          retry_wait_seconds: 90
+          command: |
+            .ci/docker/manywheel/build.sh manylinux2_28-builder:xpu
.github/workflows/build-triton-wheel.yml
vendored
2
.github/workflows/build-triton-wheel.yml
vendored
@ -54,7 +54,7 @@ jobs:
|
|||||||
docker-image: ["pytorch/manylinux2_28-builder:cpu"]
|
docker-image: ["pytorch/manylinux2_28-builder:cpu"]
|
||||||
include:
|
include:
|
||||||
- device: "rocm"
|
- device: "rocm"
|
||||||
rocm_version: "6.4"
|
rocm_version: "6.3"
|
||||||
runs_on: "${{ needs.get-label-type.outputs.label-type }}linux.4xlarge"
|
runs_on: "${{ needs.get-label-type.outputs.label-type }}linux.4xlarge"
|
||||||
- device: "cuda"
|
- device: "cuda"
|
||||||
rocm_version: ""
|
rocm_version: ""
|
||||||
|
|||||||
2
.github/workflows/docker-builds.yml
vendored
2
.github/workflows/docker-builds.yml
vendored
@ -79,7 +79,7 @@ jobs:
|
|||||||
]
|
]
|
||||||
include:
|
include:
|
||||||
- docker-image-name: pytorch-linux-jammy-aarch64-py3.10-gcc11
|
- docker-image-name: pytorch-linux-jammy-aarch64-py3.10-gcc11
|
||||||
runner: linux.arm64.m7g.4xlarge
|
runner: linux.arm64.2xlarge
|
||||||
- docker-image-name: pytorch-linux-jammy-aarch64-py3.10-gcc11-inductor-benchmarks
|
- docker-image-name: pytorch-linux-jammy-aarch64-py3.10-gcc11-inductor-benchmarks
|
||||||
runner: linux.arm64.m7g.4xlarge
|
runner: linux.arm64.m7g.4xlarge
|
||||||
timeout-minutes: 600
|
timeout-minutes: 600
|
||||||
|
|||||||
.github/workflows/generated-linux-binary-libtorch-nightly.yml (generated, vendored, 184 lines changed)

@@ -301,6 +301,98 @@ jobs:
       github-token: ${{ secrets.GITHUB_TOKEN }}
     uses: ./.github/workflows/_binary-upload.yml
 
+  libtorch-rocm6_2_4-shared-with-deps-release-build:
+    if: ${{ github.repository_owner == 'pytorch' }}
+    uses: ./.github/workflows/_binary-build-linux.yml
+    needs: get-label-type
+    with:
+      PYTORCH_ROOT: /pytorch
+      PACKAGE_TYPE: libtorch
+      # TODO: This is a legacy variable that we eventually want to get rid of in
+      #       favor of GPU_ARCH_VERSION
+      DESIRED_CUDA: rocm6.2.4
+      GPU_ARCH_VERSION: 6.2.4
+      GPU_ARCH_TYPE: rocm
+      DOCKER_IMAGE: pytorch/libtorch-cxx11-builder:rocm6.2.4-main
+      LIBTORCH_CONFIG: release
+      LIBTORCH_VARIANT: shared-with-deps
+      runner_prefix: "${{ needs.get-label-type.outputs.label-type }}"
+      build_name: libtorch-rocm6_2_4-shared-with-deps-release
+      build_environment: linux-binary-libtorch
+    secrets:
+      github-token: ${{ secrets.GITHUB_TOKEN }}
+  libtorch-rocm6_2_4-shared-with-deps-release-test:  # Testing
+    if: ${{ github.repository_owner == 'pytorch' }}
+    needs:
+      - libtorch-rocm6_2_4-shared-with-deps-release-build
+      - get-label-type
+    runs-on: linux.rocm.gpu
+    timeout-minutes: 240
+    env:
+      PYTORCH_ROOT: /pytorch
+      PACKAGE_TYPE: libtorch
+      # TODO: This is a legacy variable that we eventually want to get rid of in
+      #       favor of GPU_ARCH_VERSION
+      DESIRED_CUDA: rocm6.2.4
+      GPU_ARCH_VERSION: 6.2.4
+      GPU_ARCH_TYPE: rocm
+      SKIP_ALL_TESTS: 1
+      DOCKER_IMAGE: pytorch/libtorch-cxx11-builder:rocm6.2.4-main
+      LIBTORCH_CONFIG: release
+      LIBTORCH_VARIANT: shared-with-deps
+    steps:
+      - name: Setup ROCm
+        uses: ./.github/actions/setup-rocm
+      - uses: actions/download-artifact@v4.1.7
+        name: Download Build Artifacts
+        with:
+          name: libtorch-rocm6_2_4-shared-with-deps-release
+          path: "${{ runner.temp }}/artifacts/"
+      - name: Checkout PyTorch
+        uses: actions/checkout@v4
+        with:
+          ref: ${{ github.event_name == 'pull_request' && github.event.pull_request.head.sha || github.sha }}
+          submodules: recursive
+          path: pytorch
+          show-progress: false
+      - name: Clean PyTorch checkout
+        run: |
+          # Remove any artifacts from the previous checkouts
+          git clean -fxd
+        working-directory: pytorch
+      - name: ROCm set GPU_FLAG
+        run: |
+          echo "GPU_FLAG=--device=/dev/mem --device=/dev/kfd --device=/dev/dri --group-add video --group-add daemon" >> "${GITHUB_ENV}"
+      - name: Pull Docker image
+        uses: pytorch/test-infra/.github/actions/pull-docker-image@main
+        with:
+          docker-image: pytorch/libtorch-cxx11-builder:rocm6.2.4-main
+      - name: Test Pytorch binary
+        uses: ./pytorch/.github/actions/test-pytorch-binary
+      - name: Teardown ROCm
+        uses: ./.github/actions/teardown-rocm
+  libtorch-rocm6_2_4-shared-with-deps-release-upload:  # Uploading
+    if: ${{ github.repository_owner == 'pytorch' }}
+    permissions:
+      id-token: write
+      contents: read
+    needs: libtorch-rocm6_2_4-shared-with-deps-release-test
+    with:
+      PYTORCH_ROOT: /pytorch
+      PACKAGE_TYPE: libtorch
+      # TODO: This is a legacy variable that we eventually want to get rid of in
+      #       favor of GPU_ARCH_VERSION
+      DESIRED_CUDA: rocm6.2.4
+      GPU_ARCH_VERSION: 6.2.4
+      GPU_ARCH_TYPE: rocm
+      DOCKER_IMAGE: pytorch/libtorch-cxx11-builder:rocm6.2.4-main
+      LIBTORCH_CONFIG: release
+      LIBTORCH_VARIANT: shared-with-deps
+      build_name: libtorch-rocm6_2_4-shared-with-deps-release
+    secrets:
+      github-token: ${{ secrets.GITHUB_TOKEN }}
+    uses: ./.github/workflows/_binary-upload.yml
+
   libtorch-rocm6_3-shared-with-deps-release-build:
     if: ${{ github.repository_owner == 'pytorch' }}
     uses: ./.github/workflows/_binary-build-linux.yml
@@ -392,95 +484,3 @@ jobs:
     secrets:
       github-token: ${{ secrets.GITHUB_TOKEN }}
     uses: ./.github/workflows/_binary-upload.yml
-
-  libtorch-rocm6_4-shared-with-deps-release-build:
-    if: ${{ github.repository_owner == 'pytorch' }}
-    uses: ./.github/workflows/_binary-build-linux.yml
-    needs: get-label-type
-    with:
-      PYTORCH_ROOT: /pytorch
-      PACKAGE_TYPE: libtorch
-      # TODO: This is a legacy variable that we eventually want to get rid of in
-      #       favor of GPU_ARCH_VERSION
-      DESIRED_CUDA: rocm6.4
-      GPU_ARCH_VERSION: 6.4
-      GPU_ARCH_TYPE: rocm
-      DOCKER_IMAGE: pytorch/libtorch-cxx11-builder:rocm6.4-main
-      LIBTORCH_CONFIG: release
-      LIBTORCH_VARIANT: shared-with-deps
-      runner_prefix: "${{ needs.get-label-type.outputs.label-type }}"
-      build_name: libtorch-rocm6_4-shared-with-deps-release
-      build_environment: linux-binary-libtorch
-    secrets:
-      github-token: ${{ secrets.GITHUB_TOKEN }}
-  libtorch-rocm6_4-shared-with-deps-release-test:  # Testing
-    if: ${{ github.repository_owner == 'pytorch' }}
-    needs:
-      - libtorch-rocm6_4-shared-with-deps-release-build
-      - get-label-type
-    runs-on: linux.rocm.gpu
-    timeout-minutes: 240
-    env:
-      PYTORCH_ROOT: /pytorch
-      PACKAGE_TYPE: libtorch
-      # TODO: This is a legacy variable that we eventually want to get rid of in
-      #       favor of GPU_ARCH_VERSION
-      DESIRED_CUDA: rocm6.4
-      GPU_ARCH_VERSION: 6.4
-      GPU_ARCH_TYPE: rocm
-      SKIP_ALL_TESTS: 1
-      DOCKER_IMAGE: pytorch/libtorch-cxx11-builder:rocm6.4-main
-      LIBTORCH_CONFIG: release
-      LIBTORCH_VARIANT: shared-with-deps
-    steps:
-      - name: Setup ROCm
-        uses: ./.github/actions/setup-rocm
-      - uses: actions/download-artifact@v4.1.7
-        name: Download Build Artifacts
-        with:
-          name: libtorch-rocm6_4-shared-with-deps-release
-          path: "${{ runner.temp }}/artifacts/"
-      - name: Checkout PyTorch
-        uses: actions/checkout@v4
-        with:
-          ref: ${{ github.event_name == 'pull_request' && github.event.pull_request.head.sha || github.sha }}
-          submodules: recursive
-          path: pytorch
-          show-progress: false
-      - name: Clean PyTorch checkout
-        run: |
-          # Remove any artifacts from the previous checkouts
-          git clean -fxd
-        working-directory: pytorch
-      - name: ROCm set GPU_FLAG
-        run: |
-          echo "GPU_FLAG=--device=/dev/mem --device=/dev/kfd --device=/dev/dri --group-add video --group-add daemon" >> "${GITHUB_ENV}"
-      - name: Pull Docker image
-        uses: pytorch/test-infra/.github/actions/pull-docker-image@main
-        with:
-          docker-image: pytorch/libtorch-cxx11-builder:rocm6.4-main
-      - name: Test Pytorch binary
-        uses: ./pytorch/.github/actions/test-pytorch-binary
-      - name: Teardown ROCm
-        uses: ./.github/actions/teardown-rocm
-  libtorch-rocm6_4-shared-with-deps-release-upload:  # Uploading
-    if: ${{ github.repository_owner == 'pytorch' }}
-    permissions:
-      id-token: write
-      contents: read
-    needs: libtorch-rocm6_4-shared-with-deps-release-test
-    with:
-      PYTORCH_ROOT: /pytorch
-      PACKAGE_TYPE: libtorch
-      # TODO: This is a legacy variable that we eventually want to get rid of in
-      #       favor of GPU_ARCH_VERSION
-      DESIRED_CUDA: rocm6.4
-      GPU_ARCH_VERSION: 6.4
-      GPU_ARCH_TYPE: rocm
-      DOCKER_IMAGE: pytorch/libtorch-cxx11-builder:rocm6.4-main
-      LIBTORCH_CONFIG: release
-      LIBTORCH_VARIANT: shared-with-deps
-      build_name: libtorch-rocm6_4-shared-with-deps-release
-    secrets:
-      github-token: ${{ secrets.GITHUB_TOKEN }}
-    uses: ./.github/workflows/_binary-upload.yml
.github/workflows/generated-linux-binary-manywheel-nightly.yml (generated, vendored, 1104 lines changed)
(file diff suppressed because it is too large)

.github/workflows/generated-linux-s390x-binary-manywheel-nightly.yml (generated, vendored, 30 lines changed)
@@ -55,7 +55,7 @@ jobs:
       # favor of GPU_ARCH_VERSION
       DESIRED_CUDA: cpu
       GPU_ARCH_TYPE: cpu-s390x
-      DOCKER_IMAGE: pytorch/manylinuxs390x-builder:cpu-s390x
+      DOCKER_IMAGE: pytorch/manylinuxs390x-builder:cpu-s390x-main
       use_split_build: False
       DESIRED_PYTHON: "3.9"
       runs_on: linux.s390x
@@ -79,7 +79,7 @@ jobs:
       # favor of GPU_ARCH_VERSION
       DESIRED_CUDA: cpu
       GPU_ARCH_TYPE: cpu-s390x
-      DOCKER_IMAGE: pytorch/manylinuxs390x-builder:cpu-s390x
+      DOCKER_IMAGE: pytorch/manylinuxs390x-builder:cpu-s390x-main
       use_split_build: False
       DESIRED_PYTHON: "3.9"
       build_name: manywheel-py3_9-cpu-s390x
@@ -101,7 +101,7 @@ jobs:
       # favor of GPU_ARCH_VERSION
       DESIRED_CUDA: cpu
       GPU_ARCH_TYPE: cpu-s390x
-      DOCKER_IMAGE: pytorch/manylinuxs390x-builder:cpu-s390x
+      DOCKER_IMAGE: pytorch/manylinuxs390x-builder:cpu-s390x-main
       use_split_build: False
       DESIRED_PYTHON: "3.9"
       build_name: manywheel-py3_9-cpu-s390x
@@ -120,7 +120,7 @@ jobs:
       # favor of GPU_ARCH_VERSION
       DESIRED_CUDA: cpu
       GPU_ARCH_TYPE: cpu-s390x
-      DOCKER_IMAGE: pytorch/manylinuxs390x-builder:cpu-s390x
+      DOCKER_IMAGE: pytorch/manylinuxs390x-builder:cpu-s390x-main
       use_split_build: False
       DESIRED_PYTHON: "3.10"
       runs_on: linux.s390x
@@ -144,7 +144,7 @@ jobs:
       # favor of GPU_ARCH_VERSION
       DESIRED_CUDA: cpu
       GPU_ARCH_TYPE: cpu-s390x
-      DOCKER_IMAGE: pytorch/manylinuxs390x-builder:cpu-s390x
+      DOCKER_IMAGE: pytorch/manylinuxs390x-builder:cpu-s390x-main
       use_split_build: False
       DESIRED_PYTHON: "3.10"
       build_name: manywheel-py3_10-cpu-s390x
@@ -166,7 +166,7 @@ jobs:
       # favor of GPU_ARCH_VERSION
       DESIRED_CUDA: cpu
       GPU_ARCH_TYPE: cpu-s390x
-      DOCKER_IMAGE: pytorch/manylinuxs390x-builder:cpu-s390x
+      DOCKER_IMAGE: pytorch/manylinuxs390x-builder:cpu-s390x-main
       use_split_build: False
       DESIRED_PYTHON: "3.10"
       build_name: manywheel-py3_10-cpu-s390x
@@ -185,7 +185,7 @@ jobs:
       # favor of GPU_ARCH_VERSION
       DESIRED_CUDA: cpu
       GPU_ARCH_TYPE: cpu-s390x
-      DOCKER_IMAGE: pytorch/manylinuxs390x-builder:cpu-s390x
+      DOCKER_IMAGE: pytorch/manylinuxs390x-builder:cpu-s390x-main
       use_split_build: False
       DESIRED_PYTHON: "3.11"
       runs_on: linux.s390x
@@ -209,7 +209,7 @@ jobs:
       # favor of GPU_ARCH_VERSION
       DESIRED_CUDA: cpu
       GPU_ARCH_TYPE: cpu-s390x
-      DOCKER_IMAGE: pytorch/manylinuxs390x-builder:cpu-s390x
+      DOCKER_IMAGE: pytorch/manylinuxs390x-builder:cpu-s390x-main
       use_split_build: False
       DESIRED_PYTHON: "3.11"
       build_name: manywheel-py3_11-cpu-s390x
@@ -231,7 +231,7 @@ jobs:
       # favor of GPU_ARCH_VERSION
       DESIRED_CUDA: cpu
       GPU_ARCH_TYPE: cpu-s390x
-      DOCKER_IMAGE: pytorch/manylinuxs390x-builder:cpu-s390x
+      DOCKER_IMAGE: pytorch/manylinuxs390x-builder:cpu-s390x-main
       use_split_build: False
       DESIRED_PYTHON: "3.11"
       build_name: manywheel-py3_11-cpu-s390x
@@ -250,7 +250,7 @@ jobs:
       # favor of GPU_ARCH_VERSION
       DESIRED_CUDA: cpu
       GPU_ARCH_TYPE: cpu-s390x
-      DOCKER_IMAGE: pytorch/manylinuxs390x-builder:cpu-s390x
+      DOCKER_IMAGE: pytorch/manylinuxs390x-builder:cpu-s390x-main
       use_split_build: False
       DESIRED_PYTHON: "3.12"
       runs_on: linux.s390x
@@ -274,7 +274,7 @@ jobs:
       # favor of GPU_ARCH_VERSION
       DESIRED_CUDA: cpu
       GPU_ARCH_TYPE: cpu-s390x
GPU_ARCH_TYPE: cpu-s390x
|
GPU_ARCH_TYPE: cpu-s390x
|
||||||
DOCKER_IMAGE: pytorch/manylinuxs390x-builder:cpu-s390x
|
DOCKER_IMAGE: pytorch/manylinuxs390x-builder:cpu-s390x-main
|
||||||
use_split_build: False
|
use_split_build: False
|
||||||
DESIRED_PYTHON: "3.12"
|
DESIRED_PYTHON: "3.12"
|
||||||
build_name: manywheel-py3_12-cpu-s390x
|
build_name: manywheel-py3_12-cpu-s390x
|
||||||
@ -296,7 +296,7 @@ jobs:
|
|||||||
# favor of GPU_ARCH_VERSION
|
# favor of GPU_ARCH_VERSION
|
||||||
DESIRED_CUDA: cpu
|
DESIRED_CUDA: cpu
|
||||||
GPU_ARCH_TYPE: cpu-s390x
|
GPU_ARCH_TYPE: cpu-s390x
|
||||||
DOCKER_IMAGE: pytorch/manylinuxs390x-builder:cpu-s390x
|
DOCKER_IMAGE: pytorch/manylinuxs390x-builder:cpu-s390x-main
|
||||||
use_split_build: False
|
use_split_build: False
|
||||||
DESIRED_PYTHON: "3.12"
|
DESIRED_PYTHON: "3.12"
|
||||||
build_name: manywheel-py3_12-cpu-s390x
|
build_name: manywheel-py3_12-cpu-s390x
|
||||||
@ -315,7 +315,7 @@ jobs:
|
|||||||
# favor of GPU_ARCH_VERSION
|
# favor of GPU_ARCH_VERSION
|
||||||
DESIRED_CUDA: cpu
|
DESIRED_CUDA: cpu
|
||||||
GPU_ARCH_TYPE: cpu-s390x
|
GPU_ARCH_TYPE: cpu-s390x
|
||||||
DOCKER_IMAGE: pytorch/manylinuxs390x-builder:cpu-s390x
|
DOCKER_IMAGE: pytorch/manylinuxs390x-builder:cpu-s390x-main
|
||||||
use_split_build: False
|
use_split_build: False
|
||||||
DESIRED_PYTHON: "3.13"
|
DESIRED_PYTHON: "3.13"
|
||||||
runs_on: linux.s390x
|
runs_on: linux.s390x
|
||||||
@ -339,7 +339,7 @@ jobs:
|
|||||||
# favor of GPU_ARCH_VERSION
|
# favor of GPU_ARCH_VERSION
|
||||||
DESIRED_CUDA: cpu
|
DESIRED_CUDA: cpu
|
||||||
GPU_ARCH_TYPE: cpu-s390x
|
GPU_ARCH_TYPE: cpu-s390x
|
||||||
DOCKER_IMAGE: pytorch/manylinuxs390x-builder:cpu-s390x
|
DOCKER_IMAGE: pytorch/manylinuxs390x-builder:cpu-s390x-main
|
||||||
use_split_build: False
|
use_split_build: False
|
||||||
DESIRED_PYTHON: "3.13"
|
DESIRED_PYTHON: "3.13"
|
||||||
build_name: manywheel-py3_13-cpu-s390x
|
build_name: manywheel-py3_13-cpu-s390x
|
||||||
@ -361,7 +361,7 @@ jobs:
|
|||||||
# favor of GPU_ARCH_VERSION
|
# favor of GPU_ARCH_VERSION
|
||||||
DESIRED_CUDA: cpu
|
DESIRED_CUDA: cpu
|
||||||
GPU_ARCH_TYPE: cpu-s390x
|
GPU_ARCH_TYPE: cpu-s390x
|
||||||
DOCKER_IMAGE: pytorch/manylinuxs390x-builder:cpu-s390x
|
DOCKER_IMAGE: pytorch/manylinuxs390x-builder:cpu-s390x-main
|
||||||
use_split_build: False
|
use_split_build: False
|
||||||
DESIRED_PYTHON: "3.13"
|
DESIRED_PYTHON: "3.13"
|
||||||
build_name: manywheel-py3_13-cpu-s390x
|
build_name: manywheel-py3_13-cpu-s390x
|
||||||
|
|||||||
41  .github/workflows/generated-windows-arm64-binary-libtorch-debug-nightly.yml  (generated, vendored)
@@ -1,12 +1,11 @@
# @generated DO NOT EDIT MANUALLY

-# Template is at: .github/templates/windows_binary_build_workflow.yml.j2
+# Template is at: .github/templates/windows_arm64_binary_build_workflow.yml.j2
# Generation script: .github/scripts/generate_ci_workflows.py
name: windows-arm64-binary-libtorch-debug

on:
  push:
-    # NOTE: Meta Employees can trigger new nightlies using: https://fburl.com/trigger_pytorch_nightly_build
    branches:
      - nightly
    tags:
@@ -18,24 +17,18 @@ on:
  workflow_dispatch:

env:
-  # Needed for conda builds
-  ALPINE_IMAGE: "308535385114.dkr.ecr.us-east-1.amazonaws.com/tool/alpine"
-  AWS_DEFAULT_REGION: us-east-1
  BUILD_ENVIRONMENT: windows-arm64-binary-libtorch-debug
  GITHUB_TOKEN: ${{ secrets.GITHUB_TOKEN }}
  PR_NUMBER: ${{ github.event.pull_request.number }}
  SHA1: ${{ github.event.pull_request.head.sha || github.sha }}
  SKIP_ALL_TESTS: 1
-  OS: windows-arm64
  PYTORCH_ROOT: /pytorch
  DOWNLOADS_DIR: c:\temp\downloads
  DEPENDENCIES_DIR: c:\temp\dependencies
  ENABLE_APL: 1
  ENABLE_OPENBLAS: 0
  MSVC_VERSION : 14.42
-concurrency:
-  group: windows-arm64-binary-libtorch-debug-${{ github.event.pull_request.number || github.ref_name }}-${{ github.ref_type == 'branch' && github.sha }}-${{ github.event_name == 'workflow_dispatch' }}
-  cancel-in-progress: true
+  AWS_DEFAULT_REGION: us-east-1

jobs:
  get-label-type:
@@ -51,7 +44,7 @@ jobs:
    if: ${{ github.repository_owner == 'pytorch' }}
    needs: get-label-type
    runs-on: "windows-11-arm64"
-    timeout-minutes: 300
+    timeout-minutes: 240
    env:
      PYTORCH_ROOT: ${{ github.workspace }}/pytorch
      PACKAGE_TYPE: libtorch
@@ -66,6 +59,9 @@ jobs:
      # without this value pip does not get installed for some reason
      DESIRED_PYTHON: "3.9"
    steps:
+      # NOTE: These environment variables are put here so that they can be applied on every job equally
+      # They are also here because setting them at a workflow level doesn't give us access to the
+      # runner.temp variable, which we need.
      - name: Populate binary env
        shell: cmd
        run: |
@@ -121,11 +117,11 @@ jobs:
      - name: Populate binary env
        shell: bash
        run: |
-          "${PYTORCH_ROOT}/.circleci/scripts/binary_populate_env.sh"
+          "pytorch/.circleci/scripts/binary_populate_env.sh"
      - name: Build PyTorch binary
        shell: bash
        run: |
-          "${PYTORCH_ROOT}/.circleci/scripts/binary_windows_build.sh"
+          "pytorch/.circleci/scripts/binary_windows_arm64_build.sh"
      - uses: actions/upload-artifact@v4.4.0
        if: always()
        with:
@@ -139,7 +135,7 @@ jobs:
      - libtorch-cpu-shared-with-deps-debug-build
      - get-label-type
    runs-on: "windows-11-arm64"
-    timeout-minutes: 300
+    timeout-minutes: 240
    env:
      PYTORCH_ROOT: ${{ github.workspace }}/pytorch
      PACKAGE_TYPE: libtorch
@@ -154,17 +150,25 @@ jobs:
      # without this value pip does not get installed for some reason
      DESIRED_PYTHON: "3.9"
    steps:
+      # NOTE: These environment variables are put here so that they can be applied on every job equally
+      # They are also here because setting them at a workflow level doesn't give us access to the
+      # runner.temp variable, which we need.
      - name: Populate binary env
        shell: cmd
        run: |
          echo BINARY_ENV_FILE=%RUNNER_TEMP%/env>> %GITHUB_ENV%
          echo PYTORCH_FINAL_PACKAGE_DIR=%RUNNER_TEMP%/artifacts>> %GITHUB_ENV%
          echo WIN_PACKAGE_WORK_DIR=%RUNNER_TEMP%>> %GITHUB_ENV%
+      - uses: actions/download-artifact@v4.1.7
+        name: Download Build Artifacts
+        with:
+          name: libtorch-cpu-shared-with-deps-debug
+          path: "${{ env.PYTORCH_FINAL_PACKAGE_DIR }}"
      - name: Git checkout PyTorch
        uses: actions/checkout@v4
        with:
          path: "pytorch"
-      - name: Populate binary env
+      - name: Bootstrap Git
        shell: cmd
        run: |
          "pytorch/.ci/pytorch/windows/arm64/bootstrap_git.bat"
@@ -193,19 +197,14 @@ jobs:
        shell: cmd
        run: |
          "pytorch/.ci/pytorch/windows/arm64/bootstrap_rust.bat"
-      - uses: actions/download-artifact@v4.1.7
-        name: Download Build Artifacts
-        with:
-          name: libtorch-cpu-shared-with-deps-debug
-          path: "${{ env.PYTORCH_FINAL_PACKAGE_DIR }}"
      - name: Populate binary env
        shell: bash
        run: |
-          "${PYTORCH_ROOT}/.circleci/scripts/binary_populate_env.sh"
+          "pytorch/.circleci/scripts/binary_populate_env.sh"
      - name: Test PyTorch binary
        shell: bash
        run: |
-          "${PYTORCH_ROOT}/.circleci/scripts/binary_windows_test.sh"
+          "pytorch/.circleci/scripts/binary_windows_arm64_test.sh"
  libtorch-cpu-shared-with-deps-debug-upload: # Uploading
    if: ${{ github.repository_owner == 'pytorch' }}
    permissions:
41  .github/workflows/generated-windows-arm64-binary-libtorch-release-nightly.yml  (generated, vendored)
@@ -1,12 +1,11 @@
# @generated DO NOT EDIT MANUALLY

-# Template is at: .github/templates/windows_binary_build_workflow.yml.j2
+# Template is at: .github/templates/windows_arm64_binary_build_workflow.yml.j2
# Generation script: .github/scripts/generate_ci_workflows.py
name: windows-arm64-binary-libtorch-release

on:
  push:
-    # NOTE: Meta Employees can trigger new nightlies using: https://fburl.com/trigger_pytorch_nightly_build
    branches:
      - nightly
    tags:
@@ -18,24 +17,18 @@ on:
  workflow_dispatch:

env:
-  # Needed for conda builds
-  ALPINE_IMAGE: "308535385114.dkr.ecr.us-east-1.amazonaws.com/tool/alpine"
-  AWS_DEFAULT_REGION: us-east-1
  BUILD_ENVIRONMENT: windows-arm64-binary-libtorch-release
  GITHUB_TOKEN: ${{ secrets.GITHUB_TOKEN }}
  PR_NUMBER: ${{ github.event.pull_request.number }}
  SHA1: ${{ github.event.pull_request.head.sha || github.sha }}
  SKIP_ALL_TESTS: 1
-  OS: windows-arm64
  PYTORCH_ROOT: /pytorch
  DOWNLOADS_DIR: c:\temp\downloads
  DEPENDENCIES_DIR: c:\temp\dependencies
  ENABLE_APL: 1
  ENABLE_OPENBLAS: 0
  MSVC_VERSION : 14.42
-concurrency:
-  group: windows-arm64-binary-libtorch-release-${{ github.event.pull_request.number || github.ref_name }}-${{ github.ref_type == 'branch' && github.sha }}-${{ github.event_name == 'workflow_dispatch' }}
-  cancel-in-progress: true
+  AWS_DEFAULT_REGION: us-east-1

jobs:
  get-label-type:
@@ -51,7 +44,7 @@ jobs:
    if: ${{ github.repository_owner == 'pytorch' }}
    needs: get-label-type
    runs-on: "windows-11-arm64"
-    timeout-minutes: 300
+    timeout-minutes: 240
    env:
      PYTORCH_ROOT: ${{ github.workspace }}/pytorch
      PACKAGE_TYPE: libtorch
@@ -66,6 +59,9 @@ jobs:
      # without this value pip does not get installed for some reason
      DESIRED_PYTHON: "3.9"
    steps:
+      # NOTE: These environment variables are put here so that they can be applied on every job equally
+      # They are also here because setting them at a workflow level doesn't give us access to the
+      # runner.temp variable, which we need.
      - name: Populate binary env
        shell: cmd
        run: |
@@ -121,11 +117,11 @@ jobs:
      - name: Populate binary env
        shell: bash
        run: |
-          "${PYTORCH_ROOT}/.circleci/scripts/binary_populate_env.sh"
+          "pytorch/.circleci/scripts/binary_populate_env.sh"
      - name: Build PyTorch binary
        shell: bash
        run: |
-          "${PYTORCH_ROOT}/.circleci/scripts/binary_windows_build.sh"
+          "pytorch/.circleci/scripts/binary_windows_arm64_build.sh"
      - uses: actions/upload-artifact@v4.4.0
        if: always()
        with:
@@ -139,7 +135,7 @@ jobs:
      - libtorch-cpu-shared-with-deps-release-build
      - get-label-type
    runs-on: "windows-11-arm64"
-    timeout-minutes: 300
+    timeout-minutes: 240
    env:
      PYTORCH_ROOT: ${{ github.workspace }}/pytorch
      PACKAGE_TYPE: libtorch
@@ -154,17 +150,25 @@ jobs:
      # without this value pip does not get installed for some reason
      DESIRED_PYTHON: "3.9"
    steps:
+      # NOTE: These environment variables are put here so that they can be applied on every job equally
+      # They are also here because setting them at a workflow level doesn't give us access to the
+      # runner.temp variable, which we need.
      - name: Populate binary env
        shell: cmd
        run: |
          echo BINARY_ENV_FILE=%RUNNER_TEMP%/env>> %GITHUB_ENV%
          echo PYTORCH_FINAL_PACKAGE_DIR=%RUNNER_TEMP%/artifacts>> %GITHUB_ENV%
          echo WIN_PACKAGE_WORK_DIR=%RUNNER_TEMP%>> %GITHUB_ENV%
+      - uses: actions/download-artifact@v4.1.7
+        name: Download Build Artifacts
+        with:
+          name: libtorch-cpu-shared-with-deps-release
+          path: "${{ env.PYTORCH_FINAL_PACKAGE_DIR }}"
      - name: Git checkout PyTorch
        uses: actions/checkout@v4
        with:
          path: "pytorch"
-      - name: Populate binary env
+      - name: Bootstrap Git
        shell: cmd
        run: |
          "pytorch/.ci/pytorch/windows/arm64/bootstrap_git.bat"
@@ -193,19 +197,14 @@ jobs:
        shell: cmd
        run: |
          "pytorch/.ci/pytorch/windows/arm64/bootstrap_rust.bat"
-      - uses: actions/download-artifact@v4.1.7
-        name: Download Build Artifacts
-        with:
-          name: libtorch-cpu-shared-with-deps-release
-          path: "${{ env.PYTORCH_FINAL_PACKAGE_DIR }}"
      - name: Populate binary env
        shell: bash
        run: |
-          "${PYTORCH_ROOT}/.circleci/scripts/binary_populate_env.sh"
+          "pytorch/.circleci/scripts/binary_populate_env.sh"
      - name: Test PyTorch binary
        shell: bash
        run: |
-          "${PYTORCH_ROOT}/.circleci/scripts/binary_windows_test.sh"
+          "pytorch/.circleci/scripts/binary_windows_arm64_test.sh"
  libtorch-cpu-shared-with-deps-release-upload: # Uploading
    if: ${{ github.repository_owner == 'pytorch' }}
    permissions:
41  .github/workflows/generated-windows-arm64-binary-wheel-nightly.yml  (generated, vendored)
@@ -1,12 +1,11 @@
# @generated DO NOT EDIT MANUALLY

-# Template is at: .github/templates/windows_binary_build_workflow.yml.j2
+# Template is at: .github/templates/windows_arm64_binary_build_workflow.yml.j2
# Generation script: .github/scripts/generate_ci_workflows.py
name: windows-arm64-binary-wheel

on:
  push:
-    # NOTE: Meta Employees can trigger new nightlies using: https://fburl.com/trigger_pytorch_nightly_build
    branches:
      - nightly
    tags:
@@ -18,24 +17,18 @@ on:
  workflow_dispatch:

env:
-  # Needed for conda builds
-  ALPINE_IMAGE: "308535385114.dkr.ecr.us-east-1.amazonaws.com/tool/alpine"
-  AWS_DEFAULT_REGION: us-east-1
  BUILD_ENVIRONMENT: windows-arm64-binary-wheel
  GITHUB_TOKEN: ${{ secrets.GITHUB_TOKEN }}
  PR_NUMBER: ${{ github.event.pull_request.number }}
  SHA1: ${{ github.event.pull_request.head.sha || github.sha }}
  SKIP_ALL_TESTS: 1
-  OS: windows-arm64
  PYTORCH_ROOT: /pytorch
  DOWNLOADS_DIR: c:\temp\downloads
  DEPENDENCIES_DIR: c:\temp\dependencies
  ENABLE_APL: 1
  ENABLE_OPENBLAS: 0
  MSVC_VERSION : 14.42
-concurrency:
-  group: windows-arm64-binary-wheel-${{ github.event.pull_request.number || github.ref_name }}-${{ github.ref_type == 'branch' && github.sha }}-${{ github.event_name == 'workflow_dispatch' }}
-  cancel-in-progress: true
+  AWS_DEFAULT_REGION: us-east-1

jobs:
  get-label-type:
@@ -51,7 +44,7 @@ jobs:
    if: ${{ github.repository_owner == 'pytorch' }}
    needs: get-label-type
    runs-on: "windows-11-arm64"
-    timeout-minutes: 300
+    timeout-minutes: 240
    env:
      PYTORCH_ROOT: ${{ github.workspace }}/pytorch
      PACKAGE_TYPE: wheel
@@ -63,6 +56,9 @@ jobs:
      DESIRED_PYTHON: "3.12"
      PYTORCH_EXTRA_INSTALL_REQUIREMENTS: nvidia-cuda-nvrtc-cu12==12.6.77; platform_system == 'Linux' and platform_machine == 'x86_64' | nvidia-cuda-runtime-cu12==12.6.77; platform_system == 'Linux' and platform_machine == 'x86_64' | nvidia-cuda-cupti-cu12==12.6.80; platform_system == 'Linux' and platform_machine == 'x86_64' | nvidia-cudnn-cu12==9.5.1.17; platform_system == 'Linux' and platform_machine == 'x86_64' | nvidia-cublas-cu12==12.6.4.1; platform_system == 'Linux' and platform_machine == 'x86_64' | nvidia-cufft-cu12==11.3.0.4; platform_system == 'Linux' and platform_machine == 'x86_64' | nvidia-curand-cu12==10.3.7.77; platform_system == 'Linux' and platform_machine == 'x86_64' | nvidia-cusolver-cu12==11.7.1.2; platform_system == 'Linux' and platform_machine == 'x86_64' | nvidia-cusparse-cu12==12.5.4.2; platform_system == 'Linux' and platform_machine == 'x86_64' | nvidia-cusparselt-cu12==0.6.3; platform_system == 'Linux' and platform_machine == 'x86_64' | nvidia-nccl-cu12==2.26.2; platform_system == 'Linux' and platform_machine == 'x86_64' | nvidia-nvtx-cu12==12.6.77; platform_system == 'Linux' and platform_machine == 'x86_64' | nvidia-nvjitlink-cu12==12.6.85; platform_system == 'Linux' and platform_machine == 'x86_64' | nvidia-cufile-cu12==1.11.1.6; platform_system == 'Linux' and platform_machine == 'x86_64'
    steps:
+      # NOTE: These environment variables are put here so that they can be applied on every job equally
+      # They are also here because setting them at a workflow level doesn't give us access to the
+      # runner.temp variable, which we need.
      - name: Populate binary env
        shell: cmd
        run: |
@@ -118,11 +114,11 @@ jobs:
      - name: Populate binary env
        shell: bash
        run: |
-          "${PYTORCH_ROOT}/.circleci/scripts/binary_populate_env.sh"
+          "pytorch/.circleci/scripts/binary_populate_env.sh"
      - name: Build PyTorch binary
        shell: bash
        run: |
-          "${PYTORCH_ROOT}/.circleci/scripts/binary_windows_build.sh"
+          "pytorch/.circleci/scripts/binary_windows_arm64_build.sh"
      - uses: actions/upload-artifact@v4.4.0
        if: always()
        with:
@@ -136,7 +132,7 @@ jobs:
      - wheel-py3_12-cpu-build
      - get-label-type
    runs-on: "windows-11-arm64"
-    timeout-minutes: 300
+    timeout-minutes: 240
    env:
      PYTORCH_ROOT: ${{ github.workspace }}/pytorch
      PACKAGE_TYPE: wheel
@@ -147,17 +143,25 @@ jobs:
      SKIP_ALL_TESTS: 1
      DESIRED_PYTHON: "3.12"
    steps:
+      # NOTE: These environment variables are put here so that they can be applied on every job equally
+      # They are also here because setting them at a workflow level doesn't give us access to the
+      # runner.temp variable, which we need.
      - name: Populate binary env
        shell: cmd
        run: |
          echo BINARY_ENV_FILE=%RUNNER_TEMP%/env>> %GITHUB_ENV%
          echo PYTORCH_FINAL_PACKAGE_DIR=%RUNNER_TEMP%/artifacts>> %GITHUB_ENV%
          echo WIN_PACKAGE_WORK_DIR=%RUNNER_TEMP%>> %GITHUB_ENV%
+      - uses: actions/download-artifact@v4.1.7
+        name: Download Build Artifacts
+        with:
+          name: wheel-py3_12-cpu
+          path: "${{ env.PYTORCH_FINAL_PACKAGE_DIR }}"
      - name: Git checkout PyTorch
        uses: actions/checkout@v4
        with:
          path: "pytorch"
-      - name: Populate binary env
+      - name: Bootstrap Git
        shell: cmd
        run: |
          "pytorch/.ci/pytorch/windows/arm64/bootstrap_git.bat"
@@ -186,19 +190,14 @@ jobs:
        shell: cmd
        run: |
          "pytorch/.ci/pytorch/windows/arm64/bootstrap_rust.bat"
-      - uses: actions/download-artifact@v4.1.7
-        name: Download Build Artifacts
-        with:
-          name: wheel-py3_12-cpu
-          path: "${{ env.PYTORCH_FINAL_PACKAGE_DIR }}"
      - name: Populate binary env
        shell: bash
        run: |
-          "${PYTORCH_ROOT}/.circleci/scripts/binary_populate_env.sh"
+          "pytorch/.circleci/scripts/binary_populate_env.sh"
      - name: Test PyTorch binary
        shell: bash
        run: |
-          "${PYTORCH_ROOT}/.circleci/scripts/binary_windows_test.sh"
+          "pytorch/.circleci/scripts/binary_windows_arm64_test.sh"
  wheel-py3_12-cpu-upload: # Uploading
    if: ${{ github.repository_owner == 'pytorch' }}
    permissions:
44  .github/workflows/generated-windows-binary-libtorch-debug-main.yml  (generated, vendored)
@@ -19,7 +19,6 @@ env:
  PR_NUMBER: ${{ github.event.pull_request.number }}
  SHA1: ${{ github.event.pull_request.head.sha || github.sha }}
  SKIP_ALL_TESTS: 1
-  OS: windows
concurrency:
  group: windows-binary-libtorch-debug-${{ github.event.pull_request.number || github.ref_name }}-${{ github.ref_type == 'branch' && github.sha }}-${{ github.event_name == 'workflow_dispatch' }}
  cancel-in-progress: true
@@ -53,15 +52,6 @@ jobs:
      # without this value pip does not get installed for some reason
      DESIRED_PYTHON: "3.9"
    steps:
-      # NOTE: These environment variables are put here so that they can be applied on every job equally
-      # They are also here because setting them at a workflow level doesn't give us access to the
-      # runner.temp variable, which we need.
-      - name: Populate binary env
-        shell: bash
-        run: |
-          echo "BINARY_ENV_FILE=${RUNNER_TEMP}/env" >> "${GITHUB_ENV}"
-          echo "PYTORCH_FINAL_PACKAGE_DIR=${RUNNER_TEMP}/artifacts" >> "${GITHUB_ENV}"
-          echo "WIN_PACKAGE_WORK_DIR=${RUNNER_TEMP}"
      - name: Display EC2 information
        shell: bash
        run: |
@@ -106,6 +96,15 @@ jobs:
          # Let's both exclude the path and disable Windows Defender completely just to be sure
          # that it doesn't interfere
          Set-MpPreference -DisableRealtimeMonitoring $True -ErrorAction Ignore
+      # NOTE: These environment variables are put here so that they can be applied on every job equally
+      # They are also here because setting them at a workflow level doesn't give us access to the
+      # runner.temp variable, which we need.
+      - name: Populate binary env
+        shell: bash
+        run: |
+          echo "BINARY_ENV_FILE=${RUNNER_TEMP}/env" >> "${GITHUB_ENV}"
+          echo "PYTORCH_FINAL_PACKAGE_DIR=${RUNNER_TEMP}/artifacts" >> "${GITHUB_ENV}"
+          echo "WIN_PACKAGE_WORK_DIR=${RUNNER_TEMP}"
      - name: Checkout PyTorch
        uses: actions/checkout@v4
        with:
@@ -146,7 +145,6 @@ jobs:
        if: always()
        run: |
          .github\scripts\kill_active_ssh_sessions.ps1
-
  libtorch-cpu-shared-with-deps-debug-test: # Testing
    if: ${{ github.repository_owner == 'pytorch' }}
    needs:
@@ -212,18 +210,6 @@ jobs:
          # Let's both exclude the path and disable Windows Defender completely just to be sure
          # that it doesn't interfere
          Set-MpPreference -DisableRealtimeMonitoring $True -ErrorAction Ignore
-      - name: Checkout PyTorch
-        uses: actions/checkout@v4
-        with:
-          ref: ${{ github.event_name == 'pull_request' && github.event.pull_request.head.sha || github.sha }}
-          submodules: recursive
-          path: pytorch
-          show-progress: false
-      - name: Clean PyTorch checkout
-        run: |
-          # Remove any artifacts from the previous checkouts
-          git clean -fxd
-        working-directory: pytorch
      # NOTE: These environment variables are put here so that they can be applied on every job equally
      # They are also here because setting them at a workflow level doesn't give us access to the
      # runner.temp variable, which we need.
@@ -238,6 +224,18 @@ jobs:
        with:
          name: libtorch-cpu-shared-with-deps-debug
          path: "${{ env.PYTORCH_FINAL_PACKAGE_DIR }}"
+      - name: Checkout PyTorch
+        uses: actions/checkout@v4
+        with:
+          ref: ${{ github.event_name == 'pull_request' && github.event.pull_request.head.sha || github.sha }}
+          submodules: recursive
+          path: pytorch
+          show-progress: false
+      - name: Clean PyTorch checkout
+        run: |
+          # Remove any artifacts from the previous checkouts
+          git clean -fxd
+        working-directory: pytorch
      - name: Populate binary env
        shell: bash
        run: |
173  .github/workflows/generated-windows-binary-libtorch-debug-nightly.yml  (generated, vendored)
@ -26,7 +26,6 @@ env:
|
|||||||
PR_NUMBER: ${{ github.event.pull_request.number }}
|
PR_NUMBER: ${{ github.event.pull_request.number }}
|
||||||
SHA1: ${{ github.event.pull_request.head.sha || github.sha }}
|
SHA1: ${{ github.event.pull_request.head.sha || github.sha }}
|
||||||
SKIP_ALL_TESTS: 1
|
SKIP_ALL_TESTS: 1
|
||||||
OS: windows
|
|
||||||
concurrency:
|
concurrency:
|
||||||
group: windows-binary-libtorch-debug-${{ github.event.pull_request.number || github.ref_name }}-${{ github.ref_type == 'branch' && github.sha }}-${{ github.event_name == 'workflow_dispatch' }}
|
group: windows-binary-libtorch-debug-${{ github.event.pull_request.number || github.ref_name }}-${{ github.ref_type == 'branch' && github.sha }}-${{ github.event_name == 'workflow_dispatch' }}
|
||||||
cancel-in-progress: true
|
cancel-in-progress: true
|
||||||
@ -60,15 +59,6 @@ jobs:
|
|||||||
# without this value pip does not get installed for some reason
|
# without this value pip does not get installed for some reason
|
||||||
DESIRED_PYTHON: "3.9"
|
DESIRED_PYTHON: "3.9"
|
||||||
steps:
|
steps:
|
||||||
# NOTE: These environment variables are put here so that they can be applied on every job equally
|
|
||||||
# They are also here because setting them at a workflow level doesn't give us access to the
|
|
||||||
# runner.temp variable, which we need.
|
|
||||||
- name: Populate binary env
|
|
||||||
shell: bash
|
|
||||||
run: |
|
|
||||||
echo "BINARY_ENV_FILE=${RUNNER_TEMP}/env" >> "${GITHUB_ENV}"
|
|
||||||
echo "PYTORCH_FINAL_PACKAGE_DIR=${RUNNER_TEMP}/artifacts" >> "${GITHUB_ENV}"
|
|
||||||
echo "WIN_PACKAGE_WORK_DIR=${RUNNER_TEMP}"
|
|
||||||
- name: Display EC2 information
|
- name: Display EC2 information
|
||||||
shell: bash
|
shell: bash
|
||||||
run: |
|
run: |
|
||||||
@ -113,6 +103,15 @@ jobs:
|
|||||||
# Let's both exclude the path and disable Windows Defender completely just to be sure
|
# Let's both exclude the path and disable Windows Defender completely just to be sure
|
||||||
# that it doesn't interfere
|
# that it doesn't interfere
|
||||||
Set-MpPreference -DisableRealtimeMonitoring $True -ErrorAction Ignore
|
Set-MpPreference -DisableRealtimeMonitoring $True -ErrorAction Ignore
|
||||||
|
# NOTE: These environment variables are put here so that they can be applied on every job equally
|
||||||
|
# They are also here because setting them at a workflow level doesn't give us access to the
|
||||||
|
# runner.temp variable, which we need.
|
||||||
|
- name: Populate binary env
|
||||||
|
shell: bash
|
||||||
|
run: |
|
||||||
|
echo "BINARY_ENV_FILE=${RUNNER_TEMP}/env" >> "${GITHUB_ENV}"
|
||||||
|
echo "PYTORCH_FINAL_PACKAGE_DIR=${RUNNER_TEMP}/artifacts" >> "${GITHUB_ENV}"
|
||||||
|
echo "WIN_PACKAGE_WORK_DIR=${RUNNER_TEMP}"
|
||||||
- name: Checkout PyTorch
|
- name: Checkout PyTorch
|
||||||
uses: actions/checkout@v4
|
uses: actions/checkout@v4
|
||||||
with:
|
with:
|
||||||
@ -153,7 +152,6 @@ jobs:
|
|||||||
if: always()
|
if: always()
|
||||||
run: |
|
run: |
|
||||||
.github\scripts\kill_active_ssh_sessions.ps1
|
.github\scripts\kill_active_ssh_sessions.ps1
|
||||||
|
|
||||||
libtorch-cpu-shared-with-deps-debug-test: # Testing
|
libtorch-cpu-shared-with-deps-debug-test: # Testing
|
||||||
if: ${{ github.repository_owner == 'pytorch' }}
|
if: ${{ github.repository_owner == 'pytorch' }}
|
||||||
needs:
|
needs:
|
||||||
@ -219,18 +217,6 @@ jobs:
|
|||||||
# Let's both exclude the path and disable Windows Defender completely just to be sure
|
# Let's both exclude the path and disable Windows Defender completely just to be sure
|
||||||
# that it doesn't interfere
|
# that it doesn't interfere
|
||||||
Set-MpPreference -DisableRealtimeMonitoring $True -ErrorAction Ignore
|
Set-MpPreference -DisableRealtimeMonitoring $True -ErrorAction Ignore
|
||||||
- name: Checkout PyTorch
|
|
||||||
uses: actions/checkout@v4
|
|
||||||
with:
|
|
||||||
ref: ${{ github.event_name == 'pull_request' && github.event.pull_request.head.sha || github.sha }}
|
|
||||||
submodules: recursive
|
|
||||||
path: pytorch
|
|
||||||
show-progress: false
|
|
||||||
- name: Clean PyTorch checkout
|
|
||||||
run: |
|
|
||||||
# Remove any artifacts from the previous checkouts
|
|
||||||
git clean -fxd
|
|
||||||
working-directory: pytorch
|
|
||||||
# NOTE: These environment variables are put here so that they can be applied on every job equally
|
# NOTE: These environment variables are put here so that they can be applied on every job equally
|
||||||
# They are also here because setting them at a workflow level doesn't give us access to the
|
# They are also here because setting them at a workflow level doesn't give us access to the
|
||||||
# runner.temp variable, which we need.
|
# runner.temp variable, which we need.
|
||||||
@ -245,6 +231,18 @@ jobs:
|
|||||||
with:
|
with:
|
||||||
name: libtorch-cpu-shared-with-deps-debug
|
name: libtorch-cpu-shared-with-deps-debug
|
||||||
path: "${{ env.PYTORCH_FINAL_PACKAGE_DIR }}"
|
path: "${{ env.PYTORCH_FINAL_PACKAGE_DIR }}"
|
||||||
|
- name: Checkout PyTorch
|
||||||
|
uses: actions/checkout@v4
|
||||||
|
with:
|
||||||
|
ref: ${{ github.event_name == 'pull_request' && github.event.pull_request.head.sha || github.sha }}
|
||||||
|
submodules: recursive
|
||||||
|
path: pytorch
|
||||||
|
show-progress: false
|
||||||
|
- name: Clean PyTorch checkout
|
||||||
|
run: |
|
||||||
|
# Remove any artifacts from the previous checkouts
|
||||||
|
git clean -fxd
|
||||||
|
working-directory: pytorch
|
||||||
- name: Populate binary env
|
- name: Populate binary env
|
||||||
shell: bash
|
shell: bash
|
||||||
run: |
|
run: |
|
||||||
@ -308,15 +306,6 @@ jobs:
|
|||||||
# without this value pip does not get installed for some reason
|
# without this value pip does not get installed for some reason
|
||||||
DESIRED_PYTHON: "3.9"
|
DESIRED_PYTHON: "3.9"
|
||||||
steps:
|
steps:
|
||||||
# NOTE: These environment variables are put here so that they can be applied on every job equally
|
|
||||||
# They are also here because setting them at a workflow level doesn't give us access to the
|
|
||||||
# runner.temp variable, which we need.
|
|
||||||
- name: Populate binary env
|
|
||||||
shell: bash
|
|
||||||
run: |
|
|
||||||
echo "BINARY_ENV_FILE=${RUNNER_TEMP}/env" >> "${GITHUB_ENV}"
|
|
||||||
echo "PYTORCH_FINAL_PACKAGE_DIR=${RUNNER_TEMP}/artifacts" >> "${GITHUB_ENV}"
|
|
||||||
echo "WIN_PACKAGE_WORK_DIR=${RUNNER_TEMP}"
|
|
||||||
- name: Display EC2 information
|
- name: Display EC2 information
|
||||||
shell: bash
|
shell: bash
|
||||||
run: |
|
run: |
|
||||||
@ -361,6 +350,15 @@ jobs:
|
|||||||
# Let's both exclude the path and disable Windows Defender completely just to be sure
|
# Let's both exclude the path and disable Windows Defender completely just to be sure
|
||||||
# that it doesn't interfere
|
# that it doesn't interfere
|
||||||
Set-MpPreference -DisableRealtimeMonitoring $True -ErrorAction Ignore
|
Set-MpPreference -DisableRealtimeMonitoring $True -ErrorAction Ignore
|
||||||
|
# NOTE: These environment variables are put here so that they can be applied on every job equally
|
||||||
|
# They are also here because setting them at a workflow level doesn't give us access to the
|
||||||
|
# runner.temp variable, which we need.
|
||||||
|
- name: Populate binary env
|
||||||
|
shell: bash
|
||||||
|
run: |
|
||||||
|
echo "BINARY_ENV_FILE=${RUNNER_TEMP}/env" >> "${GITHUB_ENV}"
|
||||||
|
echo "PYTORCH_FINAL_PACKAGE_DIR=${RUNNER_TEMP}/artifacts" >> "${GITHUB_ENV}"
|
||||||
|
echo "WIN_PACKAGE_WORK_DIR=${RUNNER_TEMP}"
|
||||||
- name: Checkout PyTorch
|
- name: Checkout PyTorch
|
||||||
uses: actions/checkout@v4
|
uses: actions/checkout@v4
|
||||||
with:
|
with:
|
||||||
@ -401,7 +399,6 @@ jobs:
|
|||||||
if: always()
|
if: always()
|
||||||
run: |
|
run: |
|
||||||
.github\scripts\kill_active_ssh_sessions.ps1
|
.github\scripts\kill_active_ssh_sessions.ps1
|
||||||
|
|
||||||
libtorch-cuda11_8-shared-with-deps-debug-test: # Testing
|
libtorch-cuda11_8-shared-with-deps-debug-test: # Testing
|
||||||
if: ${{ github.repository_owner == 'pytorch' }}
|
if: ${{ github.repository_owner == 'pytorch' }}
|
||||||
needs:
|
needs:
|
||||||
@ -468,18 +465,6 @@ jobs:
|
|||||||
# Let's both exclude the path and disable Windows Defender completely just to be sure
|
# Let's both exclude the path and disable Windows Defender completely just to be sure
|
||||||
# that it doesn't interfere
|
# that it doesn't interfere
|
||||||
Set-MpPreference -DisableRealtimeMonitoring $True -ErrorAction Ignore
|
Set-MpPreference -DisableRealtimeMonitoring $True -ErrorAction Ignore
|
||||||
- name: Checkout PyTorch
|
|
||||||
uses: actions/checkout@v4
|
|
||||||
with:
|
|
||||||
ref: ${{ github.event_name == 'pull_request' && github.event.pull_request.head.sha || github.sha }}
|
|
||||||
submodules: recursive
|
|
||||||
path: pytorch
|
|
||||||
show-progress: false
|
|
||||||
- name: Clean PyTorch checkout
|
|
||||||
run: |
|
|
||||||
# Remove any artifacts from the previous checkouts
|
|
||||||
git clean -fxd
|
|
||||||
working-directory: pytorch
|
|
||||||
# NOTE: These environment variables are put here so that they can be applied on every job equally
|
# NOTE: These environment variables are put here so that they can be applied on every job equally
|
||||||
# They are also here because setting them at a workflow level doesn't give us access to the
|
# They are also here because setting them at a workflow level doesn't give us access to the
|
||||||
# runner.temp variable, which we need.
|
# runner.temp variable, which we need.
|
||||||
@ -494,6 +479,18 @@ jobs:
|
|||||||
with:
|
with:
|
||||||
name: libtorch-cuda11_8-shared-with-deps-debug
|
name: libtorch-cuda11_8-shared-with-deps-debug
|
||||||
path: "${{ env.PYTORCH_FINAL_PACKAGE_DIR }}"
|
path: "${{ env.PYTORCH_FINAL_PACKAGE_DIR }}"
|
||||||
|
- name: Checkout PyTorch
|
||||||
|
uses: actions/checkout@v4
|
||||||
|
with:
|
||||||
|
ref: ${{ github.event_name == 'pull_request' && github.event.pull_request.head.sha || github.sha }}
|
||||||
|
submodules: recursive
|
||||||
|
path: pytorch
|
||||||
|
show-progress: false
|
||||||
|
- name: Clean PyTorch checkout
|
||||||
|
run: |
|
||||||
|
# Remove any artifacts from the previous checkouts
|
||||||
|
git clean -fxd
|
||||||
|
working-directory: pytorch
|
||||||
- name: Populate binary env
|
- name: Populate binary env
|
||||||
shell: bash
|
shell: bash
|
||||||
run: |
|
run: |
|
||||||
@ -558,15 +555,6 @@ jobs:
|
|||||||
# without this value pip does not get installed for some reason
|
# without this value pip does not get installed for some reason
|
||||||
DESIRED_PYTHON: "3.9"
|
DESIRED_PYTHON: "3.9"
|
||||||
steps:
|
steps:
|
||||||
# NOTE: These environment variables are put here so that they can be applied on every job equally
|
|
||||||
# They are also here because setting them at a workflow level doesn't give us access to the
|
|
||||||
# runner.temp variable, which we need.
|
|
||||||
- name: Populate binary env
|
|
||||||
shell: bash
|
|
||||||
run: |
|
|
||||||
echo "BINARY_ENV_FILE=${RUNNER_TEMP}/env" >> "${GITHUB_ENV}"
|
|
||||||
echo "PYTORCH_FINAL_PACKAGE_DIR=${RUNNER_TEMP}/artifacts" >> "${GITHUB_ENV}"
|
|
||||||
echo "WIN_PACKAGE_WORK_DIR=${RUNNER_TEMP}"
|
|
||||||
- name: Display EC2 information
|
- name: Display EC2 information
|
||||||
shell: bash
|
shell: bash
|
||||||
run: |
|
run: |
|
||||||
@ -611,6 +599,15 @@ jobs:
|
|||||||
# Let's both exclude the path and disable Windows Defender completely just to be sure
|
# Let's both exclude the path and disable Windows Defender completely just to be sure
|
||||||
# that it doesn't interfere
|
# that it doesn't interfere
|
||||||
Set-MpPreference -DisableRealtimeMonitoring $True -ErrorAction Ignore
|
Set-MpPreference -DisableRealtimeMonitoring $True -ErrorAction Ignore
|
||||||
|
# NOTE: These environment variables are put here so that they can be applied on every job equally
|
||||||
|
# They are also here because setting them at a workflow level doesn't give us access to the
|
||||||
|
# runner.temp variable, which we need.
|
||||||
|
- name: Populate binary env
|
||||||
|
shell: bash
|
||||||
|
run: |
|
||||||
|
echo "BINARY_ENV_FILE=${RUNNER_TEMP}/env" >> "${GITHUB_ENV}"
|
||||||
|
echo "PYTORCH_FINAL_PACKAGE_DIR=${RUNNER_TEMP}/artifacts" >> "${GITHUB_ENV}"
|
||||||
|
echo "WIN_PACKAGE_WORK_DIR=${RUNNER_TEMP}"
|
||||||
- name: Checkout PyTorch
|
- name: Checkout PyTorch
|
||||||
uses: actions/checkout@v4
|
uses: actions/checkout@v4
|
||||||
with:
|
with:
|
||||||
@ -651,7 +648,6 @@ jobs:
|
|||||||
if: always()
|
if: always()
|
||||||
run: |
|
run: |
|
||||||
.github\scripts\kill_active_ssh_sessions.ps1
|
.github\scripts\kill_active_ssh_sessions.ps1
|
||||||
|
|
||||||
libtorch-cuda12_6-shared-with-deps-debug-test: # Testing
|
libtorch-cuda12_6-shared-with-deps-debug-test: # Testing
|
||||||
if: ${{ github.repository_owner == 'pytorch' }}
|
if: ${{ github.repository_owner == 'pytorch' }}
|
||||||
needs:
|
needs:
|
||||||
@ -718,18 +714,6 @@ jobs:
|
|||||||
# Let's both exclude the path and disable Windows Defender completely just to be sure
|
# Let's both exclude the path and disable Windows Defender completely just to be sure
|
||||||
# that it doesn't interfere
|
# that it doesn't interfere
|
||||||
Set-MpPreference -DisableRealtimeMonitoring $True -ErrorAction Ignore
|
Set-MpPreference -DisableRealtimeMonitoring $True -ErrorAction Ignore
|
||||||
- name: Checkout PyTorch
|
|
||||||
uses: actions/checkout@v4
|
|
||||||
with:
|
|
||||||
ref: ${{ github.event_name == 'pull_request' && github.event.pull_request.head.sha || github.sha }}
|
|
||||||
submodules: recursive
|
|
||||||
path: pytorch
|
|
||||||
show-progress: false
|
|
||||||
- name: Clean PyTorch checkout
|
|
||||||
run: |
|
|
||||||
# Remove any artifacts from the previous checkouts
|
|
||||||
git clean -fxd
|
|
||||||
working-directory: pytorch
|
|
||||||
# NOTE: These environment variables are put here so that they can be applied on every job equally
|
# NOTE: These environment variables are put here so that they can be applied on every job equally
|
||||||
# They are also here because setting them at a workflow level doesn't give us access to the
|
# They are also here because setting them at a workflow level doesn't give us access to the
|
||||||
# runner.temp variable, which we need.
|
# runner.temp variable, which we need.
|
||||||
@ -744,6 +728,18 @@ jobs:
|
|||||||
with:
|
with:
|
||||||
name: libtorch-cuda12_6-shared-with-deps-debug
|
name: libtorch-cuda12_6-shared-with-deps-debug
|
||||||
path: "${{ env.PYTORCH_FINAL_PACKAGE_DIR }}"
|
path: "${{ env.PYTORCH_FINAL_PACKAGE_DIR }}"
|
||||||
|
- name: Checkout PyTorch
|
||||||
|
uses: actions/checkout@v4
|
||||||
|
with:
|
||||||
|
ref: ${{ github.event_name == 'pull_request' && github.event.pull_request.head.sha || github.sha }}
|
||||||
|
submodules: recursive
|
||||||
|
path: pytorch
|
||||||
|
show-progress: false
|
||||||
|
- name: Clean PyTorch checkout
|
||||||
|
run: |
|
||||||
|
# Remove any artifacts from the previous checkouts
|
||||||
|
git clean -fxd
|
||||||
|
working-directory: pytorch
|
||||||
- name: Populate binary env
|
- name: Populate binary env
|
||||||
shell: bash
|
shell: bash
|
||||||
run: |
|
run: |
|
||||||
@ -808,15 +804,6 @@ jobs:
|
|||||||
# without this value pip does not get installed for some reason
|
# without this value pip does not get installed for some reason
|
||||||
DESIRED_PYTHON: "3.9"
|
DESIRED_PYTHON: "3.9"
|
||||||
steps:
|
steps:
|
||||||
# NOTE: These environment variables are put here so that they can be applied on every job equally
|
|
||||||
# They are also here because setting them at a workflow level doesn't give us access to the
|
|
||||||
# runner.temp variable, which we need.
|
- - name: Populate binary env
- shell: bash
- run: |
- echo "BINARY_ENV_FILE=${RUNNER_TEMP}/env" >> "${GITHUB_ENV}"
- echo "PYTORCH_FINAL_PACKAGE_DIR=${RUNNER_TEMP}/artifacts" >> "${GITHUB_ENV}"
- echo "WIN_PACKAGE_WORK_DIR=${RUNNER_TEMP}"
  - name: Display EC2 information
  shell: bash
  run: |
@@ -861,6 +848,15 @@ jobs:
  # Let's both exclude the path and disable Windows Defender completely just to be sure
  # that it doesn't interfere
  Set-MpPreference -DisableRealtimeMonitoring $True -ErrorAction Ignore
+ # NOTE: These environment variables are put here so that they can be applied on every job equally
+ # They are also here because setting them at a workflow level doesn't give us access to the
+ # runner.temp variable, which we need.
+ - name: Populate binary env
+ shell: bash
+ run: |
+ echo "BINARY_ENV_FILE=${RUNNER_TEMP}/env" >> "${GITHUB_ENV}"
+ echo "PYTORCH_FINAL_PACKAGE_DIR=${RUNNER_TEMP}/artifacts" >> "${GITHUB_ENV}"
+ echo "WIN_PACKAGE_WORK_DIR=${RUNNER_TEMP}"
  - name: Checkout PyTorch
  uses: actions/checkout@v4
  with:
@@ -901,7 +897,6 @@ jobs:
  if: always()
  run: |
  .github\scripts\kill_active_ssh_sessions.ps1
-
  libtorch-cuda12_8-shared-with-deps-debug-test: # Testing
  if: ${{ github.repository_owner == 'pytorch' }}
  needs:
@@ -968,18 +963,6 @@ jobs:
  # Let's both exclude the path and disable Windows Defender completely just to be sure
  # that it doesn't interfere
  Set-MpPreference -DisableRealtimeMonitoring $True -ErrorAction Ignore
- - name: Checkout PyTorch
- uses: actions/checkout@v4
- with:
- ref: ${{ github.event_name == 'pull_request' && github.event.pull_request.head.sha || github.sha }}
- submodules: recursive
- path: pytorch
- show-progress: false
- - name: Clean PyTorch checkout
- run: |
- # Remove any artifacts from the previous checkouts
- git clean -fxd
- working-directory: pytorch
  # NOTE: These environment variables are put here so that they can be applied on every job equally
  # They are also here because setting them at a workflow level doesn't give us access to the
  # runner.temp variable, which we need.
@@ -994,6 +977,18 @@ jobs:
  with:
  name: libtorch-cuda12_8-shared-with-deps-debug
  path: "${{ env.PYTORCH_FINAL_PACKAGE_DIR }}"
+ - name: Checkout PyTorch
+ uses: actions/checkout@v4
+ with:
+ ref: ${{ github.event_name == 'pull_request' && github.event.pull_request.head.sha || github.sha }}
+ submodules: recursive
+ path: pytorch
+ show-progress: false
+ - name: Clean PyTorch checkout
+ run: |
+ # Remove any artifacts from the previous checkouts
+ git clean -fxd
+ working-directory: pytorch
  - name: Populate binary env
  shell: bash
  run: |
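The "Populate binary env" step that these hunks relocate relies on the standard GitHub Actions mechanism of appending KEY=VALUE lines to the file named by GITHUB_ENV, which makes those variables visible to every later step of the same job. A minimal sketch of that pattern outside the generated workflows above (the workflow name, job name, runner label, and the second step are illustrative placeholders, not part of the diff):

name: populate-env-sketch
on: workflow_dispatch
jobs:
  example-populate-env:
    runs-on: windows-2019
    steps:
      - name: Populate binary env
        shell: bash
        run: |
          # Every KEY=VALUE line appended to the file at $GITHUB_ENV becomes an
          # environment variable for all subsequent steps of this job.
          echo "BINARY_ENV_FILE=${RUNNER_TEMP}/env" >> "${GITHUB_ENV}"
      - name: Use the populated env
        shell: bash
        run: |
          # BINARY_ENV_FILE was exported by the previous step.
          echo "binary env file lives at ${BINARY_ENV_FILE}"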
.github/workflows/generated-windows-binary-libtorch-release-main.yml (generated, vendored, 44 changes)
@@ -19,7 +19,6 @@ env:
  PR_NUMBER: ${{ github.event.pull_request.number }}
  SHA1: ${{ github.event.pull_request.head.sha || github.sha }}
  SKIP_ALL_TESTS: 1
- OS: windows
  concurrency:
  group: windows-binary-libtorch-release-${{ github.event.pull_request.number || github.ref_name }}-${{ github.ref_type == 'branch' && github.sha }}-${{ github.event_name == 'workflow_dispatch' }}
  cancel-in-progress: true
@@ -53,15 +52,6 @@ jobs:
  # without this value pip does not get installed for some reason
  DESIRED_PYTHON: "3.9"
  steps:
- # NOTE: These environment variables are put here so that they can be applied on every job equally
- # They are also here because setting them at a workflow level doesn't give us access to the
- # runner.temp variable, which we need.
- - name: Populate binary env
- shell: bash
- run: |
- echo "BINARY_ENV_FILE=${RUNNER_TEMP}/env" >> "${GITHUB_ENV}"
- echo "PYTORCH_FINAL_PACKAGE_DIR=${RUNNER_TEMP}/artifacts" >> "${GITHUB_ENV}"
- echo "WIN_PACKAGE_WORK_DIR=${RUNNER_TEMP}"
  - name: Display EC2 information
  shell: bash
  run: |
@@ -106,6 +96,15 @@ jobs:
  # Let's both exclude the path and disable Windows Defender completely just to be sure
  # that it doesn't interfere
  Set-MpPreference -DisableRealtimeMonitoring $True -ErrorAction Ignore
+ # NOTE: These environment variables are put here so that they can be applied on every job equally
+ # They are also here because setting them at a workflow level doesn't give us access to the
+ # runner.temp variable, which we need.
+ - name: Populate binary env
+ shell: bash
+ run: |
+ echo "BINARY_ENV_FILE=${RUNNER_TEMP}/env" >> "${GITHUB_ENV}"
+ echo "PYTORCH_FINAL_PACKAGE_DIR=${RUNNER_TEMP}/artifacts" >> "${GITHUB_ENV}"
+ echo "WIN_PACKAGE_WORK_DIR=${RUNNER_TEMP}"
  - name: Checkout PyTorch
  uses: actions/checkout@v4
  with:
@@ -146,7 +145,6 @@ jobs:
  if: always()
  run: |
  .github\scripts\kill_active_ssh_sessions.ps1
-
  libtorch-cpu-shared-with-deps-release-test: # Testing
  if: ${{ github.repository_owner == 'pytorch' }}
  needs:
@@ -212,18 +210,6 @@ jobs:
  # Let's both exclude the path and disable Windows Defender completely just to be sure
  # that it doesn't interfere
  Set-MpPreference -DisableRealtimeMonitoring $True -ErrorAction Ignore
- - name: Checkout PyTorch
- uses: actions/checkout@v4
- with:
- ref: ${{ github.event_name == 'pull_request' && github.event.pull_request.head.sha || github.sha }}
- submodules: recursive
- path: pytorch
- show-progress: false
- - name: Clean PyTorch checkout
- run: |
- # Remove any artifacts from the previous checkouts
- git clean -fxd
- working-directory: pytorch
  # NOTE: These environment variables are put here so that they can be applied on every job equally
  # They are also here because setting them at a workflow level doesn't give us access to the
  # runner.temp variable, which we need.
@@ -238,6 +224,18 @@ jobs:
  with:
  name: libtorch-cpu-shared-with-deps-release
  path: "${{ env.PYTORCH_FINAL_PACKAGE_DIR }}"
+ - name: Checkout PyTorch
+ uses: actions/checkout@v4
+ with:
+ ref: ${{ github.event_name == 'pull_request' && github.event.pull_request.head.sha || github.sha }}
+ submodules: recursive
+ path: pytorch
+ show-progress: false
+ - name: Clean PyTorch checkout
+ run: |
+ # Remove any artifacts from the previous checkouts
+ git clean -fxd
+ working-directory: pytorch
  - name: Populate binary env
  shell: bash
  run: |
.github/workflows/generated-windows-binary-libtorch-release-nightly.yml (generated, vendored, 173 changes)
@@ -26,7 +26,6 @@ env:
  PR_NUMBER: ${{ github.event.pull_request.number }}
  SHA1: ${{ github.event.pull_request.head.sha || github.sha }}
  SKIP_ALL_TESTS: 1
- OS: windows
  concurrency:
  group: windows-binary-libtorch-release-${{ github.event.pull_request.number || github.ref_name }}-${{ github.ref_type == 'branch' && github.sha }}-${{ github.event_name == 'workflow_dispatch' }}
  cancel-in-progress: true
@@ -60,15 +59,6 @@ jobs:
  # without this value pip does not get installed for some reason
  DESIRED_PYTHON: "3.9"
  steps:
- # NOTE: These environment variables are put here so that they can be applied on every job equally
- # They are also here because setting them at a workflow level doesn't give us access to the
- # runner.temp variable, which we need.
- - name: Populate binary env
- shell: bash
- run: |
- echo "BINARY_ENV_FILE=${RUNNER_TEMP}/env" >> "${GITHUB_ENV}"
- echo "PYTORCH_FINAL_PACKAGE_DIR=${RUNNER_TEMP}/artifacts" >> "${GITHUB_ENV}"
- echo "WIN_PACKAGE_WORK_DIR=${RUNNER_TEMP}"
  - name: Display EC2 information
  shell: bash
  run: |
@@ -113,6 +103,15 @@ jobs:
  # Let's both exclude the path and disable Windows Defender completely just to be sure
  # that it doesn't interfere
  Set-MpPreference -DisableRealtimeMonitoring $True -ErrorAction Ignore
+ # NOTE: These environment variables are put here so that they can be applied on every job equally
+ # They are also here because setting them at a workflow level doesn't give us access to the
+ # runner.temp variable, which we need.
+ - name: Populate binary env
+ shell: bash
+ run: |
+ echo "BINARY_ENV_FILE=${RUNNER_TEMP}/env" >> "${GITHUB_ENV}"
+ echo "PYTORCH_FINAL_PACKAGE_DIR=${RUNNER_TEMP}/artifacts" >> "${GITHUB_ENV}"
+ echo "WIN_PACKAGE_WORK_DIR=${RUNNER_TEMP}"
  - name: Checkout PyTorch
  uses: actions/checkout@v4
  with:
@@ -153,7 +152,6 @@ jobs:
  if: always()
  run: |
  .github\scripts\kill_active_ssh_sessions.ps1
-
  libtorch-cpu-shared-with-deps-release-test: # Testing
  if: ${{ github.repository_owner == 'pytorch' }}
  needs:
@@ -219,18 +217,6 @@ jobs:
  # Let's both exclude the path and disable Windows Defender completely just to be sure
  # that it doesn't interfere
  Set-MpPreference -DisableRealtimeMonitoring $True -ErrorAction Ignore
- - name: Checkout PyTorch
- uses: actions/checkout@v4
- with:
- ref: ${{ github.event_name == 'pull_request' && github.event.pull_request.head.sha || github.sha }}
- submodules: recursive
- path: pytorch
- show-progress: false
- - name: Clean PyTorch checkout
- run: |
- # Remove any artifacts from the previous checkouts
- git clean -fxd
- working-directory: pytorch
  # NOTE: These environment variables are put here so that they can be applied on every job equally
  # They are also here because setting them at a workflow level doesn't give us access to the
  # runner.temp variable, which we need.
@@ -245,6 +231,18 @@ jobs:
  with:
  name: libtorch-cpu-shared-with-deps-release
  path: "${{ env.PYTORCH_FINAL_PACKAGE_DIR }}"
+ - name: Checkout PyTorch
+ uses: actions/checkout@v4
+ with:
+ ref: ${{ github.event_name == 'pull_request' && github.event.pull_request.head.sha || github.sha }}
+ submodules: recursive
+ path: pytorch
+ show-progress: false
+ - name: Clean PyTorch checkout
+ run: |
+ # Remove any artifacts from the previous checkouts
+ git clean -fxd
+ working-directory: pytorch
  - name: Populate binary env
  shell: bash
  run: |
@@ -308,15 +306,6 @@ jobs:
  # without this value pip does not get installed for some reason
  DESIRED_PYTHON: "3.9"
  steps:
- # NOTE: These environment variables are put here so that they can be applied on every job equally
- # They are also here because setting them at a workflow level doesn't give us access to the
- # runner.temp variable, which we need.
- - name: Populate binary env
- shell: bash
- run: |
- echo "BINARY_ENV_FILE=${RUNNER_TEMP}/env" >> "${GITHUB_ENV}"
- echo "PYTORCH_FINAL_PACKAGE_DIR=${RUNNER_TEMP}/artifacts" >> "${GITHUB_ENV}"
- echo "WIN_PACKAGE_WORK_DIR=${RUNNER_TEMP}"
  - name: Display EC2 information
  shell: bash
  run: |
@@ -361,6 +350,15 @@ jobs:
  # Let's both exclude the path and disable Windows Defender completely just to be sure
  # that it doesn't interfere
  Set-MpPreference -DisableRealtimeMonitoring $True -ErrorAction Ignore
+ # NOTE: These environment variables are put here so that they can be applied on every job equally
+ # They are also here because setting them at a workflow level doesn't give us access to the
+ # runner.temp variable, which we need.
+ - name: Populate binary env
+ shell: bash
+ run: |
+ echo "BINARY_ENV_FILE=${RUNNER_TEMP}/env" >> "${GITHUB_ENV}"
+ echo "PYTORCH_FINAL_PACKAGE_DIR=${RUNNER_TEMP}/artifacts" >> "${GITHUB_ENV}"
+ echo "WIN_PACKAGE_WORK_DIR=${RUNNER_TEMP}"
  - name: Checkout PyTorch
  uses: actions/checkout@v4
  with:
@@ -401,7 +399,6 @@ jobs:
  if: always()
  run: |
  .github\scripts\kill_active_ssh_sessions.ps1
-
  libtorch-cuda11_8-shared-with-deps-release-test: # Testing
  if: ${{ github.repository_owner == 'pytorch' }}
  needs:
@@ -468,18 +465,6 @@ jobs:
  # Let's both exclude the path and disable Windows Defender completely just to be sure
  # that it doesn't interfere
  Set-MpPreference -DisableRealtimeMonitoring $True -ErrorAction Ignore
- - name: Checkout PyTorch
- uses: actions/checkout@v4
- with:
- ref: ${{ github.event_name == 'pull_request' && github.event.pull_request.head.sha || github.sha }}
- submodules: recursive
- path: pytorch
- show-progress: false
- - name: Clean PyTorch checkout
- run: |
- # Remove any artifacts from the previous checkouts
- git clean -fxd
- working-directory: pytorch
  # NOTE: These environment variables are put here so that they can be applied on every job equally
  # They are also here because setting them at a workflow level doesn't give us access to the
  # runner.temp variable, which we need.
@@ -494,6 +479,18 @@ jobs:
  with:
  name: libtorch-cuda11_8-shared-with-deps-release
  path: "${{ env.PYTORCH_FINAL_PACKAGE_DIR }}"
+ - name: Checkout PyTorch
+ uses: actions/checkout@v4
+ with:
+ ref: ${{ github.event_name == 'pull_request' && github.event.pull_request.head.sha || github.sha }}
+ submodules: recursive
+ path: pytorch
+ show-progress: false
+ - name: Clean PyTorch checkout
+ run: |
+ # Remove any artifacts from the previous checkouts
+ git clean -fxd
+ working-directory: pytorch
  - name: Populate binary env
  shell: bash
  run: |
@@ -558,15 +555,6 @@ jobs:
  # without this value pip does not get installed for some reason
  DESIRED_PYTHON: "3.9"
  steps:
- # NOTE: These environment variables are put here so that they can be applied on every job equally
- # They are also here because setting them at a workflow level doesn't give us access to the
- # runner.temp variable, which we need.
- - name: Populate binary env
- shell: bash
- run: |
- echo "BINARY_ENV_FILE=${RUNNER_TEMP}/env" >> "${GITHUB_ENV}"
- echo "PYTORCH_FINAL_PACKAGE_DIR=${RUNNER_TEMP}/artifacts" >> "${GITHUB_ENV}"
- echo "WIN_PACKAGE_WORK_DIR=${RUNNER_TEMP}"
  - name: Display EC2 information
  shell: bash
  run: |
@@ -611,6 +599,15 @@ jobs:
  # Let's both exclude the path and disable Windows Defender completely just to be sure
  # that it doesn't interfere
  Set-MpPreference -DisableRealtimeMonitoring $True -ErrorAction Ignore
+ # NOTE: These environment variables are put here so that they can be applied on every job equally
+ # They are also here because setting them at a workflow level doesn't give us access to the
+ # runner.temp variable, which we need.
+ - name: Populate binary env
+ shell: bash
+ run: |
+ echo "BINARY_ENV_FILE=${RUNNER_TEMP}/env" >> "${GITHUB_ENV}"
+ echo "PYTORCH_FINAL_PACKAGE_DIR=${RUNNER_TEMP}/artifacts" >> "${GITHUB_ENV}"
+ echo "WIN_PACKAGE_WORK_DIR=${RUNNER_TEMP}"
  - name: Checkout PyTorch
  uses: actions/checkout@v4
  with:
@@ -651,7 +648,6 @@ jobs:
  if: always()
  run: |
  .github\scripts\kill_active_ssh_sessions.ps1
-
  libtorch-cuda12_6-shared-with-deps-release-test: # Testing
  if: ${{ github.repository_owner == 'pytorch' }}
  needs:
@@ -718,18 +714,6 @@ jobs:
  # Let's both exclude the path and disable Windows Defender completely just to be sure
  # that it doesn't interfere
  Set-MpPreference -DisableRealtimeMonitoring $True -ErrorAction Ignore
- - name: Checkout PyTorch
- uses: actions/checkout@v4
- with:
- ref: ${{ github.event_name == 'pull_request' && github.event.pull_request.head.sha || github.sha }}
- submodules: recursive
- path: pytorch
- show-progress: false
- - name: Clean PyTorch checkout
- run: |
- # Remove any artifacts from the previous checkouts
- git clean -fxd
- working-directory: pytorch
  # NOTE: These environment variables are put here so that they can be applied on every job equally
  # They are also here because setting them at a workflow level doesn't give us access to the
  # runner.temp variable, which we need.
@@ -744,6 +728,18 @@ jobs:
  with:
  name: libtorch-cuda12_6-shared-with-deps-release
  path: "${{ env.PYTORCH_FINAL_PACKAGE_DIR }}"
+ - name: Checkout PyTorch
+ uses: actions/checkout@v4
+ with:
+ ref: ${{ github.event_name == 'pull_request' && github.event.pull_request.head.sha || github.sha }}
+ submodules: recursive
+ path: pytorch
+ show-progress: false
+ - name: Clean PyTorch checkout
+ run: |
+ # Remove any artifacts from the previous checkouts
+ git clean -fxd
+ working-directory: pytorch
  - name: Populate binary env
  shell: bash
  run: |
@@ -808,15 +804,6 @@ jobs:
  # without this value pip does not get installed for some reason
  DESIRED_PYTHON: "3.9"
  steps:
- # NOTE: These environment variables are put here so that they can be applied on every job equally
- # They are also here because setting them at a workflow level doesn't give us access to the
- # runner.temp variable, which we need.
- - name: Populate binary env
- shell: bash
- run: |
- echo "BINARY_ENV_FILE=${RUNNER_TEMP}/env" >> "${GITHUB_ENV}"
- echo "PYTORCH_FINAL_PACKAGE_DIR=${RUNNER_TEMP}/artifacts" >> "${GITHUB_ENV}"
- echo "WIN_PACKAGE_WORK_DIR=${RUNNER_TEMP}"
  - name: Display EC2 information
  shell: bash
  run: |
@@ -861,6 +848,15 @@ jobs:
  # Let's both exclude the path and disable Windows Defender completely just to be sure
  # that it doesn't interfere
  Set-MpPreference -DisableRealtimeMonitoring $True -ErrorAction Ignore
+ # NOTE: These environment variables are put here so that they can be applied on every job equally
+ # They are also here because setting them at a workflow level doesn't give us access to the
+ # runner.temp variable, which we need.
+ - name: Populate binary env
+ shell: bash
+ run: |
+ echo "BINARY_ENV_FILE=${RUNNER_TEMP}/env" >> "${GITHUB_ENV}"
+ echo "PYTORCH_FINAL_PACKAGE_DIR=${RUNNER_TEMP}/artifacts" >> "${GITHUB_ENV}"
+ echo "WIN_PACKAGE_WORK_DIR=${RUNNER_TEMP}"
  - name: Checkout PyTorch
  uses: actions/checkout@v4
  with:
@@ -901,7 +897,6 @@ jobs:
  if: always()
  run: |
  .github\scripts\kill_active_ssh_sessions.ps1
-
  libtorch-cuda12_8-shared-with-deps-release-test: # Testing
  if: ${{ github.repository_owner == 'pytorch' }}
  needs:
@@ -968,18 +963,6 @@ jobs:
  # Let's both exclude the path and disable Windows Defender completely just to be sure
  # that it doesn't interfere
  Set-MpPreference -DisableRealtimeMonitoring $True -ErrorAction Ignore
- - name: Checkout PyTorch
- uses: actions/checkout@v4
- with:
- ref: ${{ github.event_name == 'pull_request' && github.event.pull_request.head.sha || github.sha }}
- submodules: recursive
- path: pytorch
- show-progress: false
- - name: Clean PyTorch checkout
- run: |
- # Remove any artifacts from the previous checkouts
- git clean -fxd
- working-directory: pytorch
  # NOTE: These environment variables are put here so that they can be applied on every job equally
  # They are also here because setting them at a workflow level doesn't give us access to the
  # runner.temp variable, which we need.
@@ -994,6 +977,18 @@ jobs:
  with:
  name: libtorch-cuda12_8-shared-with-deps-release
  path: "${{ env.PYTORCH_FINAL_PACKAGE_DIR }}"
+ - name: Checkout PyTorch
+ uses: actions/checkout@v4
+ with:
+ ref: ${{ github.event_name == 'pull_request' && github.event.pull_request.head.sha || github.sha }}
+ submodules: recursive
+ path: pytorch
+ show-progress: false
+ - name: Clean PyTorch checkout
+ run: |
+ # Remove any artifacts from the previous checkouts
+ git clean -fxd
+ working-directory: pytorch
  - name: Populate binary env
  shell: bash
  run: |
.github/workflows/generated-windows-binary-wheel-nightly.yml (generated, vendored, 1291 changes)
File diff suppressed because it is too large
@@ -1,4 +1,5 @@
- name: inductor-perf-nightly-macos
+ name: perf-nightly-macos
+ # Technically not an inductor test, but uses it as a template for tracking macos performance

  on:
  schedule:
@@ -23,7 +24,6 @@ on:
  pull_request:
  paths:
  - .github/workflows/inductor-perf-test-nightly-macos.yml
- - .ci/pytorch/macos-test.sh

  concurrency:
  group: ${{ github.workflow }}-${{ github.event.pull_request.number || github.sha }}-${{ github.event_name == 'workflow_dispatch' }}-${{ github.event_name == 'schedule' }}
@@ -38,7 +38,7 @@ jobs:
  uses: ./.github/workflows/_mac-build.yml
  with:
  sync-tag: macos-perf-py3-arm64-build
- build-environment: macos-py3-arm64-distributed
+ build-environment: macos-py3-arm64
  runner-type: macos-m1-stable
  build-generates-artifacts: true
  # To match the one pre-installed in the m1 runners
@@ -54,7 +54,7 @@ jobs:
  uses: ./.github/workflows/_mac-test.yml
  needs: macos-perf-py3-arm64-build
  with:
- build-environment: macos-py3-arm64-distributed
+ build-environment: macos-py3-arm64
  # Same as the build job
  python-version: 3.9.12
  test-matrix: ${{ needs.macos-perf-py3-arm64-build.outputs.test-matrix }}
.github/workflows/inductor-unittest.yml (vendored, 30 changes)
@@ -36,11 +36,11 @@ jobs:
  runner_prefix: "${{ needs.get-label-type.outputs.label-type }}"
  test-matrix: |
  { include: [
- { config: "inductor", shard: 1, num_shards: 2, runner: "${{ needs.get-label-type.outputs.label-type }}linux.g5.4xlarge.nvidia.gpu" },
- { config: "inductor", shard: 2, num_shards: 2, runner: "${{ needs.get-label-type.outputs.label-type }}linux.g5.4xlarge.nvidia.gpu" },
- { config: "inductor_distributed", shard: 1, num_shards: 1, runner: "${{ needs.get-label-type.outputs.label-type }}linux.g5.12xlarge.nvidia.gpu" },
- { config: "inductor_cpp_wrapper", shard: 1, num_shards: 2, runner: "${{ needs.get-label-type.outputs.label-type }}linux.g5.4xlarge.nvidia.gpu" },
- { config: "inductor_cpp_wrapper", shard: 2, num_shards: 2, runner: "${{ needs.get-label-type.outputs.label-type }}linux.g5.4xlarge.nvidia.gpu" },
+ { config: "inductor", shard: 1, num_shards: 2, runner: "linux.g5.4xlarge.nvidia.gpu" },
+ { config: "inductor", shard: 2, num_shards: 2, runner: "linux.g5.4xlarge.nvidia.gpu" },
+ { config: "inductor_distributed", shard: 1, num_shards: 1, runner: "linux.g5.12xlarge.nvidia.gpu" },
+ { config: "inductor_cpp_wrapper", shard: 1, num_shards: 2, runner: "linux.g5.4xlarge.nvidia.gpu" },
+ { config: "inductor_cpp_wrapper", shard: 2, num_shards: 2, runner: "linux.g5.4xlarge.nvidia.gpu" },
  ]}
  secrets: inherit

@@ -65,8 +65,8 @@ jobs:
  runner_prefix: "${{ needs.get-label-type.outputs.label-type }}"
  test-matrix: |
  { include: [
- { config: "inductor", shard: 1, num_shards: 2, runner: "${{ needs.get-label-type.outputs.label-type }}linux.g5.4xlarge.nvidia.gpu" },
- { config: "inductor", shard: 2, num_shards: 2, runner: "${{ needs.get-label-type.outputs.label-type }}linux.g5.4xlarge.nvidia.gpu" },
+ { config: "inductor", shard: 1, num_shards: 2, runner: "linux.g5.4xlarge.nvidia.gpu" },
+ { config: "inductor", shard: 2, num_shards: 2, runner: "linux.g5.4xlarge.nvidia.gpu" },
  ]}
  secrets: inherit

@@ -90,7 +90,7 @@ jobs:
  runner_prefix: "${{ needs.get-label-type.outputs.label-type }}"
  test-matrix: |
  { include: [
- { config: "inductor-halide", shard: 1, num_shards: 1, runner: "${{ needs.get-label-type.outputs.label-type }}linux.12xlarge" },
+ { config: "inductor-halide", shard: 1, num_shards: 1, runner: "linux.12xlarge" },
  ]}
  secrets: inherit

@@ -114,7 +114,7 @@ jobs:
  runner_prefix: "${{ needs.get-label-type.outputs.label-type }}"
  test-matrix: |
  { include: [
- { config: "inductor-triton-cpu", shard: 1, num_shards: 1, runner: "${{ needs.get-label-type.outputs.label-type }}linux.12xlarge" },
+ { config: "inductor-triton-cpu", shard: 1, num_shards: 1, runner: "linux.12xlarge" },
  ]}
  secrets: inherit

@@ -138,10 +138,10 @@ jobs:
  runner_prefix: "${{ needs.get-label-type.outputs.label-type }}"
  test-matrix: |
  { include: [
- { config: "inductor_amx", shard: 1, num_shards: 2, runner: "${{ needs.get-label-type.outputs.label-type }}linux.8xlarge.amx" },
- { config: "inductor_amx", shard: 2, num_shards: 2, runner: "${{ needs.get-label-type.outputs.label-type }}linux.8xlarge.amx" },
- { config: "inductor_avx2", shard: 1, num_shards: 2, runner: "${{ needs.get-label-type.outputs.label-type }}linux.10xlarge.avx2" },
- { config: "inductor_avx2", shard: 2, num_shards: 2, runner: "${{ needs.get-label-type.outputs.label-type }}linux.10xlarge.avx2" },
+ { config: "inductor_amx", shard: 1, num_shards: 2, runner: "linux.8xlarge.amx" },
+ { config: "inductor_amx", shard: 2, num_shards: 2, runner: "linux.8xlarge.amx" },
+ { config: "inductor_avx2", shard: 1, num_shards: 2, runner: "linux.10xlarge.avx2" },
+ { config: "inductor_avx2", shard: 2, num_shards: 2, runner: "linux.10xlarge.avx2" },
  ]}
  secrets: inherit

@@ -165,8 +165,8 @@ jobs:
  cuda-arch-list: '8.6'
  test-matrix: |
  { include: [
- { config: "inductor", shard: 1, num_shards: 2, runner: "${{ needs.get-label-type.outputs.label-type }}linux.g5.4xlarge.nvidia.gpu" },
- { config: "inductor", shard: 2, num_shards: 2, runner: "${{ needs.get-label-type.outputs.label-type }}linux.g5.4xlarge.nvidia.gpu" },
+ { config: "inductor", shard: 1, num_shards: 2, runner: "linux.g5.4xlarge.nvidia.gpu" },
+ { config: "inductor", shard: 2, num_shards: 2, runner: "linux.g5.4xlarge.nvidia.gpu" },
  ]}
  secrets: inherit
.github/workflows/inductor.yml (vendored, 26 changes)
@@ -53,11 +53,11 @@ jobs:
  sync-tag: linux-focal-cuda12_6-py3_10-gcc9-inductor-build
  test-matrix: |
  { include: [
- { config: "inductor_huggingface", shard: 1, num_shards: 1, runner: "${{ needs.get-label-type.outputs.label-type }}linux.g5.4xlarge.nvidia.gpu" },
- { config: "inductor_timm", shard: 1, num_shards: 2, runner: "${{ needs.get-label-type.outputs.label-type }}linux.g5.4xlarge.nvidia.gpu" },
- { config: "inductor_timm", shard: 2, num_shards: 2, runner: "${{ needs.get-label-type.outputs.label-type }}linux.g5.4xlarge.nvidia.gpu" },
- { config: "inductor_torchbench", shard: 1, num_shards: 2, runner: "${{ needs.get-label-type.outputs.label-type }}linux.g5.4xlarge.nvidia.gpu" },
- { config: "inductor_torchbench", shard: 2, num_shards: 2, runner: "${{ needs.get-label-type.outputs.label-type }}linux.g5.4xlarge.nvidia.gpu" },
+ { config: "inductor_huggingface", shard: 1, num_shards: 1, runner: "linux.g5.4xlarge.nvidia.gpu" },
+ { config: "inductor_timm", shard: 1, num_shards: 2, runner: "linux.g5.4xlarge.nvidia.gpu" },
+ { config: "inductor_timm", shard: 2, num_shards: 2, runner: "linux.g5.4xlarge.nvidia.gpu" },
+ { config: "inductor_torchbench", shard: 1, num_shards: 2, runner: "linux.g5.4xlarge.nvidia.gpu" },
+ { config: "inductor_torchbench", shard: 2, num_shards: 2, runner: "linux.g5.4xlarge.nvidia.gpu" },
  ]}
  secrets: inherit

@@ -82,14 +82,14 @@ jobs:
  sync-tag: linux-jammy-cpu-py3_9-gcc11-inductor-build
  test-matrix: |
  { include: [
- { config: "cpu_inductor_torchbench", shard: 1, num_shards: 2, runner: "${{ needs.get-label-type.outputs.label-type }}linux.8xlarge.amx" },
- { config: "cpu_inductor_torchbench", shard: 2, num_shards: 2, runner: "${{ needs.get-label-type.outputs.label-type }}linux.8xlarge.amx" },
- { config: "dynamic_cpu_inductor_huggingface", shard: 1, num_shards: 1, runner: "${{ needs.get-label-type.outputs.label-type }}linux.8xlarge.amx" },
- { config: "dynamic_cpu_inductor_timm", shard: 1, num_shards: 2, runner: "${{ needs.get-label-type.outputs.label-type }}linux.8xlarge.amx" },
- { config: "dynamic_cpu_inductor_timm", shard: 2, num_shards: 2, runner: "${{ needs.get-label-type.outputs.label-type }}linux.8xlarge.amx" },
- { config: "dynamic_cpu_inductor_torchbench", shard: 1, num_shards: 2, runner: "${{ needs.get-label-type.outputs.label-type }}linux.8xlarge.amx" },
- { config: "dynamic_cpu_inductor_torchbench", shard: 2, num_shards: 2, runner: "${{ needs.get-label-type.outputs.label-type }}linux.8xlarge.amx" },
- { config: "inductor_torchbench_cpu_smoketest_perf", shard: 1, num_shards: 1, runner: "${{ needs.get-label-type.outputs.label-type }}linux.24xl.spr-metal" },
+ { config: "cpu_inductor_torchbench", shard: 1, num_shards: 2, runner: "linux.8xlarge.amx" },
+ { config: "cpu_inductor_torchbench", shard: 2, num_shards: 2, runner: "linux.8xlarge.amx" },
+ { config: "dynamic_cpu_inductor_huggingface", shard: 1, num_shards: 1, runner: "linux.8xlarge.amx" },
+ { config: "dynamic_cpu_inductor_timm", shard: 1, num_shards: 2, runner: "linux.8xlarge.amx" },
+ { config: "dynamic_cpu_inductor_timm", shard: 2, num_shards: 2, runner: "linux.8xlarge.amx" },
+ { config: "dynamic_cpu_inductor_torchbench", shard: 1, num_shards: 2, runner: "linux.8xlarge.amx" },
+ { config: "dynamic_cpu_inductor_torchbench", shard: 2, num_shards: 2, runner: "linux.8xlarge.amx" },
+ { config: "inductor_torchbench_cpu_smoketest_perf", shard: 1, num_shards: 1, runner: "linux.24xl.spr-metal" },
  ]}
  secrets: inherit
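The runner strings that differ between the two sides of the inductor hunks above are built by concatenating the label-type output of a get-label-type job (the _runner-determinator.yml reusable workflow) with a base runner label inside the test matrix. A minimal sketch of how that prefix is threaded through, with an illustrative job name and a reduced set of inputs (the reusable workflows may require more inputs than shown here):

name: runner-prefix-sketch
on: workflow_dispatch
jobs:
  get-label-type:
    uses: pytorch/pytorch/.github/workflows/_runner-determinator.yml@main
    with:
      triggering_actor: ${{ github.triggering_actor }}
      issue_owner: ${{ github.event.pull_request.user.login || github.event.issue.user.login }}
      curr_branch: ${{ github.head_ref || github.ref_name }}
      curr_ref_type: ${{ github.ref_type }}
  example-build:
    needs: get-label-type
    uses: ./.github/workflows/_linux-build.yml
    with:
      runner_prefix: "${{ needs.get-label-type.outputs.label-type }}"
      build-environment: linux-focal-py3.9-clang10
      docker-image-name: pytorch-linux-focal-py3.9-clang10
      # label-type expands to a prefix (possibly empty), so the runner entry below resolves
      # to either "linux.g5.4xlarge.nvidia.gpu" or "<prefix>linux.g5.4xlarge.nvidia.gpu".
      test-matrix: |
        { include: [
          { config: "inductor", shard: 1, num_shards: 1, runner: "${{ needs.get-label-type.outputs.label-type }}linux.g5.4xlarge.nvidia.gpu" },
        ]}
    secrets: inherit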
.github/workflows/linux-aarch64.yml (vendored, 14 changes)
@@ -37,13 +37,13 @@ jobs:
  runner: linux.arm64.2xlarge
  test-matrix: |
  { include: [
- { config: "default", shard: 1, num_shards: 4, runner: "${{ needs.get-label-type.outputs.label-type }}linux.arm64.2xlarge" },
- { config: "default", shard: 2, num_shards: 4, runner: "${{ needs.get-label-type.outputs.label-type }}linux.arm64.2xlarge" },
- { config: "default", shard: 3, num_shards: 4, runner: "${{ needs.get-label-type.outputs.label-type }}linux.arm64.2xlarge" },
- { config: "default", shard: 4, num_shards: 4, runner: "${{ needs.get-label-type.outputs.label-type }}linux.arm64.2xlarge" },
- { config: "default", shard: 1, num_shards: 3, runner: "${{ needs.get-label-type.outputs.label-type }}linux.arm64.m7g.4xlarge" },
- { config: "default", shard: 2, num_shards: 3, runner: "${{ needs.get-label-type.outputs.label-type }}linux.arm64.m7g.4xlarge" },
- { config: "default", shard: 3, num_shards: 3, runner: "${{ needs.get-label-type.outputs.label-type }}linux.arm64.m7g.4xlarge" },
+ { config: "default", shard: 1, num_shards: 4, runner: "linux.arm64.2xlarge" },
+ { config: "default", shard: 2, num_shards: 4, runner: "linux.arm64.2xlarge" },
+ { config: "default", shard: 3, num_shards: 4, runner: "linux.arm64.2xlarge" },
+ { config: "default", shard: 4, num_shards: 4, runner: "linux.arm64.2xlarge" },
+ { config: "default", shard: 1, num_shards: 3, runner: "linux.arm64.m7g.4xlarge" },
+ { config: "default", shard: 2, num_shards: 3, runner: "linux.arm64.m7g.4xlarge" },
+ { config: "default", shard: 3, num_shards: 3, runner: "linux.arm64.m7g.4xlarge" },
  ]}
  secrets: inherit
.github/workflows/periodic-rocm-mi300.yml (vendored, 81 changes)
@@ -1,81 +0,0 @@
- name: periodic-rocm-mi300
-
- on:
- schedule:
- # We have several schedules so jobs can check github.event.schedule to activate only for a fraction of the runs.
- # Also run less frequently on weekends.
- - cron: 45 0,8,16 * * 1-5
- - cron: 45 4 * * 0,6
- - cron: 45 4,12,20 * * 1-5
- - cron: 45 12 * * 0,6
- - cron: 29 8 * * * # about 1:29am PDT, for mem leak check and rerun disabled tests
- push:
- tags:
- - ciflow/periodic-rocm-mi300/*
- branches:
- - release/*
- workflow_dispatch:
-
- concurrency:
- group: ${{ github.workflow }}-${{ github.event.pull_request.number || github.ref_name }}-${{ github.ref_type == 'branch' && github.sha }}-${{ github.event_name == 'workflow_dispatch' }}-${{ github.event_name == 'schedule' }}-${{ github.event.schedule }}
- cancel-in-progress: true
-
- permissions: read-all
-
- jobs:
- llm-td:
- if: github.repository_owner == 'pytorch'
- name: before-test
- uses: ./.github/workflows/llm_td_retrieval.yml
- permissions:
- id-token: write
- contents: read
-
- target-determination:
- name: before-test
- uses: ./.github/workflows/target_determination.yml
- needs: llm-td
- permissions:
- id-token: write
- contents: read
-
- get-label-type:
- name: get-label-type
- uses: pytorch/pytorch/.github/workflows/_runner-determinator.yml@main
- if: (github.event_name != 'schedule' || github.repository == 'pytorch/pytorch') && github.repository_owner == 'pytorch'
- with:
- triggering_actor: ${{ github.triggering_actor }}
- issue_owner: ${{ github.event.pull_request.user.login || github.event.issue.user.login }}
- curr_branch: ${{ github.head_ref || github.ref_name }}
- curr_ref_type: ${{ github.ref_type }}
-
- linux-focal-rocm-py3_10-build:
- name: linux-focal-rocm-py3.10
- uses: ./.github/workflows/_linux-build.yml
- needs: get-label-type
- with:
- runner_prefix: "${{ needs.get-label-type.outputs.label-type }}"
- build-environment: linux-focal-rocm-py3.10
- docker-image-name: pytorch-linux-focal-rocm-n-py3
- test-matrix: |
- { include: [
- { config: "distributed", shard: 1, num_shards: 3, runner: "linux.rocm.gpu.mi300.4", owners: ["module:rocm", "oncall:distributed"] },
- { config: "distributed", shard: 2, num_shards: 3, runner: "linux.rocm.gpu.mi300.4", owners: ["module:rocm", "oncall:distributed"] },
- { config: "distributed", shard: 3, num_shards: 3, runner: "linux.rocm.gpu.mi300.4", owners: ["module:rocm", "oncall:distributed"] },
- ]}
- secrets: inherit
-
- linux-focal-rocm-py3_10-test:
- permissions:
- id-token: write
- contents: read
- name: linux-focal-rocm-py3.10
- uses: ./.github/workflows/_rocm-test.yml
- needs:
- - linux-focal-rocm-py3_10-build
- - target-determination
- with:
- build-environment: linux-focal-rocm-py3.10
- docker-image: ${{ needs.linux-focal-rocm-py3_10-build.outputs.docker-image }}
- test-matrix: ${{ needs.linux-focal-rocm-py3_10-build.outputs.test-matrix }}
- secrets: inherit
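The removed workflow above registers several cron schedules and notes that jobs can check github.event.schedule to activate only for a fraction of the runs. A minimal sketch of that gating pattern, with an illustrative job name and runner label that are not taken from the file above:

name: schedule-gating-sketch
on:
  schedule:
    - cron: "45 0,8,16 * * 1-5"
    - cron: "29 8 * * *"  # extra run used only for the memory-leak configuration
jobs:
  mem-leak-check:
    # github.event.schedule holds the cron string that triggered this run,
    # so this job is skipped for every schedule except "29 8 * * *".
    if: github.event.schedule == '29 8 * * *'
    runs-on: ubuntu-latest
    steps:
      - run: echo "running the memory-leak configuration"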
.github/workflows/periodic.yml (vendored, 16 changes)
@@ -182,14 +182,14 @@ jobs:
  cuda-arch-list: 8.6
  test-matrix: |
  { include: [
- { config: "default", shard: 1, num_shards: 8, runner: "${{ needs.get-label-type.outputs.label-type }}linux.g5.4xlarge.nvidia.gpu", owners: ["module:slowgradcheck"] },
- { config: "default", shard: 2, num_shards: 8, runner: "${{ needs.get-label-type.outputs.label-type }}linux.g5.4xlarge.nvidia.gpu", owners: ["module:slowgradcheck"] },
- { config: "default", shard: 3, num_shards: 8, runner: "${{ needs.get-label-type.outputs.label-type }}linux.g5.4xlarge.nvidia.gpu", owners: ["module:slowgradcheck"] },
- { config: "default", shard: 4, num_shards: 8, runner: "${{ needs.get-label-type.outputs.label-type }}linux.g5.4xlarge.nvidia.gpu", owners: ["module:slowgradcheck"] },
- { config: "default", shard: 5, num_shards: 8, runner: "${{ needs.get-label-type.outputs.label-type }}linux.g5.4xlarge.nvidia.gpu", owners: ["module:slowgradcheck"] },
- { config: "default", shard: 6, num_shards: 8, runner: "${{ needs.get-label-type.outputs.label-type }}linux.g5.4xlarge.nvidia.gpu", owners: ["module:slowgradcheck"] },
- { config: "default", shard: 7, num_shards: 8, runner: "${{ needs.get-label-type.outputs.label-type }}linux.g5.4xlarge.nvidia.gpu", owners: ["module:slowgradcheck"] },
- { config: "default", shard: 8, num_shards: 8, runner: "${{ needs.get-label-type.outputs.label-type }}linux.g5.4xlarge.nvidia.gpu", owners: ["module:slowgradcheck"] },
+ { config: "default", shard: 1, num_shards: 8, runner: "linux.g5.4xlarge.nvidia.gpu", owners: ["module:slowgradcheck"] },
+ { config: "default", shard: 2, num_shards: 8, runner: "linux.g5.4xlarge.nvidia.gpu", owners: ["module:slowgradcheck"] },
+ { config: "default", shard: 3, num_shards: 8, runner: "linux.g5.4xlarge.nvidia.gpu", owners: ["module:slowgradcheck"] },
+ { config: "default", shard: 4, num_shards: 8, runner: "linux.g5.4xlarge.nvidia.gpu", owners: ["module:slowgradcheck"] },
+ { config: "default", shard: 5, num_shards: 8, runner: "linux.g5.4xlarge.nvidia.gpu", owners: ["module:slowgradcheck"] },
+ { config: "default", shard: 6, num_shards: 8, runner: "linux.g5.4xlarge.nvidia.gpu", owners: ["module:slowgradcheck"] },
+ { config: "default", shard: 7, num_shards: 8, runner: "linux.g5.4xlarge.nvidia.gpu", owners: ["module:slowgradcheck"] },
+ { config: "default", shard: 8, num_shards: 8, runner: "linux.g5.4xlarge.nvidia.gpu", owners: ["module:slowgradcheck"] },
  ]}
  secrets: inherit
.github/workflows/pull.yml (vendored, 5 changes)
@@ -184,7 +184,7 @@ jobs:
  uses: ./.github/workflows/_linux-build.yml
  needs: get-label-type
  with:
- runner_prefix: "${{ needs.get-label-type.outputs.label-type }}"
+ runner: "${{ needs.get-label-type.outputs.label-type }}linux.2xlarge"
  build-environment: linux-focal-py3.9-clang10
  docker-image-name: pytorch-linux-focal-py3.9-clang10
  test-matrix: |
@@ -385,9 +385,6 @@ jobs:
  name: linux-focal-cpu-py3.10-gcc11-bazel-test
  uses: ./.github/workflows/_bazel-build-test.yml
  needs: get-label-type
- permissions:
- id-token: write
- contents: read
  with:
  runner: "${{ needs.get-label-type.outputs.label-type }}linux.large"
  build-environment: linux-focal-cuda12.6-py3.10-gcc11-bazel-test
.github/workflows/s390.yml (vendored, 2 changes)
@@ -21,6 +21,6 @@ jobs:
  uses: ./.github/workflows/_linux-build.yml
  with:
  build-environment: linux-s390x-binary-manywheel
- docker-image-name: pytorch/manylinuxs390x-builder:cpu-s390x
+ docker-image-name: pytorch/manylinuxs390x-builder:cpu-s390x-main
  runner: linux.s390x
  secrets: inherit
.github/workflows/s390x-periodic.yml (vendored, 4 changes)
@@ -42,7 +42,7 @@ jobs:
  uses: ./.github/workflows/_linux-build.yml
  with:
  build-environment: linux-s390x-binary-manywheel
- docker-image-name: pytorch/manylinuxs390x-builder:cpu-s390x
+ docker-image-name: pytorch/manylinuxs390x-builder:cpu-s390x-main
  runner: linux.s390x
  test-matrix: |
  { include: [
@@ -70,7 +70,7 @@ jobs:
  - target-determination
  with:
  build-environment: linux-s390x-binary-manywheel
- docker-image: pytorch/manylinuxs390x-builder:cpu-s390x
+ docker-image: pytorch/manylinuxs390x-builder:cpu-s390x-main
  test-matrix: ${{ needs.linux-manylinux-2_28-py3-cpu-s390x-build.outputs.test-matrix }}
  timeout-minutes: 600
  use-gha: "yes"
.github/workflows/slow.yml (vendored, 6 changes)
@@ -143,9 +143,9 @@ jobs:
  docker-image-name: pytorch-linux-jammy-py3-clang15-asan
  test-matrix: |
  { include: [
- { config: "slow", shard: 1, num_shards: 3, runner: "${{ needs.get-label-type.outputs.label-type }}linux.4xlarge" },
- { config: "slow", shard: 2, num_shards: 3, runner: "${{ needs.get-label-type.outputs.label-type }}linux.4xlarge" },
- { config: "slow", shard: 3, num_shards: 3, runner: "${{ needs.get-label-type.outputs.label-type }}linux.4xlarge" },
+ { config: "slow", shard: 1, num_shards: 3, runner: "linux.4xlarge" },
+ { config: "slow", shard: 2, num_shards: 3, runner: "linux.4xlarge" },
+ { config: "slow", shard: 3, num_shards: 3, runner: "linux.4xlarge" },
  ]}
  sync-tag: asan-build
  secrets: inherit
.github/workflows/upload-test-stats.yml (vendored, 2 changes)
@@ -2,7 +2,7 @@ name: Upload test stats

  on:
  workflow_run:
- workflows: [pull, trunk, periodic, periodic-rocm-mi300, inductor, unstable, slow, unstable-periodic, inductor-periodic, rocm, rocm-mi300, inductor-micro-benchmark, inductor-micro-benchmark-x86, inductor-cu124, inductor-rocm, inductor-rocm-mi300, mac-mps]
+ workflows: [pull, trunk, periodic, inductor, unstable, slow, unstable-periodic, inductor-periodic, rocm, rocm-mi300, inductor-micro-benchmark, inductor-micro-benchmark-x86, inductor-cu124, inductor-rocm, inductor-rocm-mi300, mac-mps]
  types:
  - completed
@@ -2,7 +2,7 @@ name: Upload torch dynamo performance stats

  on:
  workflow_run:
- workflows: [inductor-A100-perf-nightly, inductor-perf-nightly-A10g, inductor-perf-nightly-aarch64, inductor-perf-nightly-x86, inductor-perf-nightly-macos, inductor-perf-nightly-rocm, inductor-perf-nightly-h100]
+ workflows: [inductor-A100-perf-nightly, inductor-perf-nightly-A10g, inductor-perf-nightly-aarch64, inductor-perf-nightly-x86, perf-nightly-macos, inductor-perf-nightly-rocm, inductor-perf-nightly-h100]
  types:
  - completed
.gitignore (vendored, 1 change)
@@ -178,7 +178,6 @@ compile_commands.json
  *.egg-info/
  docs/source/scripts/activation_images/
  docs/source/scripts/quantization_backend_configs/
- docs/source/scripts/lr_scheduler_images/

  ## General
@@ -1165,6 +1165,14 @@ exclude_patterns = [
  'test/quantization/core/test_utils.py',
  'test/quantization/core/test_workflow_module.py',
  'test/quantization/core/test_workflow_ops.py',
+ 'test/quantization/eager/__init__.py',
+ 'test/quantization/eager/test_bias_correction_eager.py',
+ 'test/quantization/eager/test_equalize_eager.py',
+ 'test/quantization/eager/test_fuse_eager.py',
+ 'test/quantization/eager/test_model_numerics.py',
+ 'test/quantization/eager/test_numeric_suite_eager.py',
+ 'test/quantization/eager/test_quantize_eager_ptq.py',
+ 'test/quantization/eager/test_quantize_eager_qat.py',
  'test/quantization/fx/__init__.py',
  'test/quantization/fx/test_equalize_fx.py',
  'test/quantization/fx/test_model_report_fx.py',
@@ -1715,7 +1723,7 @@ command = [
  '@{{PATHSFILE}}'
  ]
  include_patterns = [
- 'torch/_inductor/**/*.py'
+ 'torch/**/not-exist.py'
  ]
  is_formatter = false
12
BUILD.bazel
12
BUILD.bazel
@ -1,5 +1,4 @@
|
|||||||
load("@bazel_skylib//lib:paths.bzl", "paths")
|
load("@bazel_skylib//lib:paths.bzl", "paths")
|
||||||
load("@com_github_google_flatbuffers//:build_defs.bzl", "flatbuffer_cc_library")
|
|
||||||
load("@pybind11_bazel//:build_defs.bzl", "pybind_extension")
|
load("@pybind11_bazel//:build_defs.bzl", "pybind_extension")
|
||||||
load("@rules_cc//cc:defs.bzl", "cc_binary", "cc_library", "cc_test")
|
load("@rules_cc//cc:defs.bzl", "cc_binary", "cc_library", "cc_test")
|
||||||
load("@rules_python//python:defs.bzl", "py_library", "py_test")
|
load("@rules_python//python:defs.bzl", "py_library", "py_test")
|
||||||
@ -660,15 +659,6 @@ cc_library(
|
|||||||
# torch
|
# torch
|
||||||
torch_cuda_headers = glob(["torch/csrc/cuda/*.h"])
|
torch_cuda_headers = glob(["torch/csrc/cuda/*.h"])
|
||||||
|
|
||||||
flatbuffer_cc_library(
|
|
||||||
name = "torch_flatbuffers",
|
|
||||||
srcs = [
|
|
||||||
"torch/csrc/jit/serialization/mobile_bytecode.fbs",
|
|
||||||
],
|
|
||||||
flatc_args = ["--cpp", "--gen-mutable", "--scoped-enums"],
|
|
||||||
out_prefix = "torch/csrc/jit/serialization/",
|
|
||||||
)
|
|
||||||
|
|
||||||
cc_library(
|
cc_library(
|
||||||
name = "torch_headers",
|
name = "torch_headers",
|
||||||
hdrs = if_cuda(
|
hdrs = if_cuda(
|
||||||
@ -682,7 +672,6 @@ cc_library(
|
|||||||
],
|
],
|
||||||
exclude = [
|
exclude = [
|
||||||
"torch/csrc/*/generated/*.h",
|
"torch/csrc/*/generated/*.h",
|
||||||
"torch/csrc/jit/serialization/mobile_bytecode_generated.h",
|
|
||||||
] + torch_cuda_headers,
|
] + torch_cuda_headers,
|
||||||
) + GENERATED_AUTOGRAD_CPP + [":version_h"],
|
) + GENERATED_AUTOGRAD_CPP + [":version_h"],
|
||||||
includes = [
|
includes = [
|
||||||
@@ -697,7 +686,6 @@ cc_library(
     deps = [
         ":aten_headers",
         ":caffe2_headers",
-        ":torch_flatbuffers",
        "//c10",
         "@com_github_google_flatbuffers//:flatbuffers",
         "@local_config_python//:python_headers",
@@ -165,9 +165,9 @@ caffe2/utils/hip @jeffdaily @jithunnair-amd
 /torch/_export/ @avikchaudhuri @tugsbayasgalan @zhxchen17 @ydwu4 @angelayi
 
 # Dynamic Shapes
-/torch/fx/experimental/symbolic_shapes.py @bobrenjc93 @laithsakka
-/torch/fx/experimental/sym_node.py @bobrenjc93 @laithsakka
-/torch/fx/experimental/recording.py @bobrenjc93 @laithsakka
+/torch/fx/experimental/symbolic_shapes.py @bobren @laithsakka
+/torch/fx/experimental/sym_node.py @bobren @laithsakka
+/torch/fx/experimental/recording.py @bobren @laithsakka
 
 # serialization-related files
 /aten/src/ATen/MapAllocator* @mikaylagawarecki
@@ -182,7 +182,7 @@ NestedTensorImpl::NestedTensorImpl(
       "coverage, and works with torch.compile.");
   auto storage_device = storage_.device();
   TORCH_INTERNAL_ASSERT(
-      storage_device.is_cpu() || storage_device.is_cuda() || storage_device.is_xpu() || storage_device.is_hpu() || storage_device.is_privateuseone(),
+      storage_device.is_cpu() || storage_device.is_cuda() || storage_device.is_xpu() || storage_device.is_privateuseone(),
       "NestedTensorImpl storage must be either CUDA, CPU, XPU or ", get_privateuse1_backend(), " but got ",
       storage_device);
   validate_nested_tensor_metadata(nested_sizes_, nested_strides_, storage_offsets_);
@@ -29,20 +29,12 @@ struct TORCH_API OpaqueTensorImpl : public TensorImpl {
       bool is_non_overlapping_and_dense = true)
       : TensorImpl(key_set, data_type, device),
         opaque_handle_(std::move(opaque_handle)) {
-    constructor_impl(sizes, is_non_overlapping_and_dense);
-  }
-
-  OpaqueTensorImpl(
-      TensorImpl::ImplType impl_type,
-      c10::Storage&& storage,
-      at::DispatchKeySet key_set,
-      const caffe2::TypeMeta data_type,
-      OpaqueHandle opaque_handle,
-      c10::IntArrayRef sizes,
-      bool is_non_overlapping_and_dense = true)
-      : TensorImpl(impl_type, std::move(storage), key_set, data_type),
-        opaque_handle_(std::move(opaque_handle)) {
-    constructor_impl(sizes, is_non_overlapping_and_dense);
+    set_storage_access_should_throw();
+    set_custom_sizes_strides(SizesStridesPolicy::CustomStrides);
+    sizes_and_strides_.set_sizes(sizes);
+    refresh_numel();
+    // NOLINTNEXTLINE(cppcoreguidelines-prefer-member-initializer)
+    is_non_overlapping_and_dense_ = is_non_overlapping_and_dense;
   }
 
   // Destructor doesn't call release_resources because it's
@@ -189,17 +181,6 @@ struct TORCH_API OpaqueTensorImpl : public TensorImpl {
     return "OpaqueTensorImpl";
   }
 
-  void constructor_impl(
-      c10::IntArrayRef sizes,
-      bool is_non_overlapping_and_dense) {
-    set_storage_access_should_throw();
-    set_custom_sizes_strides(SizesStridesPolicy::CustomStrides);
-    sizes_and_strides_.set_sizes(sizes);
-    refresh_numel();
-    // NOLINTNEXTLINE(cppcoreguidelines-prefer-member-initializer)
-    is_non_overlapping_and_dense_ = is_non_overlapping_and_dense;
-  }
-
   OpaqueHandle opaque_handle_;
 };
 
@@ -10,13 +10,15 @@
 #include <mkl.h>
 #endif
 
-#if AT_MKLDNN_ENABLED()
-#include <ATen/native/mkldnn/IDeepRegistration.h>
-#endif
-
 #include <caffe2/utils/threadpool/pthreadpool-cpp.h>
 
 namespace at {
+#if AT_MKLDNN_ENABLED()
+namespace native::mkldnn {
+// NOLINTNEXTLINE(misc-use-internal-linkage)
+void clear_computation_cache();
+} // namespace native::mkldnn
+#endif
 
 namespace {
 // Number of threads set by the user
@@ -222,8 +222,8 @@ inline Tensor applySlice(
         ? (*self_sizes)[dim]
         : self.sym_size(dim);
     if (!disable_slice_optimization &&
-        TORCH_STATICALLY_KNOWN_TRUE(start.sym_eq(0)) &&
-        TORCH_STATICALLY_KNOWN_TRUE(length.sym_eq(stop)) && step == 1) {
+        TORCH_GUARD_SIZE_OBLIVIOUS(start.sym_eq(0)) &&
+        TORCH_GUARD_SIZE_OBLIVIOUS(length.sym_eq(stop)) && step == 1) {
       return self;
     }
   }
@@ -1,33 +0,0 @@
-#include <ATen/core/CachingHostAllocator.h>
-
-#include <array>
-
-namespace at {
-
-namespace {
-
-static std::array<HostAllocator*, at::COMPILE_TIME_MAX_DEVICE_TYPES>
-    allocator_array{};
-static std::array<uint8_t, at::COMPILE_TIME_MAX_DEVICE_TYPES>
-    allocator_priority{};
-
-} // anonymous namespace
-
-void setHostAllocator(
-    at::DeviceType device_type,
-    at::HostAllocator* allocator,
-    uint8_t priority) {
-  if (priority >= allocator_priority[static_cast<int>(device_type)]) {
-    allocator_array[static_cast<int>(device_type)] = allocator;
-    allocator_priority[static_cast<int>(device_type)] = priority;
-  }
-}
-
-at::HostAllocator* getHostAllocator(at::DeviceType device_type) {
-  auto* allocator = allocator_array[static_cast<int>(device_type)];
-  TORCH_INTERNAL_ASSERT_DEBUG_ONLY(
-      allocator, "Host Allocator for ", device_type, " is not set.");
-  return allocator;
-}
-
-} // namespace at
@@ -1,5 +1,4 @@
 #include <c10/core/Allocator.h>
-#include <c10/core/Stream.h>
 #include <c10/core/thread_pool.h>
 #include <c10/util/flat_hash_map.h>
 #include <c10/util/llvmMathExtras.h>
@@ -47,7 +46,7 @@ namespace {
 }
 
 // Struct containing memory allocator summary statistics for host.
-struct TORCH_API HostStats {
+struct HostStats {
   // COUNT: allocations requested by client code. Note that active
   // count can be extracted by looking at current allocations
   Stat allocation;
@@ -275,8 +274,7 @@ struct CachingHostAllocatorImpl {
     }
   }
 
-  virtual bool record_event(void* ptr, void* ctx, c10::Stream s) {
-    S stream = S(s);
+  virtual bool record_event(void* ptr, void* ctx, S stream) {
     auto* block = reinterpret_cast<B*>(ctx);
 
     // Note: we need to check if the passed-in `ctx` is valid. This is because
@@ -622,49 +620,24 @@ protected:
   alignas(64) HostStatsStaged stats_;
 };
 
-struct TORCH_API HostAllocator : public at::Allocator {
-  // Associates the pinned memory allocation with a stream to track
-  // dependencies. This ensures the memory won't be reused until the stream's
-  // operations complete
-  virtual bool record_event(void* ptr, void* ctx, c10::Stream stream) = 0;
-
-  // Frees all cached pinned memory and returns it to the system, clearing the
-  // allocator's internal cache
-  virtual void empty_cache() = 0;
-
-  // Returns comprehensive statistics about the allocator's memory usage,
-  // allocation patterns, and timing metrics
-  virtual HostStats get_stats() = 0;
-
-  // Resets the cumulative allocation statistics
-  virtual void reset_accumulated_stats() = 0;
-
-  // Resets the peak memory usage metrics
-  virtual void reset_peak_stats() = 0;
-};
-
-template <typename T, c10::DeleterFnPtr deleteFunc>
-struct CachingHostAllocatorInterface : public HostAllocator {
+template <typename T>
+struct CachingHostAllocatorInterface : public at::Allocator {
   CachingHostAllocatorInterface() : impl_(std::make_unique<T>()) {}
 
   at::DataPtr allocate(size_t size) override {
-    auto ptr_and_ctx = impl_->allocate(size);
-    return {
-        ptr_and_ctx.first,
-        ptr_and_ctx.second,
-        deleteFunc, // Use the template parameter deleter function
-        at::DeviceType::CPU};
+    TORCH_CHECK_NOT_IMPLEMENTED(false, "Not implemented for allocate");
   }
 
   void free(void* ctx) {
     impl_->free(ctx);
   }
 
-  bool record_event(void* ptr, void* ctx, c10::Stream stream) override {
+  template <typename S>
+  bool record_event(void* ptr, void* ctx, S stream) {
     return impl_->record_event(ptr, ctx, stream);
   }
 
-  void empty_cache() override {
+  void empty_cache() {
     impl_->empty_cache();
   }
 
@@ -673,54 +646,20 @@ struct CachingHostAllocatorInterface : public HostAllocator {
     impl_->copy_data(dest, src, count);
   }
 
-  HostStats get_stats() override {
+  HostStats getStats() {
     return impl_->getStats();
   }
 
-  void reset_accumulated_stats() override {
+  void resetAccumulatedStats() {
     impl_->resetAccumulatedStats();
   }
 
-  void reset_peak_stats() override {
+  void resetPeakStats() {
     impl_->resetPeakStats();
   }
 
   std::unique_ptr<T> impl_;
 };
 
-#define DECLARE_HOST_ALLOCATOR(name, impl, deleter, instance) \
-  void deleter(void* ptr); \
-  struct name final \
-      : public at::CachingHostAllocatorInterface<impl, deleter> {}; \
-  static name instance; \
-  void deleter(void* ptr) { \
-    instance.free(ptr); \
-  }
-
-/**
- * Set the host allocator for DeviceType `device_type`. This allocator manages
- * pinned memory on the host that can be accessed efficiently by the specified
- * device type. Note that this function is not thread-safe.
- */
-TORCH_API void setHostAllocator(
-    at::DeviceType device_type,
-    at::HostAllocator* allocator,
-    uint8_t priority = 0);
-
-TORCH_API at::HostAllocator* getHostAllocator(at::DeviceType device_type);
-
-template <DeviceType device_type>
-struct HostAllocatorRegistry {
-  explicit HostAllocatorRegistry(HostAllocator* allocator) {
-    at::setHostAllocator(device_type, allocator);
-  }
-};
-
-#define REGISTER_HOST_ALLOCATOR(device_type, allocator) \
-  namespace { \
-  static at::HostAllocatorRegistry<device_type> \
-      g_host_allocator_registry_instance(allocator); \
-  }
-
 } // namespace at
 C10_DIAGNOSTIC_POP()
@@ -41,15 +41,9 @@ FunctionSchema FunctionSchema::cloneWithRealTypes(bool with_symint) const {
     }
   };
   std::vector<Argument> new_arguments, new_returns;
-  new_arguments.reserve(arguments().size());
-  for (const auto& arg: arguments()) {
-    new_arguments.push_back(cloneWithRealTypes(arg));
-  }
+  std::transform(arguments().begin(), arguments().end(), std::back_inserter(new_arguments), cloneWithRealTypes);
   // NB: SymInt returns are always SymInt
-  new_returns.reserve(returns().size());
-  for (const auto& ret: returns()) {
-    new_returns.push_back(alwaysCloneWithRealTypes(ret));
-  }
+  std::transform(returns().begin(), returns().end(), std::back_inserter(new_returns), alwaysCloneWithRealTypes);
   return FunctionSchema(
     name(),
     overload_name(),
@@ -1,7 +1,6 @@
 #include <torch/library.h>
 
 #include <ATen/core/dispatch/Dispatcher.h>
-#include <fmt/format.h>
 
 namespace torch {
 
@@ -12,7 +11,7 @@ namespace {
 #ifdef STRIP_ERROR_MESSAGES
   return std::string();
 #else
-  return fmt::format("registered at {}:{}", file, line);
+  return c10::str("registered at ", file, ":", line);
 #endif
 }
 
@@ -248,6 +248,7 @@ namespace at::cuda::blas {
     CUDABLAS_NONNEGINT_CHECK(bgemm<Dtype>, num_batches); \
   } while (0)
 
+
 namespace {
 // Following the pattern of CuSparseDescriptor
 // Defined here for now because this is the only place cublas_lt interface is
@@ -333,10 +334,9 @@ class CuBlasLtMatmulPreference : public CuBlasLtDescriptor<
 } // namespace
 
 
-template <typename Dtype, typename C_Dtype = Dtype>
-static inline bool bgemm_internal_cublaslt(CUDABLAS_BGEMM_ARGTYPES_AND_C_DTYPE(Dtype, C_Dtype)) {
-  cudaDataType_t abType = CUDA_R_32F;
-  cudaDataType_t cType = CUDA_R_32F;
+template <typename Dtype>
+static inline bool bgemm_internal_cublaslt(CUDABLAS_BGEMM_ARGTYPES(Dtype)) {
+  cudaDataType_t abcType = CUDA_R_32F;
   cublasComputeType_t computeType = CUBLAS_COMPUTE_32F;
   cudaDataType_t scaleType = CUDA_R_32F;
 #ifndef USE_ROCM
@@ -346,8 +346,7 @@ static inline bool bgemm_internal_cublaslt(CUDABLAS_BGEMM_ARGTYPES_AND_C_DTYPE(D
   void * alpha_ptr = &alpha;
   void * beta_ptr = &beta;
   if constexpr (std::is_same_v<Dtype, double>) {
-    abType = CUDA_R_64F;
-    cType = CUDA_R_64F;
+    abcType = CUDA_R_64F;
     computeType = CUBLAS_COMPUTE_64F;
     scaleType = CUDA_R_64F;
   } else if constexpr (std::is_same_v<Dtype, float>) {
@@ -355,13 +354,11 @@ static inline bool bgemm_internal_cublaslt(CUDABLAS_BGEMM_ARGTYPES_AND_C_DTYPE(D
       computeType = CUBLAS_COMPUTE_32F_FAST_TF32;
     }
   } else if constexpr (std::is_same_v<Dtype, c10::complex<double>>) {
-    abType = CUDA_C_64F;
-    cType = CUDA_C_64F;
+    abcType = CUDA_C_64F;
     computeType = CUBLAS_COMPUTE_64F;
     scaleType = CUDA_C_64F;
   } else if constexpr (std::is_same_v<Dtype, c10::complex<float>>) {
-    abType = CUDA_C_32F;
-    cType = CUDA_C_32F;
+    abcType = CUDA_C_32F;
     scaleType = CUDA_C_32F;
   } else if constexpr (std::is_same_v<Dtype, at::Half>) {
 #ifndef USE_ROCM
@@ -374,11 +371,9 @@ static inline bool bgemm_internal_cublaslt(CUDABLAS_BGEMM_ARGTYPES_AND_C_DTYPE(D
       beta_ptr = &hbeta;
     }
 #endif
-    abType = CUDA_R_16F;
-    cType = (std::is_same_v<C_Dtype, float>) ? CUDA_R_32F : CUDA_R_16F;
+    abcType = CUDA_R_16F;
   } else if constexpr (std::is_same_v<Dtype, at::BFloat16>) {
-    abType = CUDA_R_16BF;
-    cType = (std::is_same_v<C_Dtype, float>) ? CUDA_R_32F : CUDA_R_16BF;
+    abcType = CUDA_R_16BF;
   } else {
     static_assert(false && sizeof(Dtype), "at::cuda::blas::bgemm_internal_cublaslt: not implemented");
   }
@@ -400,9 +395,9 @@ static inline bool bgemm_internal_cublaslt(CUDABLAS_BGEMM_ARGTYPES_AND_C_DTYPE(D
         at::globalContext()._SMCarveout_EXPERIMENTAL().value());
   }
 #endif
-  CuBlasLtMatrixLayout Adesc(abType, m, k, lda, opa == CUBLAS_OP_T);
-  CuBlasLtMatrixLayout Bdesc(abType, k, n, ldb, opb == CUBLAS_OP_T);
-  CuBlasLtMatrixLayout Cdesc(cType, m, n, ldc);
+  CuBlasLtMatrixLayout Adesc(abcType, m, k, lda, opa == CUBLAS_OP_T);
+  CuBlasLtMatrixLayout Bdesc(abcType, k, n, ldb, opb == CUBLAS_OP_T);
+  CuBlasLtMatrixLayout Cdesc(abcType, m, n, ldc);
 
   if (num_batches > 1) {
     int num_batches_as_int = static_cast<int>(num_batches);
@@ -487,10 +482,8 @@ static inline bool bgemm_internal_cublaslt(CUDABLAS_BGEMM_ARGTYPES_AND_C_DTYPE(D
       ldb,
       " ldc ",
      ldc,
-      " abType ",
-      abType,
-      " cType ",
-      cType,
+      " abcType ",
+      abcType,
       " computeType ",
       computeType,
       " scaleType ",
@@ -502,9 +495,9 @@ static inline bool bgemm_internal_cublaslt(CUDABLAS_BGEMM_ARGTYPES_AND_C_DTYPE(D
 }
 
 
-template <typename Dtype, typename C_Dtype = Dtype>
-inline void bgemm_internal_cublas(CUDABLAS_BGEMM_ARGTYPES_AND_C_DTYPE(Dtype, C_Dtype)) {
-  TORCH_CHECK(false, "at::cuda::blas::bgemm: not implemented for input type ", typeid(Dtype).name(), " and output type ", typeid(C_Dtype).name());
+template <typename Dtype>
+inline void bgemm_internal_cublas(CUDABLAS_BGEMM_ARGTYPES(Dtype)) {
+  static_assert(false && sizeof(Dtype), "at::cuda::blas::bgemm_internal_cublas: not implemented");
 }
 
 template <>
@@ -563,8 +556,8 @@ void bgemm_internal_cublas<c10::complex<float>>(CUDABLAS_BGEMM_ARGTYPES(c10::com
       reinterpret_cast<cuComplex*>(c), ldc, stridec, num_batches));
 }
 
-template <typename C_Dtype>
-inline void bgemm_internal_cublas_half_helper(CUDABLAS_BGEMM_ARGTYPES_AND_C_DTYPE(at::Half, C_Dtype)) {
+template <>
+void bgemm_internal_cublas<at::Half>(CUDABLAS_BGEMM_ARGTYPES(at::Half)) {
   // See Note [Writing Nondeterministic Operations]
   globalContext().alertCuBLASConfigNotDeterministic();
   cublasHandle_t handle = at::cuda::getCurrentCUDABlasHandle();
@@ -609,33 +602,23 @@ inline void bgemm_internal_cublas_half_helper(CUDABLAS_BGEMM_ARGTYPES_AND_C_DTYP
         handle, opa, opb, m, n, k,
         alpha_ptr, a, CUDA_R_16F, lda, stridea,
         b, CUDA_R_16F, ldb, strideb, beta_ptr,
-        c, std::is_same_v<C_Dtype, float> ? CUDA_R_32F : CUDA_R_16F, ldc, stridec,
+        c, CUDA_R_16F, ldc, stridec,
         num_batches, compute_type, CUBLAS_GEMM_DEFAULT_TENSOR_OP));
   } else {
     for (const auto i : c10::irange(num_batches)) {
-      if (std::is_same_v<C_Dtype, float>) {
-        float* c_ptr = (float*)(c + i * stridec);
-        at::cuda::blas::gemm<at::Half, float>(
-            transa, transb,
-            m, n, k,
-            alpha, (a + i * stridea), lda,
-            (b + i * strideb), ldb, beta,
-            c_ptr, ldc);
-      } else {
-        at::cuda::blas::gemm<at::Half>(
-            transa, transb,
-            m, n, k,
-            alpha, (a + i * stridea), lda,
-            (b + i * strideb), ldb, beta,
-            (c + i * stridec), ldc);
-      }
+      at::cuda::blas::gemm<at::Half>(
+          transa, transb,
+          m, n, k,
+          alpha, (a + i * stridea), lda,
+          (b + i * strideb), ldb, beta,
+          (c + i * stridec), ldc);
     }
   }
 #endif // USE_ROCM
 }
 
-template <typename C_Dtype>
-inline void bgemm_internal_cublas_bfloat16_helper(CUDABLAS_BGEMM_ARGTYPES_AND_C_DTYPE(at::BFloat16, C_Dtype)) {
+template <>
+void bgemm_internal_cublas<at::BFloat16>(CUDABLAS_BGEMM_ARGTYPES(at::BFloat16)) {
   // See Note [Writing Nondeterministic Operations]
   globalContext().alertCuBLASConfigNotDeterministic();
   BGEMM_CHECK_ARGVALUES(at::BFloat16);
@@ -652,37 +635,15 @@ inline void bgemm_internal_cublas_bfloat16_helper(CUDABLAS_BGEMM_ARGTYPES_AND_C_
   auto compute_type = CUDA_R_32F;
 #endif
   TORCH_CUDABLAS_CHECK(cublasGemmStridedBatchedEx(handle,
                                   opa, opb, (int)m, (int)n, (int)k,
                                   (void*)&falpha, a, CUDA_R_16BF, (int)lda, stridea,
                                   b, CUDA_R_16BF, (int)ldb, strideb,
-                                  (void*)&fbeta, c, std::is_same_v<C_Dtype, float> ? CUDA_R_32F : CUDA_R_16BF,
-                                  (int)ldc, stridec, (int)num_batches,
+                                  (void*)&fbeta, c, CUDA_R_16BF, (int)ldc, stridec,
+                                  (int)num_batches,
                                   compute_type,
                                   CUBLAS_GEMM_DEFAULT_TENSOR_OP));
 }
 
-template <>
-void bgemm_internal_cublas<at::Half>(CUDABLAS_BGEMM_ARGTYPES(at::Half)) {
-  bgemm_internal_cublas_half_helper<at::Half>(CUDABLAS_BGEMM_ARGS(at::Half));
-}
-
-template <>
-void bgemm_internal_cublas<at::Half, float>(CUDABLAS_BGEMM_ARGTYPES_AND_C_DTYPE(at::Half, float)) {
-  bgemm_internal_cublas_half_helper<float>(CUDABLAS_BGEMM_ARGS(at::Half));
-}
-
-template <>
-void bgemm_internal_cublas<at::BFloat16>(CUDABLAS_BGEMM_ARGTYPES(at::BFloat16)) {
-  bgemm_internal_cublas_bfloat16_helper<at::BFloat16>(CUDABLAS_BGEMM_ARGS(at::BFloat16));
-}
-
-
-template <>
-void bgemm_internal_cublas<at::BFloat16, float>(CUDABLAS_BGEMM_ARGTYPES_AND_C_DTYPE(at::BFloat16, float)) {
-  bgemm_internal_cublas_bfloat16_helper<float>(CUDABLAS_BGEMM_ARGS(at::BFloat16));
-}
-
-
 template <>
 void bgemm_internal<double>(CUDABLAS_BGEMM_ARGTYPES(double))
 {
@@ -781,50 +742,9 @@ void bgemm_internal<at::BFloat16>(CUDABLAS_BGEMM_ARGTYPES(at::BFloat16))
   }
 }
 
-template<>
-void bgemm_internal<at::Half, float>(CUDABLAS_BGEMM_ARGTYPES_AND_C_DTYPE(at::Half, float))
-{
-  if (at::globalContext().allowFP16AccumulationCuBLAS()) {
-    // Do not allow fp16 reductions with fp32 output
-    TORCH_CHECK(false, "bgemm input type at::Half and output type float is not supported with allowFP16AccumulationCuBLAS");
-  }
-
-  if (at::globalContext().blasPreferredBackend() == BlasBackend::Cublaslt) {
-    if (!bgemm_internal_cublaslt<at::Half, float>(CUDABLAS_BGEMM_ARGS(at::Half))) {
-      bgemm_internal_cublas<at::Half, float>(CUDABLAS_BGEMM_ARGS(at::Half));
-    }
-  }
-#if defined(USE_ROCM) && !defined(_MSC_VER)
-  else if (at::globalContext().blasPreferredBackend() == BlasBackend::Ck) {
-    TORCH_CHECK(false, "gemm input type at::Half and output type float is not supported for ROCm");
-  }
-#endif
-  else {
-    bgemm_internal_cublas<at::Half, float>(CUDABLAS_BGEMM_ARGS(at::Half));
-  }
-}
-
-template<>
-void bgemm_internal<at::BFloat16, float>(CUDABLAS_BGEMM_ARGTYPES_AND_C_DTYPE(at::BFloat16, float))
-{
-  if (at::globalContext().blasPreferredBackend() == BlasBackend::Cublaslt) {
-    if (!bgemm_internal_cublaslt<at::BFloat16, float>(CUDABLAS_BGEMM_ARGS(at::BFloat16))) {
-      bgemm_internal_cublas<at::BFloat16, float>(CUDABLAS_BGEMM_ARGS(at::BFloat16));
-    }
-  }
-#if defined(USE_ROCM) && !defined(_MSC_VER)
-  else if (at::globalContext().blasPreferredBackend() == BlasBackend::Ck) {
-    TORCH_CHECK(false, "gemm input type at::BFloat16 and output type float is not supported for ROCm");
-  }
-#endif
-  else {
-    bgemm_internal_cublas<at::BFloat16, float>(CUDABLAS_BGEMM_ARGS(at::BFloat16));
-  }
-}
-
-template <typename Dtype, typename C_Dtype = Dtype>
-inline void bgemm_tunable(CUDABLAS_BGEMM_ARGTYPES_AND_C_DTYPE(Dtype, C_Dtype)) {
-  tunable::GemmStridedBatchedParams<Dtype> params;
+template <typename DType>
+inline void bgemm_tunable(CUDABLAS_BGEMM_ARGTYPES(DType)) {
+  tunable::GemmStridedBatchedParams<DType> params;
   params.transa = transa;
   params.transb = transb;
   params.m = m;
@@ -847,19 +767,19 @@ inline void bgemm_tunable(CUDABLAS_BGEMM_ARGTYPES_AND_C_DTYPE(Dtype, C_Dtype)) {
   bool transb_ = ((transb != 'n') && (transb != 'N'));
 
   if (transa_ && transb_) {
-    static tunable::GemmStridedBatchedTunableOp<Dtype, tunable::BlasOp::T, tunable::BlasOp::T> bgemm{};
+    static tunable::GemmStridedBatchedTunableOp<DType, tunable::BlasOp::T, tunable::BlasOp::T> bgemm{};
     bgemm(&params);
   }
   else if (transa_ && !transb_) {
-    static tunable::GemmStridedBatchedTunableOp<Dtype, tunable::BlasOp::T, tunable::BlasOp::N> bgemm{};
+    static tunable::GemmStridedBatchedTunableOp<DType, tunable::BlasOp::T, tunable::BlasOp::N> bgemm{};
     bgemm(&params);
   }
   else if (!transa_ && transb_) {
-    static tunable::GemmStridedBatchedTunableOp<Dtype, tunable::BlasOp::N, tunable::BlasOp::T> bgemm{};
+    static tunable::GemmStridedBatchedTunableOp<DType, tunable::BlasOp::N, tunable::BlasOp::T> bgemm{};
     bgemm(&params);
   }
   else if (!transa_ && !transb_) {
-    static tunable::GemmStridedBatchedTunableOp<Dtype, tunable::BlasOp::N, tunable::BlasOp::N> bgemm{};
+    static tunable::GemmStridedBatchedTunableOp<DType, tunable::BlasOp::N, tunable::BlasOp::N> bgemm{};
     bgemm(&params);
   }
   else {
@@ -933,35 +853,9 @@ void bgemm<at::BFloat16>(CUDABLAS_BGEMM_ARGTYPES(at::BFloat16)) {
   }
 }
 
-template <>
-void bgemm<at::Half, float>(CUDABLAS_BGEMM_ARGTYPES_AND_C_DTYPE(at::Half, float)) {
-#ifdef USE_ROCM
-  TORCH_CHECK(false, "bgemm input type at::Half and output type float is not supported for ROCm");
-#endif
-  // TODO: Support tuning for Half inputs and FP32 output
-  bgemm_internal<at::Half, float>(CUDABLAS_BGEMM_ARGS(at::Half));
-}
-
-
-template <>
-void bgemm<at::BFloat16, float>(CUDABLAS_BGEMM_ARGTYPES_AND_C_DTYPE(at::BFloat16, float)) {
-#ifdef USE_ROCM
-  TORCH_CHECK(false, "bgemm input type at::BFloat16 and output type float is not supported for ROCm");
-#else
-  cudaDeviceProp* prop = at::cuda::getCurrentDeviceProperties();
-
-  if (prop->major < 8)
-    TORCH_CHECK(false, "bgemm input type at::BFloat16 and output type float is only supported for CUDA devices with compute capability 8.0 or higher");
-#endif
-  // TODO: Support tuning for BFloat16 inputs and FP32 output
-  bgemm_internal<at::BFloat16, float>(CUDABLAS_BGEMM_ARGS(at::BFloat16));
-}
-
-
-
-template <typename Dtype, typename C_Dtype = Dtype>
-inline void gemm_internal_cublas(CUDABLAS_GEMM_ARGTYPES_AND_C_DTYPE(Dtype, C_Dtype)) {
-  TORCH_CHECK(false, "at::cuda::blas::gemm: not implemented for input type ", typeid(Dtype).name(), " and output type ", typeid(C_Dtype).name());
+template <typename Dtype>
+inline void gemm_internal_cublas(CUDABLAS_GEMM_ARGTYPES(Dtype)) {
+  static_assert(false && sizeof(Dtype), "at::cuda::blas::gemm_internal_cublas: not implemented");
 }
 
 template <>
@@ -1020,8 +914,8 @@ void gemm_internal_cublas<c10::complex<float>>(CUDABLAS_GEMM_ARGTYPES(c10::compl
       reinterpret_cast<cuComplex*>(c), ldc));
 }
 
-template <typename C_Dtype>
-inline void gemm_internal_cublas_half_helper(CUDABLAS_GEMM_ARGTYPES_AND_C_DTYPE(at::Half, C_Dtype)) {
+template <>
+void gemm_internal_cublas<at::Half>(CUDABLAS_GEMM_ARGTYPES(at::Half)) {
   // See Note [Writing Nondeterministic Operations]
   globalContext().alertCuBLASConfigNotDeterministic();
   cublasHandle_t handle = at::cuda::getCurrentCUDABlasHandle();
@@ -1100,7 +994,7 @@ inline void gemm_internal_cublas_half_helper(CUDABLAS_GEMM_ARGTYPES_AND_C_DTYPE(
       ldb,
       beta_ptr,
       c,
-      std::is_same_v<C_Dtype, float> ? CUDA_R_32F : CUDA_R_16F,
+      CUDA_R_16F,
       ldc,
       compute_type,
       CUBLAS_GEMM_DEFAULT_TENSOR_OP));
@@ -1122,14 +1016,14 @@ inline void gemm_internal_cublas_half_helper(CUDABLAS_GEMM_ARGTYPES_AND_C_DTYPE(
       ldb,
       &fbeta,
       c,
-      std::is_same_v<C_Dtype, float> ? CUDA_R_32F : CUDA_R_16F,
+      CUDA_R_16F,
       ldc));
   }
 #endif
 }
 
-template <typename C_Dtype>
-inline void gemm_internal_cublas_bfloat16_helper(CUDABLAS_GEMM_ARGTYPES_AND_C_DTYPE(at::BFloat16, C_Dtype)) {
+template <>
+void gemm_internal_cublas<at::BFloat16>(CUDABLAS_GEMM_ARGTYPES(at::BFloat16)) {
   globalContext().alertCuBLASConfigNotDeterministic();
   cublasHandle_t handle = at::cuda::getCurrentCUDABlasHandle();
   cublasOperation_t opa = _cublasOpFromChar(transa);
||||||
template <typename C_Dtype>
|
template <>
|
||||||
inline void gemm_internal_cublas_bfloat16_helper(CUDABLAS_GEMM_ARGTYPES_AND_C_DTYPE(at::BFloat16, C_Dtype)) {
|
void gemm_internal_cublas<at::BFloat16>(CUDABLAS_GEMM_ARGTYPES(at::BFloat16)) {
|
||||||
globalContext().alertCuBLASConfigNotDeterministic();
|
globalContext().alertCuBLASConfigNotDeterministic();
|
||||||
cublasHandle_t handle = at::cuda::getCurrentCUDABlasHandle();
|
cublasHandle_t handle = at::cuda::getCurrentCUDABlasHandle();
|
||||||
cublasOperation_t opa = _cublasOpFromChar(transa);
|
cublasOperation_t opa = _cublasOpFromChar(transa);
|
||||||
@ -1166,35 +1060,15 @@ inline void gemm_internal_cublas_bfloat16_helper(CUDABLAS_GEMM_ARGTYPES_AND_C_DT
|
|||||||
ldb,
|
ldb,
|
||||||
&fbeta,
|
&fbeta,
|
||||||
c,
|
c,
|
||||||
std::is_same_v<C_Dtype, float> ? CUDA_R_32F : CUDA_R_16BF,
|
CUDA_R_16BF,
|
||||||
ldc,
|
ldc,
|
||||||
compute_type,
|
compute_type,
|
||||||
CUBLAS_GEMM_DEFAULT_TENSOR_OP));
|
CUBLAS_GEMM_DEFAULT_TENSOR_OP));
|
||||||
TORCH_CUDABLAS_CHECK(cublasSetMathMode(handle, CUBLAS_DEFAULT_MATH));
|
TORCH_CUDABLAS_CHECK(cublasSetMathMode(handle, CUBLAS_DEFAULT_MATH));
|
||||||
}
|
}
|
||||||
|
|
||||||
template <>
|
template <typename Dtype>
|
||||||
void gemm_internal_cublas<at::Half>(CUDABLAS_GEMM_ARGTYPES(at::Half)) {
|
inline void gemm_internal_cublaslt(CUDABLAS_GEMM_ARGTYPES(Dtype)) {
|
||||||
gemm_internal_cublas_half_helper<at::Half>(CUDABLAS_GEMM_ARGS(at::Half));
|
|
||||||
}
|
|
||||||
|
|
||||||
template <>
|
|
||||||
void gemm_internal_cublas<at::Half, float>(CUDABLAS_GEMM_ARGTYPES_AND_C_DTYPE(at::Half, float)) {
|
|
||||||
gemm_internal_cublas_half_helper<float>(CUDABLAS_GEMM_ARGS(at::Half));
|
|
||||||
}
|
|
||||||
|
|
||||||
template <>
|
|
||||||
void gemm_internal_cublas<at::BFloat16>(CUDABLAS_GEMM_ARGTYPES(at::BFloat16)) {
|
|
||||||
gemm_internal_cublas_bfloat16_helper<at::BFloat16>(CUDABLAS_GEMM_ARGS(at::BFloat16));
|
|
||||||
}
|
|
||||||
|
|
||||||
template <>
|
|
||||||
void gemm_internal_cublas<at::BFloat16, float>(CUDABLAS_GEMM_ARGTYPES_AND_C_DTYPE(at::BFloat16, float)) {
|
|
||||||
gemm_internal_cublas_bfloat16_helper<float>(CUDABLAS_GEMM_ARGS(at::BFloat16));
|
|
||||||
}
|
|
||||||
|
|
||||||
template <typename Dtype, typename C_Dtype = Dtype>
|
|
||||||
inline void gemm_internal_cublaslt(CUDABLAS_GEMM_ARGTYPES_AND_C_DTYPE(Dtype, C_Dtype)) {
|
|
||||||
// forward to bgemm implementation but set strides and batches to 0
|
// forward to bgemm implementation but set strides and batches to 0
|
||||||
if (!bgemm_internal_cublaslt(transa, transb, m, n, k, alpha, a, lda, 0, b, ldb, 0, beta, c, ldc, 0, 0)) {
|
if (!bgemm_internal_cublaslt(transa, transb, m, n, k, alpha, a, lda, 0, b, ldb, 0, beta, c, ldc, 0, 0)) {
|
||||||
gemm_internal_cublas(CUDABLAS_GEMM_ARGS(Dtype));
|
gemm_internal_cublas(CUDABLAS_GEMM_ARGS(Dtype));
|
||||||
@@ -1306,45 +1180,8 @@ void gemm_internal<at::BFloat16>(CUDABLAS_GEMM_ARGTYPES(at::BFloat16))
   }
 }
 
-template<>
-void gemm_internal<at::Half, float>(CUDABLAS_GEMM_ARGTYPES_AND_C_DTYPE(at::Half, float))
-{
-  if (at::globalContext().allowFP16AccumulationCuBLAS()) {
-    // Do not allow fp16 reductions with fp32 output
-    TORCH_CHECK(false, "gemm input type at::Half and output type float is not supported with allowFP16AccumulationCuBLAS");
-  }
-
-  if (at::globalContext().blasPreferredBackend() == BlasBackend::Cublaslt) {
-    gemm_internal_cublaslt<at::Half, float>(CUDABLAS_GEMM_ARGS(at::Half));
-  }
-#if defined(USE_ROCM) && !defined(_MSC_VER)
-  else if (at::globalContext().blasPreferredBackend() == BlasBackend::Ck) {
-    TORCH_CHECK(false, "gemm input type at::Half and output type float is not supported for ROCm");
-  }
-#endif
-  else {
-    gemm_internal_cublas<at::Half, float>(CUDABLAS_GEMM_ARGS(at::Half));
-  }
-}
-
-template<>
-void gemm_internal<at::BFloat16, float>(CUDABLAS_GEMM_ARGTYPES_AND_C_DTYPE(at::BFloat16, float))
-{
-  if (at::globalContext().blasPreferredBackend() == BlasBackend::Cublaslt) {
-    gemm_internal_cublaslt<at::BFloat16, float>(CUDABLAS_GEMM_ARGS(at::BFloat16));
-  }
-#if defined(USE_ROCM) && !defined(_MSC_VER)
-  else if (at::globalContext().blasPreferredBackend() == BlasBackend::Ck) {
-    TORCH_CHECK(false, "gemm input type at::Half and output type float is not supported for ROCm");
-  }
-#endif
-  else {
-    gemm_internal_cublas<at::BFloat16, float>(CUDABLAS_GEMM_ARGS(at::BFloat16));
-  }
-}
-
-template <typename DType, typename C_Dtype>
-inline void gemm_tunable(CUDABLAS_GEMM_ARGTYPES_AND_C_DTYPE(DType, C_Dtype)) {
+template <typename DType>
+inline void gemm_tunable(CUDABLAS_GEMM_ARGTYPES(DType)) {
   tunable::GemmParams<DType> params;
   params.transa = transa;
   params.transb = transb;
@@ -1450,32 +1287,8 @@ void gemm<at::BFloat16>(CUDABLAS_GEMM_ARGTYPES(at::BFloat16)) {
   }
 }
 
-template <>
-void gemm<at::Half, float>(CUDABLAS_GEMM_ARGTYPES_AND_C_DTYPE(at::Half, float)) {
-#ifdef USE_ROCM
-  TORCH_CHECK(false, "gemm input type at::Half and output type float is not supported for ROCm");
-#endif
-  // TODO: Support Tuning for fp16-fp32 gemm
-  gemm_internal<at::Half, float>(CUDABLAS_GEMM_ARGS(at::Half));
-}
-
-template <>
-void gemm<at::BFloat16, float>(CUDABLAS_GEMM_ARGTYPES_AND_C_DTYPE(at::BFloat16, float)) {
-#ifdef USE_ROCM
-  TORCH_CHECK(false, "gemm input type at::BFloat16 and output type float is not supported for ROCm");
-#else
-  cudaDeviceProp* prop = at::cuda::getCurrentDeviceProperties();
-
-  if (prop->major < 8)
-    TORCH_CHECK(false, "gemm input type at::BFloat16 and output type float is only supported for CUDA devices with compute capability 8.0 or higher");
-#endif
-  // TODO: Support Tuning for bf16-fp32 gemm
-  gemm_internal<at::BFloat16, float>(CUDABLAS_GEMM_ARGS(at::BFloat16));
-}
-
 
-template <typename Dtype, typename C_Dtype>
+template <typename Dtype>
 bool gemm_and_bias(
     bool transpose_mat1,
     bool transpose_mat2,
@@ -1488,27 +1301,13 @@ bool gemm_and_bias(
     const Dtype* mat2_ptr,
     int64_t mat2_ld,
     const Dtype* bias,
-    C_Dtype* result_ptr,
+    Dtype* result_ptr,
     int64_t result_ld,
     GEMMAndBiasActivationEpilogue activation) {
-
-  if (std::is_same_v<C_Dtype, float> && std::is_same_v<Dtype, at::BFloat16>) {
-#ifdef USE_ROCM
-    TORCH_CHECK(false, "gemm input type at::BFloat16 and output type float is not supported for ROCm");
-#endif
-  } else if (std::is_same_v<C_Dtype, float> && std::is_same_v<Dtype, at::Half>) {
-#ifdef USE_ROCM
-    TORCH_CHECK(false, "gemm input type at::Half and output type float is not supported for ROCm");
-#endif
-    if (at::globalContext().allowFP16AccumulationCuBLAS())
-      TORCH_CHECK(false, "gemm input type at::Half and output type float is not supported with allowFP16AccumulationCuBLAS");
-  }
-
   using opmath_t = at::opmath_type<Dtype>;
   opmath_t beta_val = 0; // bias is added in epilogue
 
-  cudaDataType_t abType = CUDA_R_32F;
-  cudaDataType_t cType = CUDA_R_32F;
+  cudaDataType_t abcType = CUDA_R_32F;
   cublasComputeType_t computeType = CUBLAS_COMPUTE_32F;
   cudaDataType_t scaleType = CUDA_R_32F;
   void * alpha_ptr = &alpha_val;
@@ -1518,14 +1317,14 @@ bool gemm_and_bias(
   at::Half hbeta_val;
 #endif
   if constexpr (std::is_same_v<Dtype, double>) {
-    abType = CUDA_R_64F;
-    cType = CUDA_R_64F;
+    abcType = CUDA_R_64F;
     computeType = CUBLAS_COMPUTE_64F;
     scaleType = CUDA_R_64F;
   } else if constexpr (std::is_same_v<Dtype, float>) {
     if (at::globalContext().allowTF32CuBLAS()) {
       computeType = CUBLAS_COMPUTE_32F_FAST_TF32;
     }
+    abcType = CUDA_R_32F;
   } else if constexpr (std::is_same_v<Dtype, at::Half>) {
 #ifndef USE_ROCM
     cudaDeviceProp* prop = at::cuda::getCurrentDeviceProperties();
@@ -1538,11 +1337,9 @@ bool gemm_and_bias(
       beta_ptr = &hbeta_val;
     }
 #endif
-    abType = CUDA_R_16F;
-    cType = (std::is_same_v<C_Dtype, float>) ? CUDA_R_32F : CUDA_R_16F;
+    abcType = CUDA_R_16F;
   } else if constexpr (std::is_same_v<Dtype, at::BFloat16>) {
-    abType = CUDA_R_16BF;
-    cType = (std::is_same_v<C_Dtype, float>) ? CUDA_R_32F : CUDA_R_16BF;
+    abcType = CUDA_R_16BF;
   }
 
   CuBlasLtMatmulDescriptor computeDesc(computeType, scaleType);
@@ -1572,9 +1369,9 @@ bool gemm_and_bias(
     computeDesc.setAttribute(CUBLASLT_MATMUL_DESC_BIAS_POINTER, bias);
   }
 
-  CuBlasLtMatrixLayout Adesc(abType, m, k, mat1_ld, transpose_mat1);
-  CuBlasLtMatrixLayout Bdesc(abType, k, n, mat2_ld, transpose_mat2);
-  CuBlasLtMatrixLayout Cdesc(cType, m, n, result_ld);
+  CuBlasLtMatrixLayout Adesc(abcType, m, k, mat1_ld, transpose_mat1);
+  CuBlasLtMatrixLayout Bdesc(abcType, k, n, mat2_ld, transpose_mat2);
+  CuBlasLtMatrixLayout Cdesc(abcType, m, n, result_ld);
 
   CuBlasLtMatmulPreference preference;
   // See https://github.com/pytorch/pytorch/issues/73328 for reasoning behind
@@ -1652,10 +1449,8 @@ bool gemm_and_bias(
       mat2_ld,
       " result_ld ",
      result_ld,
-      " abType ",
-      abType,
-      " cType ",
-      cType,
+      " abcType ",
+      abcType,
       " computeType ",
       computeType,
       " scaleType ",
@@ -1714,22 +1509,6 @@ template bool gemm_and_bias(
     int64_t result_ld,
     GEMMAndBiasActivationEpilogue activation);
 
-template bool gemm_and_bias(
-    bool transpose_mat1,
-    bool transpose_mat2,
-    int64_t m,
-    int64_t n,
-    int64_t k,
-    at::opmath_type<at::Half> alpha_val,
-    const at::Half* mat1_ptr,
-    int64_t mat1_ld,
-    const at::Half* mat2_ptr,
-    int64_t mat2_ld,
-    const at::Half* bias,
-    float* result_ptr,
-    int64_t result_ld,
-    GEMMAndBiasActivationEpilogue activation);
-
 template bool gemm_and_bias(
     bool transpose_mat1,
     bool transpose_mat2,
@@ -1746,22 +1525,6 @@ template bool gemm_and_bias(
     int64_t result_ld,
     GEMMAndBiasActivationEpilogue activation);
 
-template bool gemm_and_bias(
-    bool transpose_mat1,
-    bool transpose_mat2,
-    int64_t m,
-    int64_t n,
-    int64_t k,
-    at::opmath_type<at::BFloat16> alpha_val,
-    const at::BFloat16* mat1_ptr,
-    int64_t mat1_ld,
-    const at::BFloat16* mat2_ptr,
-    int64_t mat2_ld,
-    const at::BFloat16* bias,
-    float* result_ptr,
-    int64_t result_ld,
-    GEMMAndBiasActivationEpilogue activation);
-
 void scaled_gemm(
     char transa,
     char transb,
@@ -39,26 +39,18 @@ private:
 
 /* LEVEL 3 BLAS FUNCTIONS */
 
-#define CUDABLAS_GEMM_ARGTYPES(Dtype) CUDABLAS_GEMM_ARGTYPES_AND_C_DTYPE(Dtype, Dtype)
-
-#define CUDABLAS_GEMM_ARGTYPES_AND_C_DTYPE(Dtype, C_Dtype) \
+#define CUDABLAS_GEMM_ARGTYPES(Dtype) \
   char transa, char transb, int64_t m, int64_t n, int64_t k, at::opmath_type<Dtype> alpha, \
   const Dtype *a, int64_t lda, const Dtype *b, int64_t ldb, at::opmath_type<Dtype> beta,\
-  C_Dtype *c, int64_t ldc
+  Dtype *c, int64_t ldc
 
 #define CUDABLAS_GEMM_ARGS(Dtype) transa, transb, m, n, k, alpha, a, lda, b, ldb, beta, c, ldc
 
-#define CUDABLAS_GEMM_DTYPE_IS_FLOAT_TYPE_AND_C_DTYPE_IS_FLOAT \
-  ((std::is_same<Dtype, at::Half>::value || std::is_same<Dtype, at::BFloat16>::value) && std::is_same<C_Dtype, float>::value)
-
-template <typename Dtype, typename C_Dtype = Dtype, typename std::enable_if<!CUDABLAS_GEMM_DTYPE_IS_FLOAT_TYPE_AND_C_DTYPE_IS_FLOAT, Dtype>::type* = nullptr>
-inline void gemm(CUDABLAS_GEMM_ARGTYPES_AND_C_DTYPE(Dtype, C_Dtype)) {
+template <typename Dtype>
+inline void gemm(CUDABLAS_GEMM_ARGTYPES(Dtype)) {
   static_assert(false&&sizeof(Dtype),"at::cuda::blas::gemm: not implemented");
 }
 
-template <typename Dtype, typename C_Dtype, typename std::enable_if<CUDABLAS_GEMM_DTYPE_IS_FLOAT_TYPE_AND_C_DTYPE_IS_FLOAT, Dtype>::type* = nullptr>
-void gemm(CUDABLAS_GEMM_ARGTYPES_AND_C_DTYPE(Dtype, C_Dtype));
-
 template <>
 void gemm<double>(CUDABLAS_GEMM_ARGTYPES(double));
 template <>
@@ -71,13 +63,9 @@ template <>
 void gemm<at::Half>(CUDABLAS_GEMM_ARGTYPES(at::Half));
 template <>
 void gemm<at::BFloat16>(CUDABLAS_GEMM_ARGTYPES(at::BFloat16));
-template<>
-void gemm<at::Half, float>(CUDABLAS_GEMM_ARGTYPES_AND_C_DTYPE(at::Half, float));
-template<>
-void gemm<at::BFloat16, float>(CUDABLAS_GEMM_ARGTYPES_AND_C_DTYPE(at::BFloat16, float));
 
-template <typename Dtype, typename C_Dtype = Dtype>
-inline void gemm_internal(CUDABLAS_GEMM_ARGTYPES_AND_C_DTYPE(Dtype, C_Dtype)) {
+template <typename Dtype>
+inline void gemm_internal(CUDABLAS_GEMM_ARGTYPES(Dtype)) {
   static_assert(false&&sizeof(Dtype),"at::cuda::blas::gemm_internal: not implemented");
 }
 
@@ -93,10 +81,6 @@ template <>
 void gemm_internal<at::Half>(CUDABLAS_GEMM_ARGTYPES(at::Half));
 template <>
 void gemm_internal<at::BFloat16>(CUDABLAS_GEMM_ARGTYPES(at::BFloat16));
-template<>
-void gemm_internal<at::Half, float>(CUDABLAS_GEMM_ARGTYPES_AND_C_DTYPE(at::Half, float));
-template<>
-void gemm_internal<at::BFloat16, float>(CUDABLAS_GEMM_ARGTYPES_AND_C_DTYPE(at::BFloat16, float));
 
 enum GEMMAndBiasActivationEpilogue {
   None,
@@ -106,7 +90,7 @@ enum GEMMAndBiasActivationEpilogue {
 
 // NOTE: GELU activation is not supported prior to CUDA 11.4 and will
 // do nothing if passed in that case.
-template <typename Dtype, typename C_Dtype = Dtype>
+template <typename Dtype>
 bool gemm_and_bias(
     bool transpose_mat1,
     bool transpose_mat2,
@@ -119,7 +103,7 @@ bool gemm_and_bias(
     const Dtype* mat2_ptr,
     int64_t mat2_ld,
     const Dtype* bias,
-    C_Dtype* result_ptr,
+    Dtype* result_ptr,
     int64_t result_ld,
     GEMMAndBiasActivationEpilogue activation = GEMMAndBiasActivationEpilogue::None);
 
@@ -161,25 +145,20 @@ void scaled_gemm(
     bool use_fast_accum,
     bool use_rowwise);
 
-#define CUDABLAS_BGEMM_ARGTYPES(Dtype) CUDABLAS_BGEMM_ARGTYPES_AND_C_DTYPE(Dtype, Dtype)
-
-#define CUDABLAS_BGEMM_ARGTYPES_AND_C_DTYPE(Dtype, C_Dtype) \
+#define CUDABLAS_BGEMM_ARGTYPES(Dtype) \
   char transa, char transb, int64_t m, int64_t n, int64_t k, at::opmath_type<Dtype> alpha, \
   const Dtype *a, int64_t lda, int64_t stridea, \
   const Dtype *b, int64_t ldb, int64_t strideb, \
-  at::opmath_type<Dtype> beta, C_Dtype *c, int64_t ldc, int64_t stridec, int64_t num_batches
+  at::opmath_type<Dtype> beta, Dtype *c, int64_t ldc, int64_t stridec, int64_t num_batches
 
 #define CUDABLAS_BGEMM_ARGS(Dtype) \
   transa, transb, m, n, k, alpha, a, lda, stridea, b, ldb, strideb, beta, c, ldc, stridec, num_batches
 
-template <typename Dtype, typename C_Dtype = Dtype, typename std::enable_if<!CUDABLAS_GEMM_DTYPE_IS_FLOAT_TYPE_AND_C_DTYPE_IS_FLOAT, Dtype>::type* = nullptr>
-inline void bgemm(CUDABLAS_BGEMM_ARGTYPES_AND_C_DTYPE(Dtype, C_Dtype)) {
+template <typename Dtype>
+inline void bgemm(CUDABLAS_BGEMM_ARGTYPES(Dtype)) {
   static_assert(false&&sizeof(Dtype),"at::cuda::blas::bgemm: not implemented");
 }
 
-template <typename Dtype, typename C_Dtype, typename std::enable_if<CUDABLAS_GEMM_DTYPE_IS_FLOAT_TYPE_AND_C_DTYPE_IS_FLOAT, Dtype>::type* = nullptr>
-void bgemm(CUDABLAS_BGEMM_ARGTYPES_AND_C_DTYPE(Dtype, C_Dtype));
-
 template <>
 void bgemm<double>(CUDABLAS_BGEMM_ARGTYPES(double));
 template <>
@@ -192,13 +171,9 @@ template <>
 void bgemm<at::Half>(CUDABLAS_BGEMM_ARGTYPES(at::Half));
 template <>
 void bgemm<at::BFloat16>(CUDABLAS_BGEMM_ARGTYPES(at::BFloat16));
-template<>
-void bgemm<at::Half, float>(CUDABLAS_BGEMM_ARGTYPES_AND_C_DTYPE(at::Half, float));
-template<>
-void bgemm<at::BFloat16, float>(CUDABLAS_BGEMM_ARGTYPES_AND_C_DTYPE(at::BFloat16, float));
 
-template <typename Dtype, typename C_Dtype = Dtype>
-inline void bgemm_internal(CUDABLAS_BGEMM_ARGTYPES_AND_C_DTYPE(Dtype, C_Dtype)) {
+template <typename Dtype>
+inline void bgemm_internal(CUDABLAS_BGEMM_ARGTYPES(Dtype)) {
   static_assert(false&&sizeof(Dtype),"at::cuda::blas::bgemm_internal: not implemented");
 }
 
@@ -214,10 +189,6 @@ template <>
 void bgemm_internal<at::Half>(CUDABLAS_BGEMM_ARGTYPES(at::Half));
 template <>
 void bgemm_internal<at::BFloat16>(CUDABLAS_BGEMM_ARGTYPES(at::BFloat16));
-template<>
-void bgemm_internal<at::Half, float>(CUDABLAS_BGEMM_ARGTYPES_AND_C_DTYPE(at::Half, float));
-template<>
-void bgemm_internal<at::BFloat16, float>(CUDABLAS_BGEMM_ARGTYPES_AND_C_DTYPE(at::BFloat16, float));
 
 #define CUDABLAS_TRSM_ARGTYPES(Dtype) \
   cublasHandle_t handle, cublasSideMode_t side, cublasFillMode_t uplo, \
@@ -249,13 +249,58 @@ struct CUDACachingHostAllocatorImpl
   }
 };
 
-DECLARE_HOST_ALLOCATOR(
-    CUDACachingHostAllocator,
-    CUDACachingHostAllocatorImpl,
-    raw_local_deleter,
-    caching_host_allocator);
+void raw_local_deleter(void* ptr);
 
-REGISTER_HOST_ALLOCATOR(at::kCUDA, &caching_host_allocator)
+struct CUDACachingHostAllocator final
+    : public CachingHostAllocatorInterface<CUDACachingHostAllocatorImpl> {
+  at::DataPtr allocate(size_t size) override {
+    auto ptr_and_ctx = impl_->allocate(size);
+    return {
+        ptr_and_ctx.first,
+        ptr_and_ctx.second,
+        &raw_local_deleter,
+        at::DeviceType::CPU};
+  }
+};
+
+CUDACachingHostAllocator caching_host_allocator;
+
+static inline CUDACachingHostAllocator& getCUDACachingHostAllocator() {
+  return caching_host_allocator;
+}
+
+void raw_local_deleter(void* ptr) {
+  getCUDACachingHostAllocator().free(ptr);
+}
+
 } // anonymous namespace
 
+bool CachingHostAllocator_recordEvent(
+    void* ptr,
+    void* ctx,
+    at::cuda::CUDAStream stream) {
+  return getCUDACachingHostAllocator().record_event(ptr, ctx, stream);
+}
+
+// Releases cached pinned memory allocations via cudaHostFree
+void CachingHostAllocator_emptyCache() {
+  getCUDACachingHostAllocator().empty_cache();
+}
+
+at::Allocator* getCachingHostAllocator() {
+  return &getCUDACachingHostAllocator();
+}
+
+at::HostStats CachingHostAllocator_getStats() {
+  return getCUDACachingHostAllocator().getStats();
+}
+
+void CachingHostAllocator_resetAccumulatedStats() {
+  return getCUDACachingHostAllocator().resetAccumulatedStats();
+}
+
+void CachingHostAllocator_resetPeakStats() {
+  return getCUDACachingHostAllocator().resetPeakStats();
+}
+
 } // namespace at::cuda
@@ -18,52 +18,25 @@ namespace at::cuda {
 // call between host and device, and passed the corresponding context from the
 // allocation. This is currently invoked by at::native::copy_kernel_cuda.
 //
-C10_DEPRECATED_MESSAGE(
-    "at::cuda::getCachingHostAllocator() is deprecated. Please use at::getHostAllocator(at::kCUDA) instead.")
-inline TORCH_CUDA_CPP_API at::HostAllocator* getCachingHostAllocator() {
-  return at::getHostAllocator(at::kCUDA);
-}
+TORCH_CUDA_CPP_API c10::Allocator* getCachingHostAllocator();
 
 // Records an event in the specified stream. The allocation corresponding to the
 // input `ptr`/`ctx` will not be re-used until the event has occurred.
-C10_DEPRECATED_MESSAGE(
-    "at::cuda::CachingHostAllocator_recordEvent(...) is deprecated. Please use at::getHostAllocator(at::kCUDA)->record_event(...) instead.")
-inline TORCH_CUDA_CPP_API bool CachingHostAllocator_recordEvent(
+TORCH_CUDA_CPP_API bool CachingHostAllocator_recordEvent(
     void* ptr,
     void* ctx,
-    c10::cuda::CUDAStream stream) {
-  return getHostAllocator(at::kCUDA)->record_event(ptr, ctx, stream.unwrap());
-}
+    c10::cuda::CUDAStream stream);
 
 // Releases cached pinned memory allocations via cudaHostFree
-C10_DEPRECATED_MESSAGE(
-    "at::cuda::CachingHostAllocator_emptyCache() is deprecated. Please use at::getHostAllocator(at::kCUDA)->empty_cache() instead.")
-inline TORCH_CUDA_CPP_API void CachingHostAllocator_emptyCache() {
-  getHostAllocator(at::kCUDA)->empty_cache();
-}
+TORCH_CUDA_CPP_API void CachingHostAllocator_emptyCache();
 
-C10_DEPRECATED_MESSAGE(
-    "at::cuda::HostAlloc(...) is deprecated. Please use at::getHostAllocator(at::kCUDA)->allocate(...) instead.")
 inline TORCH_CUDA_CPP_API at::DataPtr HostAlloc(size_t size) {
-  return getHostAllocator(at::kCUDA)->allocate(size);
+  return getCachingHostAllocator()->allocate(size);
 }
 
-C10_DEPRECATED_MESSAGE(
-    "at::cuda::CachingHostAllocator_getStats() is deprecated. Please use at::getHostAllocator(at::kCUDA)->get_stats() instead.")
-inline TORCH_CUDA_CPP_API at::HostStats CachingHostAllocator_getStats() {
-  return getHostAllocator(at::kCUDA)->get_stats();
-}
+TORCH_CUDA_CPP_API at::HostStats CachingHostAllocator_getStats();
 
-C10_DEPRECATED_MESSAGE(
-    "at::cuda::CachingHostAllocator_resetAccumulatedStats() is deprecated. Please use at::getHostAllocator(at::kCUDA)->reset_accumulated_stats() instead.")
-inline TORCH_CUDA_CPP_API void CachingHostAllocator_resetAccumulatedStats() {
-  getHostAllocator(at::kCUDA)->reset_accumulated_stats();
-}
-
-C10_DEPRECATED_MESSAGE(
-    "at::cuda::CachingHostAllocator_resetPeakStats() is deprecated. Please use at::getHostAllocator(at::kCUDA)->reset_peak_stats() instead.")
-inline TORCH_CUDA_CPP_API void CachingHostAllocator_resetPeakStats() {
-  getHostAllocator(at::kCUDA)->reset_peak_stats();
-}
+TORCH_CUDA_CPP_API void CachingHostAllocator_resetAccumulatedStats();
+TORCH_CUDA_CPP_API void CachingHostAllocator_resetPeakStats();
 
 } // namespace at::cuda
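On the left-hand side these entry points survive only as C10_DEPRECATED_MESSAGE shims that forward to the device-generic host allocator. A minimal sketch of the migration those messages ask for, assuming the at::getHostAllocator(at::kCUDA) accessor and the c10::cuda::getCurrentCUDAStream() helper shown or referenced in the diff; illustrative only, the function name below is ours.

#include <ATen/cuda/CachingHostAllocator.h>
#include <c10/cuda/CUDAStream.h>

void pinned_staging_example(size_t nbytes) {
  auto* host_alloc = at::getHostAllocator(at::kCUDA);

  // replaces at::cuda::HostAlloc(nbytes)
  at::DataPtr staging = host_alloc->allocate(nbytes);

  c10::cuda::CUDAStream stream = c10::cuda::getCurrentCUDAStream();
  // ... enqueue async copies that read from staging.get() on `stream` ...

  // replaces at::cuda::CachingHostAllocator_recordEvent(ptr, ctx, stream)
  host_alloc->record_event(staging.get(), staging.get_context(), stream.unwrap());

  // replaces at::cuda::CachingHostAllocator_emptyCache()
  host_alloc->empty_cache();
}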
@@ -1,10 +1,11 @@
 #pragma once
 
+#include <c10/core/Allocator.h>
 #include <ATen/cuda/CachingHostAllocator.h>
 
 namespace at::cuda {
 
-inline TORCH_CUDA_CPP_API at::HostAllocator* getPinnedMemoryAllocator() {
-  return at::getHostAllocator(at::kCUDA);
+inline TORCH_CUDA_CPP_API at::Allocator* getPinnedMemoryAllocator() {
+  return getCachingHostAllocator();
 }
 } // namespace at::cuda
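getPinnedMemoryAllocator() is an internal hook; user-level code normally requests pinned host memory through TensorOptions. A small sketch using only public ATen calls (the helper name is illustrative):

#include <ATen/ATen.h>

at::Tensor make_pinned_staging_buffer(int64_t n) {
  // Pinned (page-locked) CPU memory enables async H2D/D2H copies.
  return at::empty({n}, at::TensorOptions()
                            .dtype(at::kFloat)
                            .device(at::kCPU)
                            .pinned_memory(true));
}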
@@ -469,7 +469,7 @@ private:
   bool duplicate_inputs_{false};
 };
 
-template <typename T, typename C_Dtype = T>
+template <typename T>
 struct GemmStridedBatchedParams : OpParams {
   std::string BLASSignature() const override {
     std::string alpha_str = to_string_opmath<T>(alpha);
@@ -477,7 +477,7 @@ struct GemmStridedBatchedParams : OpParams {
     return fmt::sprintf("- { function: matmul, M: %ld, N: %ld, K: %ld, lda: %ld, ldb: %ld, ldc: %ld, ldd: %ld, stride_a: %ld, stride_b: %ld, stride_c: %ld, stride_d: %ld, "
       "alpha: %s, beta: %s, transA: %c, transB: %c, batch_count: %ld, a_type: %s, b_type: %s, c_type: %s, d_type: %s, scale_type: %s, compute_type: %s }",
       m, n, k, lda, ldb, ldc, ldc, stride_a, stride_b, stride_c, stride_c, alpha_str, beta_str, transa, transb, batch,
-      BLASTypeName<T>(T{}), BLASTypeName<T>(T{}), BLASTypeName<C_Dtype>(C_Dtype{}), BLASTypeName<T>(T{}), ComputeTypeFor<T>(), ComputeTypeFor<T>());
+      BLASTypeName<T>(T{}), BLASTypeName<T>(T{}), BLASTypeName<T>(T{}), BLASTypeName<T>(T{}), ComputeTypeFor<T>(), ComputeTypeFor<T>());
   }
 
   std::string Signature() const override {
@@ -517,7 +517,7 @@ struct GemmStridedBatchedParams : OpParams {
     c10::DeviceIndex device = 0;
     AT_CUDA_CHECK(c10::cuda::GetDevice(&device));
     size_t c_size = GetSizeC();
-    copy->c = static_cast<C_Dtype*>(c10::cuda::CUDACachingAllocator::raw_alloc(c_size));
+    copy->c = static_cast<T*>(c10::cuda::CUDACachingAllocator::raw_alloc(c_size));
     AT_CUDA_CHECK(c10::cuda::CUDACachingAllocator::memcpyAsync(
         copy->c, device, c, device, c_size, getCurrentCUDAStream(device), true));
     if (duplicate_inputs) {
@@ -544,7 +544,7 @@ struct GemmStridedBatchedParams : OpParams {
   }
 
   TuningStatus NumericalCheck(GemmStridedBatchedParams<T> *other) {
-    auto c_dtype = c10::CppTypeToScalarType<C_Dtype>::value;
+    auto c_dtype = c10::CppTypeToScalarType<T>::value;
     return detail::NumericalCheck(c_dtype, c, other->c, GetSizeC()/sizeof(T)) ? OK : FAIL;
   }
 
@@ -561,7 +561,7 @@ struct GemmStridedBatchedParams : OpParams {
   int64_t ldb{};
   int64_t stride_b{};
   at::opmath_type<T> beta;
-  C_Dtype* c{};
+  T* c{};
   int64_t ldc{};
   int64_t stride_c{};
   int64_t batch{};
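The C_Dtype parameter removed on the right-hand side is what lets the tunable GEMM keep a float C matrix for at::Half or at::BFloat16 inputs. A small CPU-side sketch of that input-dtype / output-dtype split (illustrative only, not the tunable-op code path; the function name is ours):

#include <c10/util/Half.h>
#include <vector>

// Accumulate a dot product of fp16 inputs into a float result: the same
// Dtype-in / C_Dtype-out split that GemmStridedBatchedParams<T, C_Dtype> models.
float dot_fp16_accumulate_fp32(const std::vector<c10::Half>& a,
                               const std::vector<c10::Half>& b) {
  float acc = 0.0f;  // C_Dtype = float
  for (size_t i = 0; i < a.size() && i < b.size(); ++i) {
    acc += static_cast<float>(a[i]) * static_cast<float>(b[i]);  // Dtype = Half
  }
  return acc;
}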
@@ -849,7 +849,10 @@ namespace at::native {
 // linear algebra operations
 
 template<class scalar_t>
-static void lapackCholeskySolve(char uplo, int n, int nrhs, scalar_t *a, int lda, scalar_t *b, int ldb, int *info);
+void lapackCholeskySolve(char uplo, int n, int nrhs, scalar_t *a, int lda, scalar_t *b, int ldb, int *info);
+
+template<class scalar_t, class value_t=scalar_t>
+void lapackSymeig(char jobz, char uplo, int n, scalar_t *a, int lda, value_t *w, scalar_t *work, int lwork, value_t *rwork, int *info);
 
 template<> void lapackLu<c10::complex<double>>(int m, int n, c10::complex<double> *a, int lda, int *ipiv, int *info) {
   zgetrf_(&m, &n, reinterpret_cast<std::complex<double>*>(a), &lda, ipiv, info);
@@ -1383,35 +1383,35 @@ Tensor bitwise_right_shift(const Scalar& self, const Tensor& other) {
 }
 
 template <typename Stub>
-static Tensor& comparison_op_out(Tensor& result, const Tensor& self, const Tensor& other, Stub& stub) {
+Tensor& comparison_op_out(Tensor& result, const Tensor& self, const Tensor& other, Stub& stub) {
   auto iter = TensorIterator::comparison_op(result, self, other);
   stub(iter.device_type(), iter);
   return result;
 }
 
 template <typename OutImpl>
-static Tensor comparison_op(const Tensor& self, const Tensor& other, OutImpl& out_impl) {
+Tensor comparison_op(const Tensor& self, const Tensor& other, OutImpl& out_impl) {
   Tensor result = at::empty({0}, self.options().dtype(kBool));
   return out_impl(result, self, other);
 }
 
 template <typename OutImpl>
-static Tensor& comparison_op_(Tensor& self, const Tensor& other, OutImpl& out_impl) {
+Tensor& comparison_op_(Tensor& self, const Tensor& other, OutImpl& out_impl) {
   return out_impl(self, self, other);
 }
 
 template <typename OutImpl>
-static Tensor& comparison_op_out(Tensor& result, const Tensor& self, const Scalar& other, OutImpl& out_impl) {
+Tensor& comparison_op_out(Tensor& result, const Tensor& self, const Scalar& other, OutImpl& out_impl) {
   return out_impl(result, self, wrapped_scalar_tensor(other));
 }
 
 template <typename OutImpl>
-static Tensor comparison_op(const Tensor& self, const Scalar& other, OutImpl& out_impl) {
+Tensor comparison_op(const Tensor& self, const Scalar& other, OutImpl& out_impl) {
   return comparison_op(self, wrapped_scalar_tensor(other), out_impl);
 }
 
 template <typename OutImpl>
-static Tensor& comparison_op_(Tensor& self, const Scalar& other, OutImpl& out_impl) {
+Tensor& comparison_op_(Tensor& self, const Scalar& other, OutImpl& out_impl) {
   return out_impl(self, self, wrapped_scalar_tensor(other));
 }
 
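Many of the hunks in this comparison, here and in the files below, only add or drop `static` on file-local template helpers. A tiny standalone illustration of what that linkage change means, independent of ATen:

// tu_local.cpp -- illustrative sketch of the internal-linkage cleanup these
// hunks apply: helpers used by a single translation unit are marked `static`
// (or placed in an anonymous namespace) so they do not leak symbols.
#include <string>

static std::string greet_impl(const std::string& name) {  // internal linkage
  return "hello, " + name;
}

namespace {  // equivalent alternative to `static`
std::string shout_impl(const std::string& name) {
  return name + "!";
}
}  // namespace

std::string greet(const std::string& name) {  // the only exported symbol
  return shout_impl(greet_impl(name));
}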
@@ -7,11 +7,6 @@
 #include <ATen/Config.h>
 
 #include <ATen/native/mkldnn/Matmul.h>
-#include <ATen/native/mkldnn/Linear.h>
-#include <ATen/native/Resize.h>
-#if !defined(__s390x__) && !defined(__powerpc__)
-#include <cpuinfo.h>
-#endif
 
 #ifndef AT_PER_OPERATOR_HEADERS
 #include <ATen/CPUFunctions.h>
@@ -29,9 +24,6 @@
 #include <ATen/ops/mv_native.h>
 #include <ATen/ops/scalar_tensor_native.h>
 #include <ATen/ops/vdot_native.h>
-#include <ATen/ops/_scaled_mm_native.h>
-#include <ATen/ops/mul.h>
-#include <ATen/ops/matmul.h>
 #endif
 
 namespace at::meta {
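The removed includes above (mkldnn/Linear.h, Resize.h, cpuinfo, the _scaled_mm ops headers) support the CPU _scaled_mm path whose removal follows in the next hunk. As a reading aid, a hedged sketch of what that emulated path computes with per-tensor scales, using only public ATen calls; the function name is ours, not ATen's:

#include <ATen/ATen.h>
#include <optional>

at::Tensor scaled_mm_reference(const at::Tensor& mat1, const at::Tensor& mat2,
                               double scale_a, double scale_b,
                               const std::optional<at::Tensor>& bias) {
  // Dequantize with the per-tensor scales, run a float matmul, then add bias.
  auto fp32_mat1 = at::mul(mat1.to(at::kFloat), scale_a);
  auto fp32_mat2 = at::mul(mat2.to(at::kFloat), scale_b);
  auto out = at::matmul(fp32_mat1, fp32_mat2);
  if (bias) {
    out.add_(*bias);
  }
  return out;
}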
@ -230,92 +222,4 @@ Tensor vdot(const Tensor &self, const Tensor &other){
|
|||||||
|
|
||||||
}
|
}
|
||||||
|
|
||||||
static Tensor&
|
|
||||||
_scaled_mm_out_cpu_emulated(const Tensor& mat1, const Tensor& mat2,
|
|
||||||
const Tensor& scale_a,
|
|
||||||
const Tensor& scale_b,
|
|
||||||
const std::optional<at::Tensor>& bias,
|
|
||||||
const std::optional<at::Tensor>& scale_result,
|
|
||||||
std::optional<c10::ScalarType> out_dtype,
|
|
||||||
bool use_fast_accum,
|
|
||||||
Tensor& out) {
|
|
||||||
TORCH_CHECK(mat1.dim() == 2, "mat1 must be a matrix");
|
|
||||||
TORCH_CHECK(mat2.dim() == 2, "mat2 must be a matrix");
|
|
||||||
TORCH_CHECK(
|
|
||||||
mat1.sizes()[1] == mat2.sizes()[0], "mat1 and mat2 shapes cannot be multiplied (",
|
|
||||||
mat1.sizes()[0], "x", mat1.sizes()[1], " and ", mat2.sizes()[0], "x", mat2.sizes()[1], ")");
|
|
||||||
|
|
||||||
TORCH_INTERNAL_ASSERT((scale_a.numel() == 1 && scale_b.numel() == 1), "Now _scaled_mm only supports per-tensor scaling for CPU backend.");
|
|
||||||
TORCH_CHECK(!bias || bias->numel() == mat2.sizes()[1], "Bias must be size ", mat2.sizes()[1],
|
|
||||||
" but got ", bias->numel());
|
|
||||||
|
|
||||||
// Check types
|
|
||||||
TORCH_CHECK(!out_dtype || *out_dtype == out.scalar_type(), "out_dtype must match output matrix type");
|
|
||||||
TORCH_CHECK(isFloat8Type(mat1.scalar_type()), "Expected mat1 to be Float8 matrix got ", mat1.scalar_type());
|
|
||||||
TORCH_CHECK(isFloat8Type(mat2.scalar_type()), "Expected mat2 to be Float8 matrix got ", mat2.scalar_type());
|
|
||||||
|
|
||||||
auto mat1_c = mat1.contiguous();
|
|
||||||
auto mat2_c = mat2.contiguous();
|
|
||||||
IntArrayRef mat1_sizes = mat1_c.sizes();
|
|
||||||
IntArrayRef mat2_sizes = mat2_c.sizes();
|
|
||||||
at::native::resize_output(out, {mat1_sizes[0], mat2_sizes[1]});
|
|
||||||
|
|
||||||
float input_scale = scale_a.item<float>();
|
|
||||||
float weight_scale = scale_b.item<float>();
|
|
||||||
auto fp32_mat1 = at::mul(mat1.to(kFloat), input_scale);
|
|
||||||
auto fp32_mat2 = at::mul(mat2_c.to(kFloat), weight_scale);
|
|
||||||
auto out_tmp = at::matmul(fp32_mat1, fp32_mat2);
|
|
||||||
if (bias) {
|
|
||||||
out_tmp.add_(bias.value());
|
|
||||||
}
|
|
||||||
out_tmp = out_tmp.to(out.scalar_type());
|
|
||||||
out.copy_(out_tmp);
|
|
||||||
return out;
|
|
||||||
}
|
|
||||||
|
|
||||||
Tensor&
|
|
||||||
_scaled_mm_out_cpu(const Tensor& mat1, const Tensor& mat2,
|
|
||||||
const Tensor& scale_a,
|
|
||||||
const Tensor& scale_b,
|
|
||||||
const std::optional<at::Tensor>& bias,
|
|
||||||
const std::optional<at::Tensor>& scale_result,
|
|
||||||
std::optional<c10::ScalarType> out_dtype,
|
|
||||||
bool use_fast_accum,
|
|
||||||
Tensor& out) {
|
|
||||||
#if AT_MKLDNN_ENABLED()
|
|
||||||
if (at::globalContext().userEnabledMkldnn()) {
|
|
||||||
bool mixed_dtype = mat1.scalar_type() != mat2.scalar_type();
|
|
||||||
if ((!mixed_dtype && cpuinfo_has_x86_amx_int8()) ||
|
|
||||||
(mixed_dtype && cpuinfo_has_x86_amx_fp16())) {
|
|
||||||
return mkldnn_scaled_mm(
|
|
||||||
mat1,
|
|
||||||
mat2,
|
|
||||||
scale_a,
|
|
||||||
scale_b,
|
|
||||||
bias,
|
|
||||||
scale_result,
|
|
||||||
out_dtype,
|
|
||||||
use_fast_accum,
|
|
||||||
out);
|
|
||||||
}
|
|
||||||
}
|
|
||||||
#endif
|
|
||||||
{
|
|
||||||
return _scaled_mm_out_cpu_emulated(mat1, mat2, scale_a, scale_b, bias, scale_result, out_dtype, use_fast_accum, out);
|
|
||||||
}
|
|
||||||
}
|
|
||||||
|
|
||||||
Tensor
|
|
||||||
_scaled_mm_cpu(const Tensor& mat_a, const Tensor& mat_b,
|
|
||||||
const Tensor& scale_a,
|
|
||||||
const Tensor& scale_b,
|
|
||||||
const std::optional<at::Tensor>& bias,
|
|
||||||
const std::optional<at::Tensor>& scale_result,
|
|
||||||
std::optional<c10::ScalarType> out_dtype,
|
|
||||||
bool use_fast_accum) {
|
|
||||||
const auto out_dtype_ = out_dtype.value_or(mat_a.scalar_type());
|
|
||||||
Tensor out = at::empty({0}, mat_a.options().dtype(out_dtype_));
|
|
||||||
return _scaled_mm_out_cpu(mat_a, mat_b, scale_a, scale_b, bias, scale_result, out_dtype, use_fast_accum, out);
|
|
||||||
}
|
|
||||||
|
|
||||||
} // namespace at::native
|
} // namespace at::native
|
||||||
|
|||||||
@ -116,44 +116,21 @@ void fp16_gemv_trans(
|
|||||||
fp16_gemv_trans_stub(kCPU, m, n, alpha, a, lda, x, incx, beta, y, incy);
|
fp16_gemv_trans_stub(kCPU, m, n, alpha, a, lda, x, incx, beta, y, incy);
|
||||||
}
|
}
|
||||||
|
|
||||||
|
void bf16_gemv_trans(
|
||||||
|
const int m,
|
||||||
|
const int n,
|
||||||
|
const at::BFloat16 alpha,
|
||||||
|
const at::BFloat16* a,
|
||||||
|
const int lda,
|
||||||
|
const at::BFloat16* x,
|
||||||
|
const int incx,
|
||||||
|
const at::BFloat16 beta,
|
||||||
|
at::BFloat16* y,
|
||||||
|
const int incy);
|
||||||
|
|
||||||
#endif // !defined(C10_MOBILE)
|
#endif // !defined(C10_MOBILE)
|
||||||
|
|
||||||
#if defined(__aarch64__) && !defined(C10_MOBILE)
|
#if defined(__aarch64__) && !defined(C10_MOBILE)
|
||||||
#ifdef __ARM_FEATURE_FP16_SCALAR_ARITHMETIC
|
|
||||||
static void fp16_gemv_notrans_fp16_arith(int m, int n, const float16_t* a, const int lda, const float16_t *x, float16_t *y) {
|
|
||||||
for (auto j = 0; j < n; j++) {
|
|
||||||
auto vecCol = vdup_n_f16(x[j]);
|
|
||||||
const auto* column = a + lda * j;
|
|
||||||
for (auto i = 0; i < m; i += 4) {
|
|
||||||
auto yf16 = y + i;
|
|
||||||
auto matRow = vld1_f16(column + i);
|
|
||||||
auto resVec = j != 0 ? vld1_f16(yf16) : vdup_n_f16(0);
|
|
||||||
resVec = vfma_lane_f16(resVec, matRow, vecCol, 0);
|
|
||||||
vst1_f16(yf16, resVec);
|
|
||||||
}
|
|
||||||
}
|
|
||||||
}
|
|
||||||
#endif
|
|
||||||
|
|
||||||
static void fp16_gemv_notrans_fp32_arith(int m, int n, const float16_t* a, const int lda, const float16_t *x, float16_t *y) {
|
|
||||||
std::vector<float> sum(m);
|
|
||||||
for (auto j = 0; j < n; j++) {
|
|
||||||
auto vecCol = vdup_n_f32(x[j]);
|
|
||||||
const auto* column = a + lda * j;
|
|
||||||
for (auto i = 0; i < m; i += 4) {
|
|
||||||
auto sf32 = sum.data() + i;
|
|
||||||
auto matRow = vcvt_f32_f16(vld1_f16(column + i));
|
|
||||||
auto resVec = j != 0 ? vld1q_f32(sf32) : vdupq_n_f32(0);
|
|
||||||
resVec = vfmaq_lane_f32(resVec, matRow, vecCol, 0);
|
|
||||||
vst1q_f32(sf32, resVec);
|
|
||||||
}
|
|
||||||
}
|
|
||||||
|
|
||||||
for (auto i = 0; i < m; i+= 4) {
|
|
||||||
vst1_f16(y + i, vcvt_f16_f32(vld1q_f32(sum.data() + i)));
|
|
||||||
}
|
|
||||||
}
|
|
||||||
|
|
||||||
void fp16_gemv_notrans(
|
void fp16_gemv_notrans(
|
||||||
const int m,
|
const int m,
|
||||||
const int n,
|
const int n,
|
||||||
@ -166,55 +143,17 @@ void fp16_gemv_notrans(
|
|||||||
Half* y,
|
Half* y,
|
||||||
const int incy);
|
const int incy);
|
||||||
|
|
||||||
void fp16_gemv_notrans(
|
|
||||||
const int m,
|
|
||||||
const int n,
|
|
||||||
const float alpha,
|
|
||||||
const Half* a,
|
|
||||||
const int lda,
|
|
||||||
const Half* x,
|
|
||||||
const int incx,
|
|
||||||
const float beta,
|
|
||||||
Half* y,
|
|
||||||
const int incy) {
|
|
||||||
if (incx == 1 && alpha == 1.0 && beta == 0.0 && m % 4 == 0 && incy == 1) {
|
|
||||||
#ifdef __ARM_FEATURE_FP16_SCALAR_ARITHMETIC
|
|
||||||
if (at::globalContext().allowFP16ReductionCPU()) {
|
|
||||||
return fp16_gemv_notrans_fp16_arith(m, n, reinterpret_cast<const float16_t*>(a), lda, reinterpret_cast<const float16_t*>(x), reinterpret_cast<float16_t*>(y));
|
|
||||||
}
|
|
||||||
#endif
|
|
||||||
return fp16_gemv_notrans_fp32_arith(m, n, reinterpret_cast<const float16_t*>(a), lda, reinterpret_cast<const float16_t*>(x), reinterpret_cast<float16_t*>(y));
|
|
||||||
}
|
|
||||||
std::vector<float> sum(m);
|
|
||||||
for (const auto j : c10::irange(n)) {
|
|
||||||
const auto* column_ = a + lda * j;
|
|
||||||
auto z = alpha * x[j * incx];
|
|
||||||
for (const auto i : c10::irange(m)) {
|
|
||||||
sum[i] += z * column_[i];
|
|
||||||
}
|
|
||||||
}
|
|
||||||
if (beta == 0.0) {
|
|
||||||
for (const auto i : c10::irange(m)) {
|
|
||||||
y[i * incy] = sum[i];
|
|
||||||
}
|
|
||||||
} else {
|
|
||||||
for (const auto i : c10::irange(m)) {
|
|
||||||
y[i * incy] += sum[i];
|
|
||||||
}
|
|
||||||
}
|
|
||||||
}
|
|
||||||
|
|
||||||
#endif // defined(__aarch64__) && !defined(C10_MOBILE)
|
#endif // defined(__aarch64__) && !defined(C10_MOBILE)
|
||||||
|
|
||||||
template <typename scalar_t>
|
template <typename scalar_t>
|
||||||
static bool scal_use_fast_path(
|
bool scal_use_fast_path(
|
||||||
[[maybe_unused]] int64_t n,
|
[[maybe_unused]] int64_t n,
|
||||||
[[maybe_unused]] int64_t incx) {
|
[[maybe_unused]] int64_t incx) {
|
||||||
return false;
|
return false;
|
||||||
}
|
}
|
||||||
|
|
||||||
template <typename scalar_t>
|
template <typename scalar_t>
|
||||||
static bool gemv_use_fast_path(
|
bool gemv_use_fast_path(
|
||||||
[[maybe_unused]] char trans,
|
[[maybe_unused]] char trans,
|
||||||
[[maybe_unused]] int64_t m,
|
[[maybe_unused]] int64_t m,
|
||||||
[[maybe_unused]] int64_t n,
|
[[maybe_unused]] int64_t n,
|
||||||
@ -227,7 +166,7 @@ static bool gemv_use_fast_path(
|
|||||||
}
|
}
|
||||||
|
|
||||||
template <typename scalar_t>
|
template <typename scalar_t>
|
||||||
static void scal_fast_path(
|
void scal_fast_path(
|
||||||
[[maybe_unused]] int* n,
|
[[maybe_unused]] int* n,
|
||||||
[[maybe_unused]] scalar_t* a,
|
[[maybe_unused]] scalar_t* a,
|
||||||
[[maybe_unused]] scalar_t* x,
|
[[maybe_unused]] scalar_t* x,
|
||||||
@ -237,7 +176,7 @@ static void scal_fast_path(
|
|||||||
}
|
}
|
||||||
|
|
||||||
template <typename scalar_t>
|
template <typename scalar_t>
|
||||||
static void gemv_fast_path(
|
void gemv_fast_path(
|
||||||
[[maybe_unused]] const char* trans,
|
[[maybe_unused]] const char* trans,
|
||||||
[[maybe_unused]] const int* m,
|
[[maybe_unused]] const int* m,
|
||||||
[[maybe_unused]] const int* n,
|
[[maybe_unused]] const int* n,
|
||||||
@ -319,6 +258,10 @@ template <>
|
|||||||
void gemv_fast_path<float>(const char *trans, const int *m, const int *n, const float *alpha, const float *a, const int *lda, const float *x, const int *incx, const float *beta, float *y, const int *incy) {
|
void gemv_fast_path<float>(const char *trans, const int *m, const int *n, const float *alpha, const float *a, const int *lda, const float *x, const int *incx, const float *beta, float *y, const int *incy) {
|
||||||
sgemv_(remove_const(trans), remove_const(m), remove_const(n), remove_const(alpha), remove_const(a), remove_const(lda), remove_const(x), remove_const(incx), remove_const(beta), y, remove_const(incy));
|
sgemv_(remove_const(trans), remove_const(m), remove_const(n), remove_const(alpha), remove_const(a), remove_const(lda), remove_const(x), remove_const(incx), remove_const(beta), y, remove_const(incy));
|
||||||
}
|
}
|
||||||
|
#else
|
||||||
|
INSTANTIATE(float)
|
||||||
|
INSTANTIATE(double)
|
||||||
|
#endif // AT_BUILD_WITH_BLAS
|
||||||
|
|
||||||
INSTANTIATE(uint8_t)
|
INSTANTIATE(uint8_t)
|
||||||
INSTANTIATE(int8_t)
|
INSTANTIATE(int8_t)
|
||||||
@ -340,7 +283,7 @@ bool gemv_use_fast_path<at::BFloat16>(
|
|||||||
beta == 0.0;
|
beta == 0.0;
|
||||||
}
|
}
|
||||||
|
|
||||||
static void bf16_gemv_trans(
|
void bf16_gemv_trans(
|
||||||
const int m,
|
const int m,
|
||||||
const int n,
|
const int n,
|
||||||
const at::BFloat16 alpha,
|
const at::BFloat16 alpha,
|
||||||
@ -425,7 +368,14 @@ void gemv_fast_path<at::Half>(
|
|||||||
y,
|
y,
|
||||||
*incy);
|
*incy);
|
||||||
}
|
}
|
||||||
#else // !defined(__aarch64__))
|
#else
|
||||||
|
template <>
|
||||||
|
bool scal_use_fast_path<at::Half>(
|
||||||
|
[[maybe_unused]] int64_t n,
|
||||||
|
[[maybe_unused]] int64_t incx) {
|
||||||
|
return false;
|
||||||
|
}
|
||||||
|
|
||||||
template <>
|
template <>
|
||||||
bool gemv_use_fast_path<at::Half>(
|
bool gemv_use_fast_path<at::Half>(
|
||||||
char trans,
|
char trans,
|
||||||
@ -441,6 +391,79 @@ bool gemv_use_fast_path<at::Half>(
|
|||||||
(c10::detail::fp16_from_bits(beta.x) == 0.0f || trans == 't' || trans == 'T');
|
(c10::detail::fp16_from_bits(beta.x) == 0.0f || trans == 't' || trans == 'T');
|
||||||
}
|
}
|
||||||
|
|
||||||
|
#ifdef __ARM_FEATURE_FP16_SCALAR_ARITHMETIC
|
||||||
|
static void fp16_gemv_notrans_fp16_arith(int m, int n, const float16_t* a, const int lda, const float16_t *x, float16_t *y) {
|
||||||
|
for (auto j = 0; j < n; j++) {
|
||||||
|
auto vecCol = vdup_n_f16(x[j]);
|
||||||
|
const auto* column = a + lda * j;
|
||||||
|
for (auto i = 0; i < m; i += 4) {
|
||||||
|
auto yf16 = y + i;
|
||||||
|
auto matRow = vld1_f16(column + i);
|
||||||
|
auto resVec = j != 0 ? vld1_f16(yf16) : vdup_n_f16(0);
|
||||||
|
resVec = vfma_lane_f16(resVec, matRow, vecCol, 0);
|
||||||
|
vst1_f16(yf16, resVec);
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}
|
||||||
|
#endif
|
||||||
|
|
||||||
|
static void fp16_gemv_notrans_fp32_arith(int m, int n, const float16_t* a, const int lda, const float16_t *x, float16_t *y) {
|
||||||
|
std::vector<float> sum(m);
|
||||||
|
for (auto j = 0; j < n; j++) {
|
||||||
|
auto vecCol = vdup_n_f32(x[j]);
|
||||||
|
const auto* column = a + lda * j;
|
||||||
|
for (auto i = 0; i < m; i += 4) {
|
||||||
|
auto sf32 = sum.data() + i;
|
||||||
|
auto matRow = vcvt_f32_f16(vld1_f16(column + i));
|
||||||
|
auto resVec = j != 0 ? vld1q_f32(sf32) : vdupq_n_f32(0);
|
||||||
|
resVec = vfmaq_lane_f32(resVec, matRow, vecCol, 0);
|
||||||
|
vst1q_f32(sf32, resVec);
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
for (auto i = 0; i < m; i+= 4) {
|
||||||
|
vst1_f16(y + i, vcvt_f16_f32(vld1q_f32(sum.data() + i)));
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
void fp16_gemv_notrans(
|
||||||
|
const int m,
|
||||||
|
const int n,
|
||||||
|
const float alpha,
|
||||||
|
const Half* a,
|
||||||
|
const int lda,
|
||||||
|
const Half* x,
|
||||||
|
const int incx,
|
||||||
|
const float beta,
|
||||||
|
Half* y,
|
||||||
|
const int incy) {
|
||||||
|
if (incx == 1 && alpha == 1.0 && beta == 0.0 && m % 4 == 0 && incy == 1) {
|
||||||
|
#ifdef __ARM_FEATURE_FP16_SCALAR_ARITHMETIC
|
||||||
|
if (at::globalContext().allowFP16ReductionCPU()) {
|
||||||
|
return fp16_gemv_notrans_fp16_arith(m, n, reinterpret_cast<const float16_t*>(a), lda, reinterpret_cast<const float16_t*>(x), reinterpret_cast<float16_t*>(y));
|
||||||
|
}
|
||||||
|
#endif
|
||||||
|
return fp16_gemv_notrans_fp32_arith(m, n, reinterpret_cast<const float16_t*>(a), lda, reinterpret_cast<const float16_t*>(x), reinterpret_cast<float16_t*>(y));
|
||||||
|
}
|
||||||
|
std::vector<float> sum(m);
|
||||||
|
for (const auto j : c10::irange(n)) {
|
||||||
|
const auto* column_ = a + lda * j;
|
||||||
|
auto z = alpha * x[j * incx];
|
||||||
|
for (const auto i : c10::irange(m)) {
|
||||||
|
sum[i] += z * column_[i];
|
||||||
|
}
|
||||||
|
}
|
||||||
|
if (beta == 0.0) {
|
||||||
|
for (const auto i : c10::irange(m)) {
|
||||||
|
y[i * incy] = sum[i];
|
||||||
|
}
|
||||||
|
} else {
|
||||||
|
for (const auto i : c10::irange(m)) {
|
||||||
|
y[i * incy] += sum[i];
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
template <>
|
template <>
|
||||||
void gemv_fast_path<at::Half>(
|
void gemv_fast_path<at::Half>(
|
||||||
const char* trans,
|
const char* trans,
|
||||||
@ -488,7 +511,6 @@ void gemv_fast_path<at::Half>(
|
|||||||
INSTANTIATE(c10::Half)
|
INSTANTIATE(c10::Half)
|
||||||
INSTANTIATE(c10::BFloat16)
|
INSTANTIATE(c10::BFloat16)
|
||||||
#endif // !defined(C10_MOBILE)
|
#endif // !defined(C10_MOBILE)
|
||||||
#endif // AT_BUILD_WITH_BLAS
|
|
||||||
#undef INSTANTIATE
|
#undef INSTANTIATE
|
||||||
|
|
||||||
} // namespace blas_impl
|
} // namespace blas_impl
|
||||||
|
|||||||
@ -554,7 +554,7 @@ using is_blas_library_type = std::integral_constant<bool,
|
|||||||
std::is_same_v<scalar_t, c10::complex<float>>>;
|
std::is_same_v<scalar_t, c10::complex<float>>>;
|
||||||
|
|
||||||
template <typename scalar_t>
|
template <typename scalar_t>
|
||||||
static void gemm_batched_generic(
|
void gemm_batched_generic(
|
||||||
TransposeType transa, TransposeType transb,
|
TransposeType transa, TransposeType transb,
|
||||||
int64_t batch_size, int64_t m, int64_t n, int64_t k,
|
int64_t batch_size, int64_t m, int64_t n, int64_t k,
|
||||||
scalar_t alpha,
|
scalar_t alpha,
|
||||||
@ -568,7 +568,7 @@ static void gemm_batched_generic(
|
|||||||
}
|
}
|
||||||
|
|
||||||
template <typename scalar_t>
|
template <typename scalar_t>
|
||||||
static void gemm_batched(
|
void gemm_batched(
|
||||||
TransposeType transa, TransposeType transb,
|
TransposeType transa, TransposeType transb,
|
||||||
int64_t batch_size, int64_t m, int64_t n, int64_t k,
|
int64_t batch_size, int64_t m, int64_t n, int64_t k,
|
||||||
scalar_t alpha,
|
scalar_t alpha,
|
||||||
@ -596,7 +596,7 @@ static void gemm_batched(
|
|||||||
}
|
}
|
||||||
|
|
||||||
template <typename scalar_t>
|
template <typename scalar_t>
|
||||||
static void gemm_batched_with_stride_generic(
|
void gemm_batched_with_stride_generic(
|
||||||
TransposeType transa, TransposeType transb,
|
TransposeType transa, TransposeType transb,
|
||||||
int64_t batch_size, int64_t m, int64_t n, int64_t k,
|
int64_t batch_size, int64_t m, int64_t n, int64_t k,
|
||||||
scalar_t alpha,
|
scalar_t alpha,
|
||||||
@ -945,7 +945,7 @@ struct PackKey {
|
|||||||
}
|
}
|
||||||
};
|
};
|
||||||
|
|
||||||
static inline dnnl::memory::data_type get_dnnl_dtype(ScalarType dtype) {
|
inline dnnl::memory::data_type get_dnnl_dtype(ScalarType dtype) {
|
||||||
if (dtype == ScalarType::Float) {
|
if (dtype == ScalarType::Float) {
|
||||||
return dnnl::memory::data_type::f32;
|
return dnnl::memory::data_type::f32;
|
||||||
} else if (dtype == ScalarType::BFloat16) {
|
} else if (dtype == ScalarType::BFloat16) {
|
||||||
|
|||||||
@@ -13,13 +13,15 @@ class Tensor;
 namespace native {
 
 template<typename O, typename C>
-static void _assert_match(const O& original, const C& compared, const std::string& name) {
+void _assert_match(const O& original, const C& compared, const std::string& name) {
   if (compared) {
     bool equal = (original == compared.value());
     if (!equal) {
       std::stringstream msg;
-      msg << "Tensor " << name << " mismatch! Expected: " << compared.value() << ", Got: " << original;
-      throw std::runtime_error(msg.str());
+      msg << "Tensor " << name << " mismatch!";
+      if (!equal) {
+        throw std::runtime_error(msg.str());
+      }
     }
   }
 }
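The left-hand _assert_match includes both the expected and the observed value in the error text (the right-hand variant drops them and re-tests `equal` redundantly). An illustrative standalone restatement of the richer message, with the ATen specifics stripped away:

#include <optional>
#include <sstream>
#include <stdexcept>
#include <string>

template <typename O, typename C>
void assert_match_sketch(const O& original, const std::optional<C>& compared,
                         const std::string& name) {
  // Only check when a reference value was actually supplied.
  if (compared && !(original == compared.value())) {
    std::stringstream msg;
    msg << "Tensor " << name << " mismatch! Expected: " << compared.value()
        << ", Got: " << original;
    throw std::runtime_error(msg.str());
  }
}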
@@ -437,19 +437,4 @@ inline bool xpu_conv_use_channels_last(const at::Tensor& input, const at::Tensor
   return is_channel_last(input) || is_channel_last(weight);
 }
 
-inline bool mps_conv_use_channels_last(const at::Tensor& input, const at::Tensor& weight) {
-
-  // check layout only for mps tensor.
-  if (!input.is_mps() || !weight.is_mps()) {
-    return false;
-  }
-  if (!input.defined() || input.is_sparse()) {
-    // suggest channels_first
-    return false;
-  }
-
-  auto fmt = input.suggest_memory_format();
-  return fmt == at::MemoryFormat::ChannelsLast || fmt == at::MemoryFormat::ChannelsLast3d;
-}
-
 } // namespace at::native
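The mps_conv_use_channels_last helper present on the left keys off Tensor::suggest_memory_format(). A short sketch of how that suggestion is typically consumed when materializing a channels-last tensor (illustrative, public ATen API only; the helper name is ours):

#include <ATen/ATen.h>

at::Tensor to_preferred_layout(const at::Tensor& input) {
  auto fmt = input.suggest_memory_format();
  if (fmt == at::MemoryFormat::ChannelsLast ||
      fmt == at::MemoryFormat::ChannelsLast3d) {
    return input.contiguous(fmt);  // NHWC / NDHWC layout
  }
  return input.contiguous();       // default NCHW layout
}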
@ -30,10 +30,6 @@
|
|||||||
#include <ATen/native/mkldnn/Utils.h>
|
#include <ATen/native/mkldnn/Utils.h>
|
||||||
#endif
|
#endif
|
||||||
|
|
||||||
#ifdef USE_MPS
|
|
||||||
#include <ATen/mps/MPSDevice.h>
|
|
||||||
#endif
|
|
||||||
|
|
||||||
#ifndef AT_PER_OPERATOR_HEADERS
|
#ifndef AT_PER_OPERATOR_HEADERS
|
||||||
#include <ATen/Functions.h>
|
#include <ATen/Functions.h>
|
||||||
#include <ATen/NativeFunctions.h>
|
#include <ATen/NativeFunctions.h>
|
||||||
@ -97,7 +93,7 @@ static bool conv_benchmark_empty_cache = true;
|
|||||||
|
|
||||||
// Check workload to activate fast depthwise FP16 cudnn conv kernels
|
// Check workload to activate fast depthwise FP16 cudnn conv kernels
|
||||||
template <typename T>
|
template <typename T>
|
||||||
static bool check_cudnn_depthwise_workload(const at::Tensor& input, T stride) {
|
bool check_cudnn_depthwise_workload(const at::Tensor& input, T stride) {
|
||||||
auto w = at::symint::size<T>(input, 3); // same as h
|
auto w = at::symint::size<T>(input, 3); // same as h
|
||||||
auto ch = at::symint::size<T>(input, 1);
|
auto ch = at::symint::size<T>(input, 1);
|
||||||
auto bs = at::symint::size<T>(input, 0);
|
auto bs = at::symint::size<T>(input, 0);
|
||||||
@ -220,7 +216,7 @@ static bool check_cudnn_depthwise_workload(const at::Tensor& input, T stride) {
|
|||||||
|
|
||||||
// simplified version for cudnn 8.2 and above
|
// simplified version for cudnn 8.2 and above
|
||||||
template <typename T>
|
template <typename T>
|
||||||
static bool check_cudnn_depthwise_workload_with_filter(const at::Tensor& input, T stride, const at::Tensor& weight) {
|
bool check_cudnn_depthwise_workload_with_filter(const at::Tensor& input, T stride, const at::Tensor& weight) {
|
||||||
// 1D conv
|
// 1D conv
|
||||||
if(at::symint::size<T>(input, 2) == 1 && stride == 1){
|
if(at::symint::size<T>(input, 2) == 1 && stride == 1){
|
||||||
return true;
|
return true;
|
||||||
@ -640,7 +636,7 @@ REGISTER_NO_CPU_DISPATCH(miopen_convolution_transpose_backward_stub)
|
|||||||
REGISTER_NO_CPU_DISPATCH(miopen_depthwise_convolution_backward_stub)
|
REGISTER_NO_CPU_DISPATCH(miopen_depthwise_convolution_backward_stub)
|
||||||
|
|
||||||
template <typename T>
|
template <typename T>
|
||||||
static std::ostream& operator<<(std::ostream & out, const ConvParams<T>& params) {
|
std::ostream& operator<<(std::ostream & out, const ConvParams<T>& params) {
|
||||||
out << "ConvParams {"
|
out << "ConvParams {"
|
||||||
<< " stride = " << IntArrayRef{params.stride}
|
<< " stride = " << IntArrayRef{params.stride}
|
||||||
<< " padding = " << ArrayRef<T>{params.padding}
|
<< " padding = " << ArrayRef<T>{params.padding}
|
||||||
@ -1203,7 +1199,7 @@ at::Tensor convolution_overrideable(
|
|||||||
// a bool indicating whether the bias is defined. This is done to save memory by
|
// a bool indicating whether the bias is defined. This is done to save memory by
|
||||||
// avoiding saving the full bias tensor for backward.
|
// avoiding saving the full bias tensor for backward.
|
||||||
template <typename T>
|
template <typename T>
|
||||||
static ConvBackend _select_conv_backend(
|
ConvBackend _select_conv_backend(
|
||||||
const Tensor& input,
|
const Tensor& input,
|
||||||
const Tensor& weight,
|
const Tensor& weight,
|
||||||
const std::optional<Tensor>& bias,
|
const std::optional<Tensor>& bias,
|
||||||
@ -1417,7 +1413,7 @@ static inline at::MemoryFormat determine_backend_memory_format(
|
|||||||
const Tensor& input,
|
const Tensor& input,
|
||||||
const Tensor& weight,
|
const Tensor& weight,
|
||||||
const ConvBackend backend) {
|
const ConvBackend backend) {
|
||||||
auto backend_memory_format = at::MemoryFormat::Contiguous;
|
at::MemoryFormat backend_memory_format = at::MemoryFormat::Contiguous;
|
||||||
#if !defined(C10_MOBILE)
|
#if !defined(C10_MOBILE)
|
||||||
auto k = weight.ndimension();
|
auto k = weight.ndimension();
|
||||||
// See Note [Mobile check segfaults]
|
// See Note [Mobile check segfaults]
|
||||||
@ -1455,17 +1451,6 @@ static inline at::MemoryFormat determine_backend_memory_format(
|
|||||||
backend_memory_format = (k == 5) ? at::MemoryFormat::ChannelsLast3d : at::MemoryFormat::ChannelsLast;
|
backend_memory_format = (k == 5) ? at::MemoryFormat::ChannelsLast3d : at::MemoryFormat::ChannelsLast;
|
||||||
}
|
}
|
||||||
break;
|
break;
|
||||||
case ConvBackend::Mps:
|
|
||||||
case ConvBackend::MpsTranspose:
|
|
||||||
if (mps_conv_use_channels_last(input, weight)) {
|
|
||||||
#ifdef USE_MPS
|
|
||||||
if (!mps::is_macos_13_or_newer(mps::MacOSVersion::MACOS_VER_15_0_PLUS)) {
|
|
||||||
break;
|
|
||||||
}
|
|
||||||
#endif
|
|
||||||
backend_memory_format = (k == 5) ? MemoryFormat::ChannelsLast3d : MemoryFormat::ChannelsLast;
|
|
||||||
}
|
|
||||||
break;
|
|
||||||
default:
|
default:
|
||||||
backend_memory_format = at::MemoryFormat::Contiguous;
|
backend_memory_format = at::MemoryFormat::Contiguous;
|
||||||
}
|
}
|
||||||
|
|||||||
@ -1059,7 +1059,7 @@ static Tensor apply_bag_size_backward(
|
|||||||
}
|
}
|
||||||
|
|
||||||
template <typename scalar_t>
|
template <typename scalar_t>
|
||||||
static void embedding_bag_cpu_max_out(
|
void embedding_bag_cpu_max_out(
|
||||||
Tensor* max_indices,
|
Tensor* max_indices,
|
||||||
const Tensor& weight,
|
const Tensor& weight,
|
||||||
const Tensor& indices,
|
const Tensor& indices,
|
||||||
@ -1505,7 +1505,7 @@ static std::vector<index_t> compute_counts_uniq(
|
|||||||
}
|
}
|
||||||
|
|
||||||
template <typename scalar_t>
|
template <typename scalar_t>
|
||||||
static void _embedding_bag_dense_backward_cpu_sum_mean(
|
void _embedding_bag_dense_backward_cpu_sum_mean(
|
||||||
const Tensor& grad,
|
const Tensor& grad,
|
||||||
const Tensor& indices_,
|
const Tensor& indices_,
|
||||||
const Tensor& offset2bag_,
|
const Tensor& offset2bag_,
|
||||||
@ -1641,7 +1641,7 @@ Tensor _embedding_bag_dense_backward_cpu(const Tensor &grad_, const Tensor &indi
|
|||||||
}
|
}
|
||||||
|
|
||||||
template<typename scalar_t>
|
template<typename scalar_t>
|
||||||
static Tensor _embedding_bag_per_sample_weights_backward_cpu_template(
|
Tensor _embedding_bag_per_sample_weights_backward_cpu_template(
|
||||||
const Tensor& grad,
|
const Tensor& grad,
|
||||||
const Tensor& weight, // NB: embedding table, not per_sample_weights
|
const Tensor& weight, // NB: embedding table, not per_sample_weights
|
||||||
const Tensor& indices_,
|
const Tensor& indices_,
|
||||||
|
|||||||
@ -5,7 +5,6 @@
|
|||||||
#include <ATen/WrapDimUtilsMulti.h>
|
#include <ATen/WrapDimUtilsMulti.h>
|
||||||
#include <ATen/TensorOperators.h>
|
#include <ATen/TensorOperators.h>
|
||||||
#include <c10/util/irange.h>
|
#include <c10/util/irange.h>
|
||||||
#include <c10/core/GradMode.h>
|
|
||||||
#include <c10/core/SymInt.h>
|
#include <c10/core/SymInt.h>
|
||||||
#include <c10/util/MaybeOwned.h>
|
#include <c10/util/MaybeOwned.h>
|
||||||
#include <ATen/TensorSubclassLikeUtils.h>
|
#include <ATen/TensorSubclassLikeUtils.h>
|
||||||
@ -159,11 +158,11 @@ static Tensor sumproduct_pair(const Tensor& left_, const Tensor& right_, IntArra
|
|||||||
Tensor left = left_;
|
Tensor left = left_;
|
||||||
Tensor right = right_;
|
Tensor right = right_;
|
||||||
for (const auto i : c10::irange(dim)) {
|
for (const auto i : c10::irange(dim)) {
|
||||||
auto sl = TORCH_GUARD_SIZE_OBLIVIOUS(left.sym_size(i).sym_ne(1));
|
auto sl = left.sym_size(i)!=1;
|
||||||
auto sr = TORCH_GUARD_SIZE_OBLIVIOUS(right.sym_size(i).sym_ne(1));
|
auto sr = right.sym_size(i)!=1;
|
||||||
if (sum_dims[i]) { // first dimensions that will be summed over after multiplication
|
if (sum_dims[i]) { // first dimensions that will be summed over after multiplication
|
||||||
if (sl && sr) { // dimensions nontrivially in both left and right must be of the same size
|
if (sl && sr) { // dimensions nontrivially in both left and right must be of the same size
|
||||||
TORCH_SYM_CHECK(left.sym_size(i).sym_eq(right.sym_size(i)), "non-broadcast dimensions must match");
|
TORCH_CHECK(left.sym_size(i)==right.sym_size(i), "non-broadcast dimensions must match");
|
||||||
sum_size *= left.sym_size(i);
|
sum_size *= left.sym_size(i);
|
||||||
} else if (sl) { // if it is only in one of left and right, we can sum right away
|
} else if (sl) { // if it is only in one of left and right, we can sum right away
|
||||||
left = left.sum(i, true);
|
left = left.sum(i, true);
|
||||||
@ -172,7 +171,7 @@ static Tensor sumproduct_pair(const Tensor& left_, const Tensor& right_, IntArra
|
|||||||
}
|
}
|
||||||
} else if (sl && sr) { // now deal with dimensions that will be in the output
|
} else if (sl && sr) { // now deal with dimensions that will be in the output
|
||||||
// dimensions nontrivially in both left and right must be of the same size
|
// dimensions nontrivially in both left and right must be of the same size
|
||||||
TORCH_SYM_CHECK(left.sym_size(i).sym_eq(right.sym_size(i)), "non-broadcast dimensions must match");
|
TORCH_CHECK(left.sym_size(i)==right.sym_size(i), "non-broadcast dimensions must match");
|
||||||
lro.push_back(i);
|
lro.push_back(i);
|
||||||
lro_size *= left.sym_size(i);
|
lro_size *= left.sym_size(i);
|
||||||
} else if (sl) { // keep track of dimensions appearing only once
|
} else if (sl) { // keep track of dimensions appearing only once
|
||||||
@ -482,10 +481,10 @@ Tensor einsum(std::string_view equation, TensorList operands, at::OptionalIntArr
|
|||||||
// Iterate over each dimension covered by ellipsis
|
// Iterate over each dimension covered by ellipsis
|
||||||
const auto ndim = operands[i].ndimension() - (static_cast<int64_t>(op_labels[i].size()) - 1);
|
const auto ndim = operands[i].ndimension() - (static_cast<int64_t>(op_labels[i].size()) - 1);
|
||||||
for (auto j = ell_num_dim - ndim; j < ell_num_dim; ++j) {
|
for (auto j = ell_num_dim - ndim; j < ell_num_dim; ++j) {
|
||||||
if (TORCH_GUARD_SIZE_OBLIVIOUS(op.sym_size(dim).sym_ne(1))) {
|
if (op.sym_size(dim) != 1) {
|
||||||
// Update ellipsis size
|
// Update ellipsis size
|
||||||
TORCH_SYM_CHECK(
|
TORCH_CHECK(
|
||||||
ell_sizes[j].sym_eq(1).sym_or(ell_sizes[j].sym_eq(op.sym_size(dim))),
|
ell_sizes[j] == 1 || ell_sizes[j] == op.sym_size(dim),
|
||||||
"einsum(): dimension ",
|
"einsum(): dimension ",
|
||||||
dim,
|
dim,
|
||||||
" covered by ellipsis in operand ",
|
" covered by ellipsis in operand ",
|
||||||
@ -501,10 +500,10 @@ Tensor einsum(std::string_view equation, TensorList operands, at::OptionalIntArr
|
|||||||
permutation[ell_index + j] = dim++;
|
permutation[ell_index + j] = dim++;
|
||||||
}
|
}
|
||||||
} else if (permutation[label_perm_index[s]] == -1) {
|
} else if (permutation[label_perm_index[s]] == -1) {
|
||||||
if (TORCH_GUARD_SIZE_OBLIVIOUS(op.sym_size(dim).sym_ne(1))) {
|
if (op.sym_size(dim) != 1) {
|
||||||
// Update subscript
|
// Update subscript
|
||||||
TORCH_SYM_CHECK(
|
TORCH_CHECK(
|
||||||
label_size[s].sym_eq(1).sym_or(label_size[s].sym_eq(op.sym_size(dim))),
|
label_size[s] == 1 || label_size[s] == op.sym_size(dim),
|
||||||
"einsum(): subscript ",
|
"einsum(): subscript ",
|
||||||
subscript_to_label(s),
|
subscript_to_label(s),
|
||||||
" has size ",
|
" has size ",
|
||||||
@ -579,17 +578,16 @@ Tensor einsum(std::string_view equation, TensorList operands, at::OptionalIntArr
|
|||||||
SmallVector<int64_t, 5> a_dims_to_sum;
|
SmallVector<int64_t, 5> a_dims_to_sum;
|
||||||
SmallVector<int64_t, 5> b_dims_to_sum;
|
SmallVector<int64_t, 5> b_dims_to_sum;
|
||||||
for (auto dim = out_num_dim; dim < perm_index; ++dim) {
|
for (auto dim = out_num_dim; dim < perm_index; ++dim) {
|
||||||
if (TORCH_GUARD_SIZE_OBLIVIOUS(a.sym_size(dim).sym_ne(1))
|
if (a.sym_size(dim) != 1 && b.sym_size(dim) != 1) {
|
||||||
&& TORCH_GUARD_SIZE_OBLIVIOUS(b.sym_size(dim).sym_ne(1))) {
|
|
||||||
if (--dim_counts[dim] == 1) {
|
if (--dim_counts[dim] == 1) {
|
||||||
sum_dims.push_back(dim);
|
sum_dims.push_back(dim);
|
||||||
dim_counts[dim] = 0;
|
dim_counts[dim] = 0;
|
||||||
}
|
}
|
||||||
} else if (dim_counts[dim] == 1) {
|
} else if (dim_counts[dim] == 1) {
|
||||||
if (TORCH_GUARD_SIZE_OBLIVIOUS(a.sym_size(dim).sym_ne(1))) {
|
if (a.sym_size(dim) != 1) {
|
||||||
a_dims_to_sum.push_back(dim);
|
a_dims_to_sum.push_back(dim);
|
||||||
dim_counts[dim] = 0;
|
dim_counts[dim] = 0;
|
||||||
} else if (TORCH_GUARD_SIZE_OBLIVIOUS(b.sym_size(dim).sym_ne(1))) {
|
} else if (b.sym_size(dim) != 1) {
|
||||||
b_dims_to_sum.push_back(dim);
|
b_dims_to_sum.push_back(dim);
|
||||||
dim_counts[dim] = 0;
|
dim_counts[dim] = 0;
|
||||||
}
|
}
|
||||||
@ -833,14 +831,6 @@ Tensor &tensordot_out(const Tensor& input1, const Tensor& input2, IntArrayRef di
|
|||||||
auto output_device = result.device();
|
auto output_device = result.device();
|
||||||
auto input1_device = input1.device();
|
auto input1_device = input1.device();
|
||||||
auto input2_device = input2.device();
|
auto input2_device = input2.device();
|
||||||
|
|
||||||
if(result.defined()) {
|
|
||||||
TORCH_CHECK(
|
|
||||||
!(result.requires_grad() && at::GradMode::is_enabled() && result.sizes() != result_tmp.sizes()),
|
|
||||||
"tensordot(): the 'out' tensor was specified and requires gradients, and its shape does not match the expected result. "
|
|
||||||
"Either remove the 'out' argument, ensure it does not require gradients, or make sure its shape matches the expected output."
|
|
||||||
);
|
|
||||||
}
|
|
||||||
// check if the input & output tensors are on the same device.
|
// check if the input & output tensors are on the same device.
|
||||||
TORCH_CHECK(
|
TORCH_CHECK(
|
||||||
(output_device == input1_device) && (input1_device == input2_device),
|
(output_device == input1_device) && (input1_device == input2_device),
|
||||||
|
|||||||
@@ -285,7 +285,7 @@ TORCH_META_FUNC(_linalg_slogdet)(const Tensor& A) {
 }
 
 template <typename Meta>
-static void common_checks_baddbmm_bmm(Meta& meta, const Tensor& batch1, const Tensor& batch2, const Scalar& beta, const Scalar& alpha, bool is_bmm, const std::optional<Tensor>& self_baddbmm = std::nullopt) {
+void common_checks_baddbmm_bmm(Meta& meta, const Tensor& batch1, const Tensor& batch2, const Scalar& beta, const Scalar& alpha, bool is_bmm, const std::optional<Tensor>& self_baddbmm = std::nullopt) {
   TORCH_CHECK(batch1.dim() == 3, "batch1 must be a 3D tensor");
   TORCH_CHECK(batch2.dim() == 3, "batch2 must be a 3D tensor");
 
@@ -1639,7 +1639,7 @@ TORCH_IMPL_FUNC(mm_out_cpu)(const Tensor & self, const Tensor & mat2, const Tens
 }
 
 template <typename scalar_t, bool is_bmm>
-static inline void baddbmm_cpu_kernel(const Tensor& result, const Tensor& self, const Tensor& mat2, const Scalar& beta_, const Scalar& alpha_) {
+inline void baddbmm_cpu_kernel(const Tensor& result, const Tensor& self, const Tensor& mat2, const Scalar& beta_, const Scalar& alpha_) {
   int64_t bs = result.size(0);
   int64_t is = result.size(1);
   int64_t js = result.size(2);
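common_checks_baddbmm_bmm enforces the usual batched-matmul shape contract: batch1 is (b, n, m), batch2 is (b, m, p), and the result is (b, n, p). A hedged user-level sketch of the same checks around at::bmm (the wrapper name is ours, not ATen's):

#include <ATen/ATen.h>

at::Tensor bmm_checked(const at::Tensor& batch1, const at::Tensor& batch2) {
  TORCH_CHECK(batch1.dim() == 3, "batch1 must be a 3D tensor");
  TORCH_CHECK(batch2.dim() == 3, "batch2 must be a 3D tensor");
  TORCH_CHECK(batch1.size(0) == batch2.size(0) && batch1.size(2) == batch2.size(1),
              "batch1 and batch2 shapes cannot be multiplied");
  return at::bmm(batch1, batch2);  // (b, n, m) x (b, m, p) -> (b, n, p)
}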
@@ -126,7 +126,6 @@ std::tuple<Tensor, Tensor, size_t, std::vector<int64_t>> ctc_loss_allocate_outpu
 // the alphas from the user by only returning the loss.
 template<typename scalar_t, ScalarType target_scalar_type>
 std::tuple<Tensor, Tensor> ctc_loss_cpu_template(const Tensor& log_probs, const Tensor& targets, IntArrayRef input_lengths, IntArrayRef target_lengths, int64_t BLANK) {
-  TORCH_CHECK(log_probs.numel() > 0, "log_probs tensor must not be empty");
   // log_probs: input_len x batch_size x num_labels
   // targets [int64]: batch_size x target_length OR sum(target_lengths)
   constexpr scalar_t neginf = -std::numeric_limits<scalar_t>::infinity();
@@ -20,6 +20,9 @@
 
 namespace at::native {
 
+template<typename scalar_t>
+void gemv(char trans, int64_t m, int64_t n, scalar_t alpha, scalar_t *a, int64_t lda, scalar_t *x, int64_t incx, scalar_t beta, scalar_t *y, int64_t incy);
+
 namespace {
 
 static inline void slow_conv_transpose3d_shape_check(
@ -132,7 +132,7 @@ static inline MemoryFormat suggest_memory_format_contig(const Tensor& t) {
|
|||||||
}
|
}
|
||||||
|
|
||||||
template<typename scalar_t, typename param_t>
|
template<typename scalar_t, typename param_t>
|
||||||
static std::tuple<Tensor,Tensor,Tensor> batch_norm_cpu_transform_input_template(
|
std::tuple<Tensor,Tensor,Tensor> batch_norm_cpu_transform_input_template(
|
||||||
const Tensor& input, const Tensor& weight, const Tensor& bias,
|
const Tensor& input, const Tensor& weight, const Tensor& bias,
|
||||||
const Tensor& save_mean /* optional */, const Tensor& save_invstd /* optional */,
|
const Tensor& save_mean /* optional */, const Tensor& save_invstd /* optional */,
|
||||||
const Tensor& running_mean /* optional */, const Tensor& running_var /* optional */,
|
const Tensor& running_mean /* optional */, const Tensor& running_var /* optional */,
|
||||||
@ -197,7 +197,7 @@ static std::tuple<Tensor,Tensor,Tensor> batch_norm_cpu_transform_input_template(
|
|||||||
}
|
}
|
||||||
|
|
||||||
template<typename scalar_t, typename param_t, template<typename T> class VarTransform>
|
template<typename scalar_t, typename param_t, template<typename T> class VarTransform>
|
||||||
static std::tuple<Tensor,Tensor> batch_norm_cpu_update_stats_template(
|
std::tuple<Tensor,Tensor> batch_norm_cpu_update_stats_template(
|
||||||
const Tensor& input, const Tensor& running_mean, const Tensor& running_var,
|
const Tensor& input, const Tensor& running_mean, const Tensor& running_var,
|
||||||
double momentum, double eps, Tensor& save_mean, Tensor& save_var_transform) {
|
double momentum, double eps, Tensor& save_mean, Tensor& save_var_transform) {
|
||||||
|
|
||||||
@ -287,7 +287,7 @@ static std::tuple<Tensor,Tensor> batch_norm_cpu_update_stats_template(
|
|||||||
}
|
}
|
||||||
|
|
||||||
template<typename scalar_t, typename param_t, template<typename T> class VarTransform>
|
template<typename scalar_t, typename param_t, template<typename T> class VarTransform>
|
||||||
static std::tuple<Tensor,Tensor> batch_norm_cpu_update_stats_template(
|
std::tuple<Tensor,Tensor> batch_norm_cpu_update_stats_template(
|
||||||
const Tensor& input, const Tensor& running_mean, const Tensor& running_var,
|
const Tensor& input, const Tensor& running_mean, const Tensor& running_var,
|
||||||
double momentum, double eps) {
|
double momentum, double eps) {
|
||||||
int64_t n_input = input.size(1);
|
int64_t n_input = input.size(1);
|
||||||
@ -306,7 +306,7 @@ static std::tuple<Tensor,Tensor> batch_norm_cpu_update_stats_template(
|
|||||||
}
|
}
|
||||||
|
|
||||||
template<typename scalar_t, typename param_t>
|
template<typename scalar_t, typename param_t>
|
||||||
static std::tuple<Tensor, Tensor, Tensor> batch_norm_backward_cpu_template(
|
std::tuple<Tensor, Tensor, Tensor> batch_norm_backward_cpu_template(
|
||||||
const Tensor& grad_out_, const Tensor& input, const Tensor& weight,
|
const Tensor& grad_out_, const Tensor& input, const Tensor& weight,
|
||||||
const Tensor& running_mean, const Tensor& running_var, const Tensor& save_mean, const Tensor& save_invstd,
|
const Tensor& running_mean, const Tensor& running_var, const Tensor& save_mean, const Tensor& save_invstd,
|
||||||
bool train, double eps, std::array<bool,3> grad_input_mask) {
|
bool train, double eps, std::array<bool,3> grad_input_mask) {
|
||||||
|
|||||||
@@ -472,7 +472,7 @@ Tensor& logcumsumexp_out(const Tensor& self, int64_t dim, Tensor& result) {
 }
 
 template <class Stub>
-static void impl_func_cum_ops(
+void impl_func_cum_ops(
     const Tensor& self,
     int64_t dim,
     const Tensor& result,
@@ -769,7 +769,7 @@ inline bool isnan_(T x) {
 }
 
 template<typename T1, typename T2, typename Operation>
-static void cummax_cummin_helper(const T1* self_data, T1* values_data, T2* indices_data,
+void cummax_cummin_helper(const T1* self_data, T1* values_data, T2* indices_data,
     int self_dim_size, int self_stride, int values_stride, int indices_stride) {
   Operation op;
   T1 out = c10::load(self_data);
@@ -1182,7 +1182,7 @@ std::vector<Tensor> gradient(const Tensor& self, IntArrayRef dim, int64_t edge_o
 
 // ALL REDUCE #################################################################
 
-static inline bool should_use_acc_buffer(at::TensorIterator& iter) {
+inline bool should_use_acc_buffer(at::TensorIterator& iter) {
   const auto ndim = iter.ndim();
   if (!iter.device().is_cpu() || iter.noutputs() != 1) {
     return false;
@@ -1591,7 +1591,7 @@ Tensor norm(const Tensor& self, const Scalar& p) {
   return at::norm(self, p, IntArrayRef{}, false);
 }
 
-static inline TensorIterator get_allany_iter(
+inline TensorIterator get_allany_iter(
     const Tensor& self,
     const Tensor& result,
     OptionalIntArrayRef dims,
@@ -1608,7 +1608,7 @@ static inline TensorIterator get_allany_iter(
 }
 
 template <int identity, typename Stub>
-static inline void allany_impl(
+inline void allany_impl(
     const Tensor& self,
     const Tensor& result,
     OptionalIntArrayRef dims,
@@ -1653,7 +1653,7 @@ TORCH_IMPL_FUNC(any_all_out)(const Tensor& self, const Tensor& result) {
 }
 
 template <bool is_all>
-static Tensor allany_dims_default(const Tensor &self, OptionalIntArrayRef dim, bool keepdim) {
+Tensor allany_dims_default(const Tensor &self, OptionalIntArrayRef dim, bool keepdim) {
   // Default implementation in terms of all-reduce or single dim reduce
   if (!dim) {
     Tensor out;
@@ -1732,7 +1732,7 @@ TORCH_IMPL_FUNC(amax_out) (const Tensor& self, IntArrayRef dim, bool keepdim, co
 }
 
 template <class Stub>
-static void argmax_argmin_impl(
+void argmax_argmin_impl(
     const Tensor& self,
     std::optional<int64_t> dim,
     bool keepdim,
@@ -9,7 +9,6 @@
 #include <ATen/NativeFunctions.h>
 #else
 #include <ATen/ops/resize_as_native.h>
-#include <ATen/ops/resize_as_sparse_native.h>
 #include <ATen/ops/resize_native.h>
 #include <ATen/ops/resize.h>
 #include <ATen/ops/_resize_output.h>
@@ -22,7 +21,7 @@ namespace at::native {
 
 // Returns true if resize is necessary
 template <typename T>
-static bool _resize_output_check(const Tensor& output, ArrayRef<T> shape) {
+bool _resize_output_check(const Tensor& output, ArrayRef<T> shape) {
   // Tests for resizing of tensors with one or more elements
   if (at::symint::sizes<T>(output).equals(shape)) {
     return false;
@@ -57,7 +56,7 @@ static void native_resize_(const Tensor& output, SymIntArrayRef shape) {
 }
 
 template <typename T>
-static bool _resize_output(const Tensor& output, ArrayRef<T> shape) {
+bool _resize_output(const Tensor& output, ArrayRef<T> shape) {
   if (_resize_output_check<T>(output, shape)) {
     // avoid a redispatch for cpu and cuda.
     // TODO: when resize_cuda_ is re-written to be unified with resize_,
@@ -197,7 +196,7 @@ static void _maybe_resize_storage(TensorImpl* self, c10::SymInt new_size_bytes)
 }
 
 template <typename T>
-static TensorImpl* _resize_impl_(
+TensorImpl* _resize_impl_(
     TensorImpl* self,
     ArrayRef<T> size,
     at::OptionalArrayRef<T> stride,
@@ -235,7 +234,7 @@ TensorImpl* resize_impl_cpu_(
 }
 
 template <typename T>
-static const Tensor& _resize_(
+const Tensor& _resize_(
     const Tensor& self,
     ArrayRef<T> size,
     std::optional<MemoryFormat> optional_memory_format) {
@@ -147,6 +147,7 @@
 
 namespace at::native {
 
+std::string shapes_as_str(TensorList tensors);
 AdvancedIndex make_info(Tensor self, IOptTensorListRef orig);
 
 } // namespace at::native
@@ -185,7 +186,7 @@ TORCH_META_FUNC(gather)
 }
 
 template <bool use_new_options = false, typename Meta>
-static void scatter_meta_impl(
+void scatter_meta_impl(
     Meta& meta,
     const Tensor& self,
     int64_t dim,
@@ -357,7 +358,7 @@ TORCH_PRECOMPUTE_META_FUNC(index_copy)
 }
 
 template <typename Meta>
-static void index_func_meta_impl(
+void index_func_meta_impl(
     Meta& meta,
     const Tensor& self,
     int64_t dim,
@@ -592,6 +593,21 @@ static bool all_strides_match(TensorList tensors) {
   return true;
 }
 
+inline std::string shapes_as_str(TensorList tensors) {
+  std::ostringstream os;
+  bool first = true;
+  for (auto& tensor : tensors) {
+    if (tensor.defined()) {
+      if (!first) {
+        os << ", ";
+      }
+      os << tensor.sizes();
+      first = false;
+    }
+  }
+  return os.str();
+}
+
 // Replace indexed dimensions in src with stride 0 and the size of the result
 // tensor. The offset in these dimensions is computed by the kernel using the
 // index tensor's values and the stride of src. The new shape is not meaningful.
@@ -2233,7 +2249,7 @@ template <
     typename T,
     typename ReduceStub,
     typename FillStub>
-static void scatter_impl(
+void scatter_impl(
     const Tensor& self,
     int64_t dim,
     const Tensor& index,
@@ -2806,7 +2822,7 @@ Tensor _gather_sparse_backward(
 }
 
 template <typename scalar_t>
-static int64_t count_nonzero_impl(TensorIteratorBase& iter, Range range) {
+int64_t count_nonzero_impl(TensorIteratorBase& iter, Range range) {
   int64_t num_nonzero = 0;
 
   auto loop = [&](char** data, const int64_t* strides, int64_t n) {
@@ -569,7 +569,7 @@ static void isin_sorting(
 }
 
 template <typename... Args>
-static Device out_device(Args&... inps) {
+Device out_device(Args&... inps) {
   for (const auto& i : {inps...}) {
     if (i.device() != at::kCPU) {
       return i.device();
@@ -739,7 +739,7 @@ std::tuple<Tensor&, Tensor&> mode_out(
 }
 
 template <class Stub>
-static void minmax_out_impl(
+void minmax_out_impl(
     const Tensor& self,
     int64_t dim,
     bool keepdim,
@@ -806,7 +806,7 @@ Tensor sparse_compressed_to_dense(
 
 // Computes the strides for view_dtype output when the view dtype is
 // smaller than the original dtype
-static inline SymDimVector compute_strides_for_view_dtype_downsize(
+inline SymDimVector compute_strides_for_view_dtype_downsize(
     SymIntArrayRef old_strides,
     int64_t size_ratio,
     ScalarType old_dtype,
@@ -832,7 +832,7 @@ static inline SymDimVector compute_strides_for_view_dtype_downsize(
 
 // Computes the strides for view_dtype output when the view dtype is
 // larger than the original dtype
-static inline SymDimVector compute_strides_for_view_dtype_upsize(
+inline SymDimVector compute_strides_for_view_dtype_upsize(
     SymIntArrayRef old_strides,
     int64_t size_ratio,
     ScalarType old_dtype,
@@ -1023,9 +1023,22 @@ static Tensor _mask_to_indices(const Tensor& mask) {
 }
 
 static std::pair<Tensor, Tensor> _not_zero_mask_to_col_row_indices(
-    Tensor not_zero_mask) {
-  auto nz = not_zero_mask.nonzero();
-  return {nz.select(1, 1), nz.select(1, 0)};
+    Tensor not_zero_mask,
+    ScalarType index_dtype,
+    Device index_device) {
+  auto col_indices =
+      at::native::arange(
+          not_zero_mask.size(-1), index_dtype, kStrided, index_device)
+          .view({1, not_zero_mask.size(-1)})
+          .expand_as(not_zero_mask)
+          .masked_select(not_zero_mask);
+  auto row_indices =
+      at::native::arange(
+          not_zero_mask.size(-2), index_dtype, kStrided, index_device)
+          .view({not_zero_mask.size(-2), 1})
+          .expand_as(not_zero_mask)
+          .masked_select(not_zero_mask);
+  return std::pair<Tensor, Tensor>(col_indices, row_indices);
 }
 
 // Sparse layout conversions Start
@@ -1306,8 +1319,8 @@ static Tensor dense_to_sparse_compressed(
   Tensor col_indices;
   Tensor compressed_indices;
   if (compressed_rows_layout) {
-    std::tie(col_indices, row_indices) =
-        _not_zero_mask_to_col_row_indices(not_zero_mask);
+    std::tie(col_indices, row_indices) = _not_zero_mask_to_col_row_indices(
+        not_zero_mask, at::kLong, not_zero_mask.device());
     compressed_indices = at::_convert_indices_from_coo_to_csr(
         row_indices, not_zero_mask.size(0), false /*out_int32*/);
     {
@@ -1315,8 +1328,8 @@ static Tensor dense_to_sparse_compressed(
       values = values.flatten(0, 1).index_select(0, mask_indices);
     }
   } else {
-    std::tie(row_indices, col_indices) =
-        _not_zero_mask_to_col_row_indices(not_zero_mask.transpose(1, 0));
+    std::tie(row_indices, col_indices) = _not_zero_mask_to_col_row_indices(
+        not_zero_mask.transpose(1, 0), at::kLong, not_zero_mask.device());
     compressed_indices = at::_convert_indices_from_coo_to_csr(
         col_indices, not_zero_mask.size(-1), false /*out_int32*/);
     {
@@ -1976,7 +1989,7 @@ TORCH_IMPL_FUNC(_convert_indices_from_csr_to_coo_structured_cpu)
  * Modified to ensure sorted BSR column indices.
  */
 template <class index_t, class scalar_t, bool compressed_rows>
-static void _compressed_to_block_compressed_cpu_kernel(
+void _compressed_to_block_compressed_cpu_kernel(
     const index_t n_compressed, // Tensor size along compressed dimension
     const index_t n_plain, // Tensor size along plain dimension
     const index_t C, // Block size along compressed dimensions
@@ -2073,7 +2086,7 @@ static void _compressed_to_block_compressed_cpu_kernel(
  * https://github.com/scipy/scipy/blob/8a64c938ddf1ae4c02a08d2c5e38daeb8d061d38/scipy/sparse/sparsetools/csr.h
  */
 template <class index_t>
-static index_t compressed_count_blocks(
+index_t compressed_count_blocks(
    const index_t n_compressed, // Tensor size along compressed dimension
    const index_t n_plain, // Tensor size along plain dimension
    const index_t C, // Block size along compressed dimensions
@@ -2097,7 +2110,7 @@ static index_t compressed_count_blocks(
 }
 
 template <Layout target_layout>
-static Tensor _compressed_to_block_compressed_cpu(
+Tensor _compressed_to_block_compressed_cpu(
     const Tensor& self,
     IntArrayRef blocksize) {
   static_assert(
@@ -2072,24 +2072,22 @@ Tensor vander(const Tensor& x, std::optional<int64_t> N, bool increasing) {
 // ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ tensor ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
 
 template <typename T>
-static Tensor tensor_cpu(ArrayRef<T> values, const TensorOptions& options) {
+Tensor tensor_cpu(ArrayRef<T> values, const TensorOptions& options) {
   return at::detail::tensor_cpu(values, options);
 }
 
 template <typename T>
-static Tensor tensor_backend(ArrayRef<T> values, const TensorOptions& options) {
+Tensor tensor_backend(ArrayRef<T> values, const TensorOptions& options) {
   return at::detail::tensor_backend(values, options);
 }
 
 template <typename T>
-static Tensor tensor_complex_cpu(
-    ArrayRef<T> values,
-    const TensorOptions& options) {
+Tensor tensor_complex_cpu(ArrayRef<T> values, const TensorOptions& options) {
   return at::detail::tensor_complex_cpu(values, options);
 }
 
 template <typename T>
-static Tensor tensor_complex_backend(
+Tensor tensor_complex_backend(
     ArrayRef<T> values,
     const TensorOptions& options) {
   return at::detail::tensor_complex_backend(values, options);
@@ -216,7 +216,7 @@
 
 namespace at::meta {
 
-static inline c10::MemoryFormat cat_compute_output_memory_format(
+inline c10::MemoryFormat cat_compute_output_memory_format(
     const MaterializedITensorListRef& inputs) {
   std::optional<c10::MemoryFormat> format = std::nullopt;
   for (const Tensor& t : inputs) {
@@ -1119,7 +1119,7 @@ std::vector<Tensor> tensor_split_sections_symint(
 }
 
 template <typename T>
-static std::vector<Tensor> _tensor_split_indices(
+std::vector<Tensor> _tensor_split_indices(
     const Tensor& self,
     ArrayRef<T> indices,
     int64_t dim) {
@@ -1417,7 +1417,7 @@ Tensor as_strided_tensorimpl(
 }
 
 template <typename T>
-static inline void setStridedUnchecked(
+inline void setStridedUnchecked(
     const Tensor& self,
     ArrayRef<T> size,
     ArrayRef<T> stride,
@@ -1922,7 +1922,7 @@ Tensor tile_symint(const Tensor& self, SymIntArrayRef reps) {
 // templated for ArrayRef<int64_t> and SmallVector<int64_t> use cases
 //
 template <typename Vec>
-static Tensor alias_with_sizes_and_strides(
+Tensor alias_with_sizes_and_strides(
     const Tensor& self,
     const Vec& sizes,
     const Vec& strides) {
@@ -1958,7 +1958,7 @@ static Tensor alias_with_sizes_and_strides(
 // SymIntArrayRef/ArrayRef<c10::SymInt> and
 // SmallVector<c10::SymInt>/SymDimVector
 template <template <typename...> typename Container>
-static Tensor alias_with_sizes_and_strides(
+Tensor alias_with_sizes_and_strides(
     const Tensor& self,
     const Container<c10::SymInt>& sizes,
     const Container<c10::SymInt>& strides) {
@@ -3290,7 +3290,7 @@ static inline std::vector<Tensor> get_stack_inputs(
   return inputs;
 }
 
-static bool inline maybe_native_stack(
+bool inline maybe_native_stack(
     Tensor& result,
     TensorList tensors,
     int64_t dim) {
@@ -3366,7 +3366,7 @@ static std::vector<Tensor> _pad_chunk(
     std::vector<int64_t> view_sizes(
         tensor_size.begin(), tensor_size.begin() + dim);
     view_sizes.insert(view_sizes.end(), {num_chunks, -1});
-    padded_tensors.push_back(padded_tensor.reshape(view_sizes));
+    padded_tensors.push_back(padded_tensor.view(view_sizes));
   }
   return padded_tensors;
 }
@@ -4021,7 +4021,7 @@ Tensor& squeeze_(Tensor& self, IntArrayRef dims) {
 // This is a hack because in-place operations on tensors treated like views
 // can be much more expensive than the same operations on non-view tensors.
 
-static inline Tensor view_impl(const Tensor& self, IntArrayRef size) {
+inline Tensor view_impl(const Tensor& self, IntArrayRef size) {
   at::DimVector inferred_size = at::infer_size_dv(size, self.numel());
   auto stride =
       at::detail::computeStride(self.sizes(), self.strides(), inferred_size);
Some files were not shown because too many files have changed in this diff.