mirror of
https://github.com/pytorch/pytorch.git
synced 2025-11-11 22:34:53 +08:00
Update (base update)
[ghstack-poisoned]
This commit is contained in:
@ -363,3 +363,8 @@ setuptools
|
||||
|
||||
ninja==1.11.1 ; platform_machine == "aarch64"
|
||||
scons==4.5.2 ; platform_machine == "aarch64"
|
||||
|
||||
pulp==2.9.0 ; python_version >= "3.8"
|
||||
#Description: required for testing ilp formulaiton under torch/distributed/_tools
|
||||
#Pinned versions: 2.9.0
|
||||
#test that import: test_sac_ilp.py
|
||||
|
||||
10
.ci/libtorch/build.sh
Normal file
10
.ci/libtorch/build.sh
Normal file
@ -0,0 +1,10 @@
|
||||
#!/usr/bin/env bash
|
||||
|
||||
# This is mostly just a shim to manywheel/build.sh
|
||||
# TODO: Make this a dedicated script to build just libtorch
|
||||
|
||||
set -ex
|
||||
|
||||
SCRIPTPATH="$( cd "$( dirname "${BASH_SOURCE[0]}" )" >/dev/null 2>&1 && pwd )"
|
||||
|
||||
USE_CUSPARSELT=0 BUILD_PYTHONLESS=1 DESIRED_PYTHON="3.9" ${SCRIPTPATH}/../manywheel/build.sh
|
||||
21
.ci/manywheel/LICENSE
Normal file
21
.ci/manywheel/LICENSE
Normal file
@ -0,0 +1,21 @@
|
||||
The MIT License (MIT)
|
||||
|
||||
Copyright (c) 2016 manylinux
|
||||
|
||||
Permission is hereby granted, free of charge, to any person obtaining a copy
|
||||
of this software and associated documentation files (the "Software"), to deal
|
||||
in the Software without restriction, including without limitation the rights
|
||||
to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
|
||||
copies of the Software, and to permit persons to whom the Software is
|
||||
furnished to do so, subject to the following conditions:
|
||||
|
||||
The above copyright notice and this permission notice shall be included in all
|
||||
copies or substantial portions of the Software.
|
||||
|
||||
THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
|
||||
IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
|
||||
FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
|
||||
AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
|
||||
LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
|
||||
OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
|
||||
SOFTWARE.
|
||||
25
.ci/manywheel/build.sh
Executable file
25
.ci/manywheel/build.sh
Executable file
@ -0,0 +1,25 @@
|
||||
#!/usr/bin/env bash
|
||||
|
||||
set -ex
|
||||
|
||||
SCRIPTPATH="$( cd "$( dirname "${BASH_SOURCE[0]}" )" >/dev/null 2>&1 && pwd )"
|
||||
|
||||
case "${GPU_ARCH_TYPE:-BLANK}" in
|
||||
BLANK)
|
||||
# Legacy behavior for CircleCI
|
||||
bash "${SCRIPTPATH}/build_cuda.sh"
|
||||
;;
|
||||
cuda)
|
||||
bash "${SCRIPTPATH}/build_cuda.sh"
|
||||
;;
|
||||
rocm)
|
||||
bash "${SCRIPTPATH}/build_rocm.sh"
|
||||
;;
|
||||
cpu | cpu-cxx11-abi | cpu-s390x | xpu)
|
||||
bash "${SCRIPTPATH}/build_cpu.sh"
|
||||
;;
|
||||
*)
|
||||
echo "Un-recognized GPU_ARCH_TYPE '${GPU_ARCH_TYPE}', exiting..."
|
||||
exit 1
|
||||
;;
|
||||
esac
|
||||
505
.ci/manywheel/build_common.sh
Normal file
505
.ci/manywheel/build_common.sh
Normal file
@ -0,0 +1,505 @@
|
||||
#!/usr/bin/env bash
|
||||
# meant to be called only from the neighboring build.sh and build_cpu.sh scripts
|
||||
|
||||
set -ex
|
||||
SOURCE_DIR="$( cd "$( dirname "${BASH_SOURCE[0]}" )" >/dev/null && pwd )"
|
||||
|
||||
|
||||
# Require only one python installation
|
||||
if [[ -z "$DESIRED_PYTHON" ]]; then
|
||||
echo "Need to set DESIRED_PYTHON env variable"
|
||||
exit 1
|
||||
fi
|
||||
if [[ -n "$BUILD_PYTHONLESS" && -z "$LIBTORCH_VARIANT" ]]; then
|
||||
echo "BUILD_PYTHONLESS is set, so need LIBTORCH_VARIANT to also be set"
|
||||
echo "LIBTORCH_VARIANT should be one of shared-with-deps shared-without-deps static-with-deps static-without-deps"
|
||||
exit 1
|
||||
fi
|
||||
|
||||
# Function to retry functions that sometimes timeout or have flaky failures
|
||||
retry () {
|
||||
$* || (sleep 1 && $*) || (sleep 2 && $*) || (sleep 4 && $*) || (sleep 8 && $*)
|
||||
}
|
||||
|
||||
# TODO move this into the Docker images
|
||||
OS_NAME=$(awk -F= '/^NAME/{print $2}' /etc/os-release)
|
||||
if [[ "$OS_NAME" == *"CentOS Linux"* ]]; then
|
||||
retry yum install -q -y zip openssl
|
||||
elif [[ "$OS_NAME" == *"AlmaLinux"* ]]; then
|
||||
retry yum install -q -y zip openssl
|
||||
elif [[ "$OS_NAME" == *"Red Hat Enterprise Linux"* ]]; then
|
||||
retry dnf install -q -y zip openssl
|
||||
elif [[ "$OS_NAME" == *"Ubuntu"* ]]; then
|
||||
# TODO: Remove this once nvidia package repos are back online
|
||||
# Comment out nvidia repositories to prevent them from getting apt-get updated, see https://github.com/pytorch/pytorch/issues/74968
|
||||
# shellcheck disable=SC2046
|
||||
sed -i 's/.*nvidia.*/# &/' $(find /etc/apt/ -type f -name "*.list")
|
||||
|
||||
retry apt-get update
|
||||
retry apt-get -y install zip openssl
|
||||
fi
|
||||
|
||||
# We use the package name to test the package by passing this to 'pip install'
|
||||
# This is the env variable that setup.py uses to name the package. Note that
|
||||
# pip 'normalizes' the name first by changing all - to _
|
||||
if [[ -z "$TORCH_PACKAGE_NAME" ]]; then
|
||||
TORCH_PACKAGE_NAME='torch'
|
||||
fi
|
||||
|
||||
if [[ -z "$TORCH_NO_PYTHON_PACKAGE_NAME" ]]; then
|
||||
TORCH_NO_PYTHON_PACKAGE_NAME='torch_no_python'
|
||||
fi
|
||||
|
||||
TORCH_PACKAGE_NAME="$(echo $TORCH_PACKAGE_NAME | tr '-' '_')"
|
||||
TORCH_NO_PYTHON_PACKAGE_NAME="$(echo $TORCH_NO_PYTHON_PACKAGE_NAME | tr '-' '_')"
|
||||
echo "Expecting the built wheels to all be called '$TORCH_PACKAGE_NAME' or '$TORCH_NO_PYTHON_PACKAGE_NAME'"
|
||||
|
||||
# Version: setup.py uses $PYTORCH_BUILD_VERSION.post$PYTORCH_BUILD_NUMBER if
|
||||
# PYTORCH_BUILD_NUMBER > 1
|
||||
build_version="$PYTORCH_BUILD_VERSION"
|
||||
build_number="$PYTORCH_BUILD_NUMBER"
|
||||
if [[ -n "$OVERRIDE_PACKAGE_VERSION" ]]; then
|
||||
# This will be the *exact* version, since build_number<1
|
||||
build_version="$OVERRIDE_PACKAGE_VERSION"
|
||||
build_number=0
|
||||
fi
|
||||
if [[ -z "$build_version" ]]; then
|
||||
build_version=1.0.0
|
||||
fi
|
||||
if [[ -z "$build_number" ]]; then
|
||||
build_number=1
|
||||
fi
|
||||
export PYTORCH_BUILD_VERSION=$build_version
|
||||
export PYTORCH_BUILD_NUMBER=$build_number
|
||||
|
||||
export CMAKE_LIBRARY_PATH="/opt/intel/lib:/lib:$CMAKE_LIBRARY_PATH"
|
||||
export CMAKE_INCLUDE_PATH="/opt/intel/include:$CMAKE_INCLUDE_PATH"
|
||||
|
||||
if [[ -e /opt/openssl ]]; then
|
||||
export OPENSSL_ROOT_DIR=/opt/openssl
|
||||
export CMAKE_INCLUDE_PATH="/opt/openssl/include":$CMAKE_INCLUDE_PATH
|
||||
fi
|
||||
|
||||
# If given a python version like 3.6m or 2.7mu, convert this to the format we
|
||||
# expect. The binary CI jobs pass in python versions like this; they also only
|
||||
# ever pass one python version, so we assume that DESIRED_PYTHON is not a list
|
||||
# in this case
|
||||
if [[ -n "$DESIRED_PYTHON" && $DESIRED_PYTHON =~ ([0-9].[0-9]+)t ]]; then
|
||||
python_digits="$(echo $DESIRED_PYTHON | tr -cd [:digit:])"
|
||||
py_majmin="${DESIRED_PYTHON}"
|
||||
DESIRED_PYTHON="cp${python_digits}-cp${python_digits}t"
|
||||
elif [[ -n "$DESIRED_PYTHON" && "$DESIRED_PYTHON" != cp* ]]; then
|
||||
python_nodot="$(echo $DESIRED_PYTHON | tr -d m.u)"
|
||||
DESIRED_PYTHON="cp${python_nodot}-cp${python_nodot}"
|
||||
if [[ ${python_nodot} -ge 310 ]]; then
|
||||
py_majmin="${DESIRED_PYTHON:2:1}.${DESIRED_PYTHON:3:2}"
|
||||
else
|
||||
py_majmin="${DESIRED_PYTHON:2:1}.${DESIRED_PYTHON:3:1}"
|
||||
fi
|
||||
fi
|
||||
|
||||
pydir="/opt/python/$DESIRED_PYTHON"
|
||||
export PATH="$pydir/bin:$PATH"
|
||||
echo "Will build for Python version: ${DESIRED_PYTHON} with ${python_installation}"
|
||||
|
||||
mkdir -p /tmp/$WHEELHOUSE_DIR
|
||||
|
||||
export PATCHELF_BIN=/usr/local/bin/patchelf
|
||||
patchelf_version=$($PATCHELF_BIN --version)
|
||||
echo "patchelf version: " $patchelf_version
|
||||
if [[ "$patchelf_version" == "patchelf 0.9" ]]; then
|
||||
echo "Your patchelf version is too old. Please use version >= 0.10."
|
||||
exit 1
|
||||
fi
|
||||
|
||||
########################################################
|
||||
# Compile wheels as well as libtorch
|
||||
#######################################################
|
||||
if [[ -z "$PYTORCH_ROOT" ]]; then
|
||||
echo "Need to set PYTORCH_ROOT env variable"
|
||||
exit 1
|
||||
fi
|
||||
pushd "$PYTORCH_ROOT"
|
||||
python setup.py clean
|
||||
retry pip install -qr requirements.txt
|
||||
case ${DESIRED_PYTHON} in
|
||||
cp31*)
|
||||
retry pip install -q --pre numpy==2.1.0
|
||||
;;
|
||||
# Should catch 3.9+
|
||||
*)
|
||||
retry pip install -q --pre numpy==2.0.2
|
||||
;;
|
||||
esac
|
||||
|
||||
if [[ "$DESIRED_DEVTOOLSET" == *"cxx11-abi"* ]]; then
|
||||
export _GLIBCXX_USE_CXX11_ABI=1
|
||||
else
|
||||
export _GLIBCXX_USE_CXX11_ABI=0
|
||||
fi
|
||||
|
||||
if [[ "$DESIRED_CUDA" == *"rocm"* ]]; then
|
||||
echo "Calling build_amd.py at $(date)"
|
||||
python tools/amd_build/build_amd.py
|
||||
fi
|
||||
|
||||
# This value comes from binary_linux_build.sh (and should only be set to true
|
||||
# for master / release branches)
|
||||
BUILD_DEBUG_INFO=${BUILD_DEBUG_INFO:=0}
|
||||
|
||||
if [[ $BUILD_DEBUG_INFO == "1" ]]; then
|
||||
echo "Building wheel and debug info"
|
||||
else
|
||||
echo "BUILD_DEBUG_INFO was not set, skipping debug info"
|
||||
fi
|
||||
|
||||
if [[ "$DISABLE_RCCL" = 1 ]]; then
|
||||
echo "Disabling NCCL/RCCL in pyTorch"
|
||||
USE_RCCL=0
|
||||
USE_NCCL=0
|
||||
USE_KINETO=0
|
||||
else
|
||||
USE_RCCL=1
|
||||
USE_NCCL=1
|
||||
USE_KINETO=1
|
||||
fi
|
||||
|
||||
echo "Calling setup.py bdist at $(date)"
|
||||
|
||||
if [[ "$USE_SPLIT_BUILD" == "true" ]]; then
|
||||
echo "Calling setup.py bdist_wheel for split build (BUILD_LIBTORCH_WHL)"
|
||||
time EXTRA_CAFFE2_CMAKE_FLAGS=${EXTRA_CAFFE2_CMAKE_FLAGS[@]} \
|
||||
BUILD_LIBTORCH_WHL=1 BUILD_PYTHON_ONLY=0 \
|
||||
BUILD_LIBTORCH_CPU_WITH_DEBUG=$BUILD_DEBUG_INFO \
|
||||
USE_NCCL=${USE_NCCL} USE_RCCL=${USE_RCCL} USE_KINETO=${USE_KINETO} \
|
||||
python setup.py bdist_wheel -d /tmp/$WHEELHOUSE_DIR
|
||||
echo "Finished setup.py bdist_wheel for split build (BUILD_LIBTORCH_WHL)"
|
||||
echo "Calling setup.py bdist_wheel for split build (BUILD_PYTHON_ONLY)"
|
||||
time EXTRA_CAFFE2_CMAKE_FLAGS=${EXTRA_CAFFE2_CMAKE_FLAGS[@]} \
|
||||
BUILD_LIBTORCH_WHL=0 BUILD_PYTHON_ONLY=1 \
|
||||
BUILD_LIBTORCH_CPU_WITH_DEBUG=$BUILD_DEBUG_INFO \
|
||||
USE_NCCL=${USE_NCCL} USE_RCCL=${USE_RCCL} USE_KINETO=${USE_KINETO} \
|
||||
python setup.py bdist_wheel -d /tmp/$WHEELHOUSE_DIR --cmake
|
||||
echo "Finished setup.py bdist_wheel for split build (BUILD_PYTHON_ONLY)"
|
||||
else
|
||||
time CMAKE_ARGS=${CMAKE_ARGS[@]} \
|
||||
EXTRA_CAFFE2_CMAKE_FLAGS=${EXTRA_CAFFE2_CMAKE_FLAGS[@]} \
|
||||
BUILD_LIBTORCH_CPU_WITH_DEBUG=$BUILD_DEBUG_INFO \
|
||||
USE_NCCL=${USE_NCCL} USE_RCCL=${USE_RCCL} USE_KINETO=${USE_KINETO} \
|
||||
python setup.py bdist_wheel -d /tmp/$WHEELHOUSE_DIR
|
||||
fi
|
||||
echo "Finished setup.py bdist at $(date)"
|
||||
|
||||
# Build libtorch packages
|
||||
if [[ -n "$BUILD_PYTHONLESS" ]]; then
|
||||
# Now build pythonless libtorch
|
||||
# Note - just use whichever python we happen to be on
|
||||
python setup.py clean
|
||||
|
||||
if [[ $LIBTORCH_VARIANT = *"static"* ]]; then
|
||||
STATIC_CMAKE_FLAG="-DTORCH_STATIC=1"
|
||||
fi
|
||||
|
||||
mkdir -p build
|
||||
pushd build
|
||||
echo "Calling tools/build_libtorch.py at $(date)"
|
||||
time CMAKE_ARGS=${CMAKE_ARGS[@]} \
|
||||
EXTRA_CAFFE2_CMAKE_FLAGS="${EXTRA_CAFFE2_CMAKE_FLAGS[@]} $STATIC_CMAKE_FLAG" \
|
||||
python ../tools/build_libtorch.py
|
||||
echo "Finished tools/build_libtorch.py at $(date)"
|
||||
popd
|
||||
|
||||
mkdir -p libtorch/{lib,bin,include,share}
|
||||
cp -r build/build/lib libtorch/
|
||||
|
||||
# for now, the headers for the libtorch package will just be copied in
|
||||
# from one of the wheels (this is from when this script built multiple
|
||||
# wheels at once)
|
||||
ANY_WHEEL=$(ls /tmp/$WHEELHOUSE_DIR/torch*.whl | head -n1)
|
||||
unzip -d any_wheel $ANY_WHEEL
|
||||
if [[ -d any_wheel/torch/include ]]; then
|
||||
cp -r any_wheel/torch/include libtorch/
|
||||
else
|
||||
cp -r any_wheel/torch/lib/include libtorch/
|
||||
fi
|
||||
cp -r any_wheel/torch/share/cmake libtorch/share/
|
||||
rm -rf any_wheel
|
||||
|
||||
echo $PYTORCH_BUILD_VERSION > libtorch/build-version
|
||||
echo "$(pushd $PYTORCH_ROOT && git rev-parse HEAD)" > libtorch/build-hash
|
||||
|
||||
mkdir -p /tmp/$LIBTORCH_HOUSE_DIR
|
||||
|
||||
if [[ "$DESIRED_DEVTOOLSET" == *"cxx11-abi"* ]]; then
|
||||
LIBTORCH_ABI="cxx11-abi-"
|
||||
else
|
||||
LIBTORCH_ABI=
|
||||
fi
|
||||
|
||||
zip -rq /tmp/$LIBTORCH_HOUSE_DIR/libtorch-$LIBTORCH_ABI$LIBTORCH_VARIANT-$PYTORCH_BUILD_VERSION.zip libtorch
|
||||
cp /tmp/$LIBTORCH_HOUSE_DIR/libtorch-$LIBTORCH_ABI$LIBTORCH_VARIANT-$PYTORCH_BUILD_VERSION.zip \
|
||||
/tmp/$LIBTORCH_HOUSE_DIR/libtorch-$LIBTORCH_ABI$LIBTORCH_VARIANT-latest.zip
|
||||
fi
|
||||
|
||||
popd
|
||||
|
||||
#######################################################################
|
||||
# ADD DEPENDENCIES INTO THE WHEEL
|
||||
#
|
||||
# auditwheel repair doesn't work correctly and is buggy
|
||||
# so manually do the work of copying dependency libs and patchelfing
|
||||
# and fixing RECORDS entries correctly
|
||||
######################################################################
|
||||
|
||||
fname_with_sha256() {
|
||||
HASH=$(sha256sum $1 | cut -c1-8)
|
||||
DIRNAME=$(dirname $1)
|
||||
BASENAME=$(basename $1)
|
||||
# Do not rename nvrtc-builtins.so as they are dynamically loaded
|
||||
# by libnvrtc.so
|
||||
# Similarly don't mangle libcudnn and libcublas library names
|
||||
if [[ $BASENAME == "libnvrtc-builtins.s"* || $BASENAME == "libcudnn"* || $BASENAME == "libcublas"* ]]; then
|
||||
echo $1
|
||||
else
|
||||
INITNAME=$(echo $BASENAME | cut -f1 -d".")
|
||||
ENDNAME=$(echo $BASENAME | cut -f 2- -d".")
|
||||
echo "$DIRNAME/$INITNAME-$HASH.$ENDNAME"
|
||||
fi
|
||||
}
|
||||
|
||||
fname_without_so_number() {
|
||||
LINKNAME=$(echo $1 | sed -e 's/\.so.*/.so/g')
|
||||
echo "$LINKNAME"
|
||||
}
|
||||
|
||||
make_wheel_record() {
|
||||
FPATH=$1
|
||||
if echo $FPATH | grep RECORD >/dev/null 2>&1; then
|
||||
# if the RECORD file, then
|
||||
echo "$FPATH,,"
|
||||
else
|
||||
HASH=$(openssl dgst -sha256 -binary $FPATH | openssl base64 | sed -e 's/+/-/g' | sed -e 's/\//_/g' | sed -e 's/=//g')
|
||||
FSIZE=$(ls -nl $FPATH | awk '{print $5}')
|
||||
echo "$FPATH,sha256=$HASH,$FSIZE"
|
||||
fi
|
||||
}
|
||||
|
||||
replace_needed_sofiles() {
|
||||
find $1 -name '*.so*' | while read sofile; do
|
||||
origname=$2
|
||||
patchedname=$3
|
||||
if [[ "$origname" != "$patchedname" ]] || [[ "$DESIRED_CUDA" == *"rocm"* ]]; then
|
||||
set +e
|
||||
origname=$($PATCHELF_BIN --print-needed $sofile | grep "$origname.*")
|
||||
ERRCODE=$?
|
||||
set -e
|
||||
if [ "$ERRCODE" -eq "0" ]; then
|
||||
echo "patching $sofile entry $origname to $patchedname"
|
||||
$PATCHELF_BIN --replace-needed $origname $patchedname $sofile
|
||||
fi
|
||||
fi
|
||||
done
|
||||
}
|
||||
|
||||
echo 'Built this wheel:'
|
||||
ls /tmp/$WHEELHOUSE_DIR
|
||||
mkdir -p "/$WHEELHOUSE_DIR"
|
||||
mv /tmp/$WHEELHOUSE_DIR/torch*linux*.whl /$WHEELHOUSE_DIR/
|
||||
|
||||
if [[ "$USE_SPLIT_BUILD" == "true" ]]; then
|
||||
mv /tmp/$WHEELHOUSE_DIR/torch_no_python*.whl /$WHEELHOUSE_DIR/ || true
|
||||
fi
|
||||
|
||||
if [[ -n "$BUILD_PYTHONLESS" ]]; then
|
||||
mkdir -p /$LIBTORCH_HOUSE_DIR
|
||||
mv /tmp/$LIBTORCH_HOUSE_DIR/*.zip /$LIBTORCH_HOUSE_DIR
|
||||
rm -rf /tmp/$LIBTORCH_HOUSE_DIR
|
||||
fi
|
||||
rm -rf /tmp/$WHEELHOUSE_DIR
|
||||
rm -rf /tmp_dir
|
||||
mkdir /tmp_dir
|
||||
pushd /tmp_dir
|
||||
|
||||
for pkg in /$WHEELHOUSE_DIR/torch_no_python*.whl /$WHEELHOUSE_DIR/torch*linux*.whl /$LIBTORCH_HOUSE_DIR/libtorch*.zip; do
|
||||
|
||||
# if the glob didn't match anything
|
||||
if [[ ! -e $pkg ]]; then
|
||||
continue
|
||||
fi
|
||||
|
||||
rm -rf tmp
|
||||
mkdir -p tmp
|
||||
cd tmp
|
||||
cp $pkg .
|
||||
|
||||
unzip -q $(basename $pkg)
|
||||
rm -f $(basename $pkg)
|
||||
|
||||
if [[ -d torch ]]; then
|
||||
PREFIX=torch
|
||||
else
|
||||
PREFIX=libtorch
|
||||
fi
|
||||
|
||||
if [[ $pkg != *"without-deps"* ]]; then
|
||||
# copy over needed dependent .so files over and tag them with their hash
|
||||
patched=()
|
||||
for filepath in "${DEPS_LIST[@]}"; do
|
||||
filename=$(basename $filepath)
|
||||
destpath=$PREFIX/lib/$filename
|
||||
if [[ "$filepath" != "$destpath" ]]; then
|
||||
cp $filepath $destpath
|
||||
fi
|
||||
|
||||
# ROCm workaround for roctracer dlopens
|
||||
if [[ "$DESIRED_CUDA" == *"rocm"* ]]; then
|
||||
patchedpath=$(fname_without_so_number $destpath)
|
||||
# Keep the so number for XPU dependencies
|
||||
elif [[ "$DESIRED_CUDA" == *"xpu"* ]]; then
|
||||
patchedpath=$destpath
|
||||
else
|
||||
patchedpath=$(fname_with_sha256 $destpath)
|
||||
fi
|
||||
patchedname=$(basename $patchedpath)
|
||||
if [[ "$destpath" != "$patchedpath" ]]; then
|
||||
mv $destpath $patchedpath
|
||||
fi
|
||||
patched+=("$patchedname")
|
||||
echo "Copied $filepath to $patchedpath"
|
||||
done
|
||||
|
||||
echo "patching to fix the so names to the hashed names"
|
||||
for ((i=0;i<${#DEPS_LIST[@]};++i)); do
|
||||
replace_needed_sofiles $PREFIX ${DEPS_SONAME[i]} ${patched[i]}
|
||||
# do the same for caffe2, if it exists
|
||||
if [[ -d caffe2 ]]; then
|
||||
replace_needed_sofiles caffe2 ${DEPS_SONAME[i]} ${patched[i]}
|
||||
fi
|
||||
done
|
||||
|
||||
# copy over needed auxiliary files
|
||||
for ((i=0;i<${#DEPS_AUX_SRCLIST[@]};++i)); do
|
||||
srcpath=${DEPS_AUX_SRCLIST[i]}
|
||||
dstpath=$PREFIX/${DEPS_AUX_DSTLIST[i]}
|
||||
mkdir -p $(dirname $dstpath)
|
||||
cp $srcpath $dstpath
|
||||
done
|
||||
fi
|
||||
|
||||
# set RPATH of _C.so and similar to $ORIGIN, $ORIGIN/lib
|
||||
find $PREFIX -maxdepth 1 -type f -name "*.so*" | while read sofile; do
|
||||
echo "Setting rpath of $sofile to ${C_SO_RPATH:-'$ORIGIN:$ORIGIN/lib'}"
|
||||
$PATCHELF_BIN --set-rpath ${C_SO_RPATH:-'$ORIGIN:$ORIGIN/lib'} ${FORCE_RPATH:-} $sofile
|
||||
$PATCHELF_BIN --print-rpath $sofile
|
||||
done
|
||||
|
||||
# set RPATH of lib/ files to $ORIGIN
|
||||
find $PREFIX/lib -maxdepth 1 -type f -name "*.so*" | while read sofile; do
|
||||
echo "Setting rpath of $sofile to ${LIB_SO_RPATH:-'$ORIGIN'}"
|
||||
$PATCHELF_BIN --set-rpath ${LIB_SO_RPATH:-'$ORIGIN'} ${FORCE_RPATH:-} $sofile
|
||||
$PATCHELF_BIN --print-rpath $sofile
|
||||
done
|
||||
|
||||
# regenerate the RECORD file with new hashes
|
||||
record_file=$(echo $(basename $pkg) | sed -e 's/-cp.*$/.dist-info\/RECORD/g')
|
||||
if [[ -e $record_file ]]; then
|
||||
echo "Generating new record file $record_file"
|
||||
: > "$record_file"
|
||||
# generate records for folders in wheel
|
||||
find * -type f | while read fname; do
|
||||
make_wheel_record "$fname" >>"$record_file"
|
||||
done
|
||||
fi
|
||||
|
||||
if [[ $BUILD_DEBUG_INFO == "1" ]]; then
|
||||
pushd "$PREFIX/lib"
|
||||
|
||||
# Duplicate library into debug lib
|
||||
cp libtorch_cpu.so libtorch_cpu.so.dbg
|
||||
|
||||
# Keep debug symbols on debug lib
|
||||
strip --only-keep-debug libtorch_cpu.so.dbg
|
||||
|
||||
# Remove debug info from release lib
|
||||
strip --strip-debug libtorch_cpu.so
|
||||
|
||||
objcopy libtorch_cpu.so --add-gnu-debuglink=libtorch_cpu.so.dbg
|
||||
|
||||
# Zip up debug info
|
||||
mkdir -p /tmp/debug
|
||||
mv libtorch_cpu.so.dbg /tmp/debug/libtorch_cpu.so.dbg
|
||||
CRC32=$(objcopy --dump-section .gnu_debuglink=>(tail -c4 | od -t x4 -An | xargs echo) libtorch_cpu.so)
|
||||
|
||||
pushd /tmp
|
||||
PKG_NAME=$(basename "$pkg" | sed 's/\.whl$//g')
|
||||
zip /tmp/debug-whl-libtorch-"$PKG_NAME"-"$CRC32".zip /tmp/debug/libtorch_cpu.so.dbg
|
||||
cp /tmp/debug-whl-libtorch-"$PKG_NAME"-"$CRC32".zip "$PYTORCH_FINAL_PACKAGE_DIR"
|
||||
popd
|
||||
|
||||
popd
|
||||
fi
|
||||
|
||||
# zip up the wheel back
|
||||
zip -rq $(basename $pkg) $PREIX*
|
||||
|
||||
# replace original wheel
|
||||
rm -f $pkg
|
||||
mv $(basename $pkg) $pkg
|
||||
cd ..
|
||||
rm -rf tmp
|
||||
done
|
||||
|
||||
# Copy wheels to host machine for persistence before testing
|
||||
if [[ -n "$PYTORCH_FINAL_PACKAGE_DIR" ]]; then
|
||||
mkdir -p "$PYTORCH_FINAL_PACKAGE_DIR" || true
|
||||
if [[ -n "$BUILD_PYTHONLESS" ]]; then
|
||||
cp /$LIBTORCH_HOUSE_DIR/libtorch*.zip "$PYTORCH_FINAL_PACKAGE_DIR"
|
||||
else
|
||||
cp /$WHEELHOUSE_DIR/torch*.whl "$PYTORCH_FINAL_PACKAGE_DIR"
|
||||
fi
|
||||
fi
|
||||
|
||||
# remove stuff before testing
|
||||
rm -rf /opt/rh
|
||||
if ls /usr/local/cuda* >/dev/null 2>&1; then
|
||||
rm -rf /usr/local/cuda*
|
||||
fi
|
||||
|
||||
|
||||
# Test that all the wheels work
|
||||
if [[ -z "$BUILD_PYTHONLESS" ]]; then
|
||||
export OMP_NUM_THREADS=4 # on NUMA machines this takes too long
|
||||
pushd $PYTORCH_ROOT/test
|
||||
|
||||
# Install the wheel for this Python version
|
||||
if [[ "$USE_SPLIT_BUILD" == "true" ]]; then
|
||||
pip uninstall -y "$TORCH_NO_PYTHON_PACKAGE_NAME" || true
|
||||
fi
|
||||
|
||||
pip uninstall -y "$TORCH_PACKAGE_NAME"
|
||||
|
||||
if [[ "$USE_SPLIT_BUILD" == "true" ]]; then
|
||||
pip install "$TORCH_NO_PYTHON_PACKAGE_NAME" --no-index -f /$WHEELHOUSE_DIR --no-dependencies -v
|
||||
fi
|
||||
|
||||
pip install "$TORCH_PACKAGE_NAME" --no-index -f /$WHEELHOUSE_DIR --no-dependencies -v
|
||||
|
||||
# Print info on the libraries installed in this wheel
|
||||
# Rather than adjust find command to skip non-library files with an embedded *.so* in their name,
|
||||
# since this is only for reporting purposes, we add the || true to the ldd command.
|
||||
installed_libraries=($(find "$pydir/lib/python${py_majmin}/site-packages/torch/" -name '*.so*'))
|
||||
echo "The wheel installed all of the libraries: ${installed_libraries[@]}"
|
||||
for installed_lib in "${installed_libraries[@]}"; do
|
||||
ldd "$installed_lib" || true
|
||||
done
|
||||
|
||||
# Run the tests
|
||||
echo "$(date) :: Running tests"
|
||||
pushd "$PYTORCH_ROOT"
|
||||
|
||||
#TODO: run_tests.sh and check_binary.sh should be moved to pytorch/pytorch project
|
||||
LD_LIBRARY_PATH=/usr/local/nvidia/lib64 \
|
||||
"/builder/run_tests.sh" manywheel "${py_majmin}" "$DESIRED_CUDA"
|
||||
popd
|
||||
echo "$(date) :: Finished tests"
|
||||
fi
|
||||
99
.ci/manywheel/build_cpu.sh
Executable file
99
.ci/manywheel/build_cpu.sh
Executable file
@ -0,0 +1,99 @@
|
||||
#!/usr/bin/env bash
|
||||
|
||||
set -ex
|
||||
|
||||
GPU_ARCH_TYPE=${GPU_ARCH_TYPE:-cpu}
|
||||
|
||||
export TH_BINARY_BUILD=1
|
||||
export USE_CUDA=0
|
||||
|
||||
# Keep an array of cmake variables to add to
|
||||
if [[ -z "$CMAKE_ARGS" ]]; then
|
||||
# These are passed to tools/build_pytorch_libs.sh::build()
|
||||
CMAKE_ARGS=()
|
||||
fi
|
||||
if [[ -z "$EXTRA_CAFFE2_CMAKE_FLAGS" ]]; then
|
||||
# These are passed to tools/build_pytorch_libs.sh::build_caffe2()
|
||||
EXTRA_CAFFE2_CMAKE_FLAGS=()
|
||||
fi
|
||||
|
||||
DIR_SUFFIX=cpu
|
||||
if [[ "$GPU_ARCH_TYPE" == "xpu" ]]; then
|
||||
DIR_SUFFIX=xpu
|
||||
# Refer https://www.intel.com/content/www/us/en/developer/articles/tool/pytorch-prerequisites-for-intel-gpu/2-5.html
|
||||
source /opt/intel/oneapi/pytorch-gpu-dev-0.5/oneapi-vars.sh
|
||||
source /opt/intel/oneapi/pti/latest/env/vars.sh
|
||||
export USE_STATIC_MKL=1
|
||||
fi
|
||||
|
||||
WHEELHOUSE_DIR="wheelhouse$DIR_SUFFIX"
|
||||
LIBTORCH_HOUSE_DIR="libtorch_house$DIR_SUFFIX"
|
||||
if [[ -z "$PYTORCH_FINAL_PACKAGE_DIR" ]]; then
|
||||
if [[ -z "$BUILD_PYTHONLESS" ]]; then
|
||||
PYTORCH_FINAL_PACKAGE_DIR="/remote/wheelhouse$DIR_SUFFIX"
|
||||
else
|
||||
PYTORCH_FINAL_PACKAGE_DIR="/remote/libtorch_house$DIR_SUFFIX"
|
||||
fi
|
||||
fi
|
||||
mkdir -p "$PYTORCH_FINAL_PACKAGE_DIR" || true
|
||||
|
||||
OS_NAME=$(awk -F= '/^NAME/{print $2}' /etc/os-release)
|
||||
if [[ "$OS_NAME" == *"CentOS Linux"* ]]; then
|
||||
LIBGOMP_PATH="/usr/lib64/libgomp.so.1"
|
||||
elif [[ "$OS_NAME" == *"Red Hat Enterprise Linux"* ]]; then
|
||||
LIBGOMP_PATH="/usr/lib64/libgomp.so.1"
|
||||
elif [[ "$OS_NAME" == *"AlmaLinux"* ]]; then
|
||||
LIBGOMP_PATH="/usr/lib64/libgomp.so.1"
|
||||
elif [[ "$OS_NAME" == *"Ubuntu"* ]]; then
|
||||
if [[ "$(uname -m)" == "s390x" ]]; then
|
||||
LIBGOMP_PATH="/usr/lib/s390x-linux-gnu/libgomp.so.1"
|
||||
else
|
||||
LIBGOMP_PATH="/usr/lib/x86_64-linux-gnu/libgomp.so.1"
|
||||
fi
|
||||
fi
|
||||
|
||||
DEPS_LIST=(
|
||||
"$LIBGOMP_PATH"
|
||||
)
|
||||
|
||||
DEPS_SONAME=(
|
||||
"libgomp.so.1"
|
||||
)
|
||||
|
||||
if [[ "$GPU_ARCH_TYPE" == "xpu" ]]; then
|
||||
echo "Bundling with xpu support package libs."
|
||||
DEPS_LIST+=(
|
||||
"/opt/intel/oneapi/compiler/latest/lib/libsycl-preview.so.7"
|
||||
"/opt/intel/oneapi/compiler/latest/lib/libOpenCL.so.1"
|
||||
"/opt/intel/oneapi/compiler/latest/lib/libxptifw.so"
|
||||
"/opt/intel/oneapi/compiler/latest/lib/libsvml.so"
|
||||
"/opt/intel/oneapi/compiler/latest/lib/libirng.so"
|
||||
"/opt/intel/oneapi/compiler/latest/lib/libimf.so"
|
||||
"/opt/intel/oneapi/compiler/latest/lib/libintlc.so.5"
|
||||
"/opt/intel/oneapi/compiler/latest/lib/libpi_level_zero.so"
|
||||
"/opt/intel/oneapi/pti/latest/lib/libpti_view.so.0.9"
|
||||
"/opt/intel/oneapi/pti/latest/lib/libpti.so.0.9"
|
||||
)
|
||||
DEPS_SONAME+=(
|
||||
"libsycl-preview.so.7"
|
||||
"libOpenCL.so.1"
|
||||
"libxptifw.so"
|
||||
"libsvml.so"
|
||||
"libirng.so"
|
||||
"libimf.so"
|
||||
"libintlc.so.5"
|
||||
"libpi_level_zero.so"
|
||||
"libpti_view.so.0.9"
|
||||
"libpti.so.0.9"
|
||||
)
|
||||
fi
|
||||
|
||||
rm -rf /usr/local/cuda*
|
||||
|
||||
SOURCE_DIR="$( cd "$( dirname "${BASH_SOURCE[0]}" )" >/dev/null && pwd )"
|
||||
if [[ -z "$BUILD_PYTHONLESS" ]]; then
|
||||
BUILD_SCRIPT=build_common.sh
|
||||
else
|
||||
BUILD_SCRIPT=build_libtorch.sh
|
||||
fi
|
||||
source ${SOURCE_DIR}/${BUILD_SCRIPT}
|
||||
290
.ci/manywheel/build_cuda.sh
Normal file
290
.ci/manywheel/build_cuda.sh
Normal file
@ -0,0 +1,290 @@
|
||||
#!/usr/bin/env bash
|
||||
|
||||
set -ex
|
||||
|
||||
SCRIPTPATH="$( cd "$(dirname "$0")" ; pwd -P ))"
|
||||
|
||||
export TORCH_NVCC_FLAGS="-Xfatbin -compress-all"
|
||||
export NCCL_ROOT_DIR=/usr/local/cuda
|
||||
export TH_BINARY_BUILD=1
|
||||
export USE_STATIC_CUDNN=1
|
||||
export USE_STATIC_NCCL=1
|
||||
export ATEN_STATIC_CUDA=1
|
||||
export USE_CUDA_STATIC_LINK=1
|
||||
export INSTALL_TEST=0 # dont install test binaries into site-packages
|
||||
export USE_CUPTI_SO=0
|
||||
export USE_CUSPARSELT=${USE_CUSPARSELT:-1} # Enable if not disabled by libtorch build
|
||||
|
||||
# Keep an array of cmake variables to add to
|
||||
if [[ -z "$CMAKE_ARGS" ]]; then
|
||||
# These are passed to tools/build_pytorch_libs.sh::build()
|
||||
CMAKE_ARGS=()
|
||||
fi
|
||||
if [[ -z "$EXTRA_CAFFE2_CMAKE_FLAGS" ]]; then
|
||||
# These are passed to tools/build_pytorch_libs.sh::build_caffe2()
|
||||
EXTRA_CAFFE2_CMAKE_FLAGS=()
|
||||
fi
|
||||
|
||||
# Determine CUDA version and architectures to build for
|
||||
#
|
||||
# NOTE: We should first check `DESIRED_CUDA` when determining `CUDA_VERSION`,
|
||||
# because in some cases a single Docker image can have multiple CUDA versions
|
||||
# on it, and `nvcc --version` might not show the CUDA version we want.
|
||||
if [[ -n "$DESIRED_CUDA" ]]; then
|
||||
# If the DESIRED_CUDA already matches the format that we expect
|
||||
if [[ ${DESIRED_CUDA} =~ ^[0-9]+\.[0-9]+$ ]]; then
|
||||
CUDA_VERSION=${DESIRED_CUDA}
|
||||
else
|
||||
# cu90, cu92, cu100, cu101
|
||||
if [[ ${#DESIRED_CUDA} -eq 4 ]]; then
|
||||
CUDA_VERSION="${DESIRED_CUDA:2:1}.${DESIRED_CUDA:3:1}"
|
||||
elif [[ ${#DESIRED_CUDA} -eq 5 ]]; then
|
||||
CUDA_VERSION="${DESIRED_CUDA:2:2}.${DESIRED_CUDA:4:1}"
|
||||
fi
|
||||
fi
|
||||
echo "Using CUDA $CUDA_VERSION as determined by DESIRED_CUDA"
|
||||
|
||||
# There really has to be a better way to do this - eli
|
||||
# Possibly limiting builds to specific cuda versions be delimiting images would be a choice
|
||||
if [[ "$OS_NAME" == *"Ubuntu"* ]]; then
|
||||
echo "Switching to CUDA version ${DESIRED_CUDA}"
|
||||
/builder/conda/switch_cuda_version.sh "${DESIRED_CUDA}"
|
||||
fi
|
||||
else
|
||||
CUDA_VERSION=$(nvcc --version|grep release|cut -f5 -d" "|cut -f1 -d",")
|
||||
echo "CUDA $CUDA_VERSION Detected"
|
||||
fi
|
||||
|
||||
cuda_version_nodot=$(echo $CUDA_VERSION | tr -d '.')
|
||||
|
||||
TORCH_CUDA_ARCH_LIST="5.0;6.0;7.0;7.5;8.0;8.6"
|
||||
case ${CUDA_VERSION} in
|
||||
12.4)
|
||||
if [[ "$GPU_ARCH_TYPE" = "cuda-aarch64" ]]; then
|
||||
TORCH_CUDA_ARCH_LIST="9.0"
|
||||
else
|
||||
TORCH_CUDA_ARCH_LIST="${TORCH_CUDA_ARCH_LIST};9.0+PTX"
|
||||
fi
|
||||
EXTRA_CAFFE2_CMAKE_FLAGS+=("-DATEN_NO_TEST=ON")
|
||||
;;
|
||||
12.1)
|
||||
TORCH_CUDA_ARCH_LIST="${TORCH_CUDA_ARCH_LIST};9.0"
|
||||
EXTRA_CAFFE2_CMAKE_FLAGS+=("-DATEN_NO_TEST=ON")
|
||||
;;
|
||||
11.8)
|
||||
TORCH_CUDA_ARCH_LIST="${TORCH_CUDA_ARCH_LIST};3.7;9.0"
|
||||
EXTRA_CAFFE2_CMAKE_FLAGS+=("-DATEN_NO_TEST=ON")
|
||||
;;
|
||||
11.[67])
|
||||
TORCH_CUDA_ARCH_LIST="${TORCH_CUDA_ARCH_LIST};3.7"
|
||||
EXTRA_CAFFE2_CMAKE_FLAGS+=("-DATEN_NO_TEST=ON")
|
||||
;;
|
||||
*)
|
||||
echo "unknown cuda version $CUDA_VERSION"
|
||||
exit 1
|
||||
;;
|
||||
esac
|
||||
|
||||
# Export the fully-assembled GPU architecture list for setup.py / CMake.
export TORCH_CUDA_ARCH_LIST=${TORCH_CUDA_ARCH_LIST}
echo "${TORCH_CUDA_ARCH_LIST}"

# Shared setup for builds that take the NVIDIA runtime libraries from the
# PyPI "nvidia-*" packages instead of bundling them into the wheel: point
# the RPATHs of the extension modules and libtorch at the pip-installed
# packages and switch all CUDA components to dynamic linking.
# (Previously this 30-line block was duplicated verbatim in the 12.x and
# 11.8 branches below.)
setup_pypi_nvidia_rpaths() {
    echo "Using nvidia libs from pypi."
    local rpaths=(
        '$ORIGIN/../../nvidia/cublas/lib'
        '$ORIGIN/../../nvidia/cuda_cupti/lib'
        '$ORIGIN/../../nvidia/cuda_nvrtc/lib'
        '$ORIGIN/../../nvidia/cuda_runtime/lib'
        '$ORIGIN/../../nvidia/cudnn/lib'
        '$ORIGIN/../../nvidia/cufft/lib'
        '$ORIGIN/../../nvidia/curand/lib'
        '$ORIGIN/../../nvidia/cusolver/lib'
        '$ORIGIN/../../nvidia/cusparse/lib'
        '$ORIGIN/../../nvidia/nccl/lib'
        '$ORIGIN/../../nvidia/nvtx/lib'
    )
    # Join with ':' for use as an RPATH string.
    CUDA_RPATHS=$(IFS=: ; echo "${rpaths[*]}")
    export C_SO_RPATH=$CUDA_RPATHS':$ORIGIN:$ORIGIN/lib'
    export LIB_SO_RPATH=$CUDA_RPATHS':$ORIGIN'
    export FORCE_RPATH="--force-rpath"
    export USE_STATIC_NCCL=0
    export USE_SYSTEM_NCCL=1
    export ATEN_STATIC_CUDA=0
    export USE_CUDA_STATIC_LINK=0
    export USE_CUPTI_SO=1
    export NCCL_INCLUDE_DIR="/usr/local/cuda/include/"
    export NCCL_LIB_DIR="/usr/local/cuda/lib64/"
}

# Package directories
WHEELHOUSE_DIR="wheelhouse$cuda_version_nodot"
LIBTORCH_HOUSE_DIR="libtorch_house$cuda_version_nodot"
if [[ -z "$PYTORCH_FINAL_PACKAGE_DIR" ]]; then
    if [[ -z "$BUILD_PYTHONLESS" ]]; then
        PYTORCH_FINAL_PACKAGE_DIR="/remote/wheelhouse$cuda_version_nodot"
    else
        PYTORCH_FINAL_PACKAGE_DIR="/remote/libtorch_house$cuda_version_nodot"
    fi
fi
mkdir -p "$PYTORCH_FINAL_PACKAGE_DIR" || true

# libgomp lives in /usr/lib64 on all RPM-based distros; the three previously
# duplicated branches (CentOS/AlmaLinux/RHEL) are consolidated here.
OS_NAME=$(awk -F= '/^NAME/{print $2}' /etc/os-release)
if [[ "$OS_NAME" == *"CentOS Linux"* || "$OS_NAME" == *"AlmaLinux"* || "$OS_NAME" == *"Red Hat Enterprise Linux"* ]]; then
    LIBGOMP_PATH="/usr/lib64/libgomp.so.1"
elif [[ "$OS_NAME" == *"Ubuntu"* ]]; then
    LIBGOMP_PATH="/usr/lib/x86_64-linux-gnu/libgomp.so.1"
fi

# Extra shared libraries to vendor into the package, and the SONAMEs the
# repack step will patch (the two arrays must stay index-aligned).
DEPS_LIST=(
    "$LIBGOMP_PATH"
)
DEPS_SONAME=(
    "libgomp.so.1"
)

if [[ $USE_CUSPARSELT == "1" ]]; then
    DEPS_LIST+=(
        "/usr/local/cuda/lib64/libcusparseLt.so.0"
    )
    DEPS_SONAME+=(
        "libcusparseLt.so.0"
    )
fi

if [[ $CUDA_VERSION == "12.1" || $CUDA_VERSION == "12.4" ]]; then
    export USE_STATIC_CUDNN=0
    # Try parallelizing nvcc as well
    export TORCH_NVCC_FLAGS="-Xfatbin -compress-all --threads 2"

    if [[ -z "$PYTORCH_EXTRA_INSTALL_REQUIREMENTS" ]]; then
        echo "Bundling with cudnn and cublas."
        DEPS_LIST+=(
            "/usr/local/cuda/lib64/libcudnn_adv.so.9"
            "/usr/local/cuda/lib64/libcudnn_cnn.so.9"
            "/usr/local/cuda/lib64/libcudnn_graph.so.9"
            "/usr/local/cuda/lib64/libcudnn_ops.so.9"
            "/usr/local/cuda/lib64/libcudnn_engines_runtime_compiled.so.9"
            "/usr/local/cuda/lib64/libcudnn_engines_precompiled.so.9"
            "/usr/local/cuda/lib64/libcudnn_heuristic.so.9"
            "/usr/local/cuda/lib64/libcudnn.so.9"
            "/usr/local/cuda/lib64/libcublas.so.12"
            "/usr/local/cuda/lib64/libcublasLt.so.12"
            "/usr/local/cuda/lib64/libcudart.so.12"
            "/usr/local/cuda/lib64/libnvToolsExt.so.1"
            "/usr/local/cuda/lib64/libnvrtc.so.12"
            "/usr/local/cuda/lib64/libnvrtc-builtins.so"
        )
        DEPS_SONAME+=(
            "libcudnn_adv.so.9"
            "libcudnn_cnn.so.9"
            "libcudnn_graph.so.9"
            "libcudnn_ops.so.9"
            "libcudnn_engines_runtime_compiled.so.9"
            "libcudnn_engines_precompiled.so.9"
            "libcudnn_heuristic.so.9"
            "libcudnn.so.9"
            "libcublas.so.12"
            "libcublasLt.so.12"
            "libcudart.so.12"
            "libnvToolsExt.so.1"
            "libnvrtc.so.12"
            "libnvrtc-builtins.so"
        )
    else
        setup_pypi_nvidia_rpaths
    fi
elif [[ $CUDA_VERSION == "11.8" ]]; then
    export USE_STATIC_CUDNN=0
    # Try parallelizing nvcc as well
    export TORCH_NVCC_FLAGS="-Xfatbin -compress-all --threads 2"
    # Bundle ptxas into the wheel, see https://github.com/pytorch/pytorch/pull/119750
    export BUILD_BUNDLE_PTXAS=1

    if [[ -z "$PYTORCH_EXTRA_INSTALL_REQUIREMENTS" ]]; then
        echo "Bundling with cudnn and cublas."
        DEPS_LIST+=(
            "/usr/local/cuda/lib64/libcudnn_adv.so.9"
            "/usr/local/cuda/lib64/libcudnn_cnn.so.9"
            "/usr/local/cuda/lib64/libcudnn_graph.so.9"
            "/usr/local/cuda/lib64/libcudnn_ops.so.9"
            "/usr/local/cuda/lib64/libcudnn_engines_runtime_compiled.so.9"
            "/usr/local/cuda/lib64/libcudnn_engines_precompiled.so.9"
            "/usr/local/cuda/lib64/libcudnn_heuristic.so.9"
            "/usr/local/cuda/lib64/libcudnn.so.9"
            "/usr/local/cuda/lib64/libcublas.so.11"
            "/usr/local/cuda/lib64/libcublasLt.so.11"
            "/usr/local/cuda/lib64/libcudart.so.11.0"
            "/usr/local/cuda/lib64/libnvToolsExt.so.1"
            "/usr/local/cuda/lib64/libnvrtc.so.11.2" # this is not a mistake, it links to more specific cuda version
            "/usr/local/cuda/lib64/libnvrtc-builtins.so.11.8"
        )
        DEPS_SONAME+=(
            "libcudnn_adv.so.9"
            "libcudnn_cnn.so.9"
            "libcudnn_graph.so.9"
            "libcudnn_ops.so.9"
            "libcudnn_engines_runtime_compiled.so.9"
            "libcudnn_engines_precompiled.so.9"
            "libcudnn_heuristic.so.9"
            "libcudnn.so.9"
            "libcublas.so.11"
            "libcublasLt.so.11"
            "libcudart.so.11.0"
            "libnvToolsExt.so.1"
            "libnvrtc.so.11.2"
            "libnvrtc-builtins.so.11.8"
        )
    else
        setup_pypi_nvidia_rpaths
    fi
else
    echo "Unknown cuda version $CUDA_VERSION"
    exit 1
fi

# builder/test.sh requires DESIRED_CUDA to know what tests to exclude
export DESIRED_CUDA="$cuda_version_nodot"

# Switch `/usr/local/cuda` to the desired CUDA version
rm -rf /usr/local/cuda || true
ln -s "/usr/local/cuda-${CUDA_VERSION}" /usr/local/cuda

# Switch `/usr/local/magma` to the desired CUDA version
rm -rf /usr/local/magma || true
ln -s "/usr/local/cuda-${CUDA_VERSION}/magma" /usr/local/magma

# Re-derive the exact installed versions from the library file names,
# e.g. libcudart.so.10.0.130 -> CUDA_VERSION=10.0.130, CUDA_VERSION_SHORT=10.0
export CUDA_VERSION=$(ls /usr/local/cuda/lib64/libcudart.so.*|sort|tac | head -1 | rev | cut -d"." -f -3 | rev) # 10.0.130
export CUDA_VERSION_SHORT=$(ls /usr/local/cuda/lib64/libcudart.so.*|sort|tac | head -1 | rev | cut -d"." -f -3 | rev | cut -f1,2 -d".") # 10.0
export CUDNN_VERSION=$(ls /usr/local/cuda/lib64/libcudnn.so.*|sort|tac | head -1 | rev | cut -d"." -f -3 | rev)

# Hand off to the shared build driver (pythonless -> libtorch-only build).
SCRIPTPATH="$( cd "$(dirname "$0")" ; pwd -P )"
if [[ -z "$BUILD_PYTHONLESS" ]]; then
    BUILD_SCRIPT=build_common.sh
else
    BUILD_SCRIPT=build_libtorch.sh
fi
source "$SCRIPTPATH/${BUILD_SCRIPT}"
|
||||
353
.ci/manywheel/build_libtorch.sh
Normal file
353
.ci/manywheel/build_libtorch.sh
Normal file
@ -0,0 +1,353 @@
|
||||
#!/usr/bin/env bash

# meant to be called only from the neighboring build.sh and build_cpu.sh scripts

# BUG FIX: `set -e pipefail` does NOT enable pipefail -- it enables -e and then
# assigns "pipefail" to the positional parameter $1. Use the -o form.
set -e -o pipefail
SOURCE_DIR="$( cd "$( dirname "${BASH_SOURCE[0]}" )" >/dev/null && pwd )"

# Require only one python installation
if [[ -z "$DESIRED_PYTHON" ]]; then
    echo "Need to set DESIRED_PYTHON env variable"
    exit 1
fi
if [[ -n "$BUILD_PYTHONLESS" && -z "$LIBTORCH_VARIANT" ]]; then
    echo "BUILD_PYTHONLESS is set, so need LIBTORCH_VARIANT to also be set"
    echo "LIBTORCH_VARIANT should be one of shared-with-deps shared-without-deps static-with-deps static-without-deps"
    exit 1
fi

# Function to retry commands that sometimes timeout or have flaky failures:
# up to five attempts with 1/2/4/8 second backoff between them.
retry () {
    $* || (sleep 1 && $*) || (sleep 2 && $*) || (sleep 4 && $*) || (sleep 8 && $*)
}
|
||||
|
||||
# TODO move this into the Docker images
OS_NAME=$(awk -F= '/^NAME/{print $2}' /etc/os-release)
if [[ "$OS_NAME" == *"CentOS Linux"* || "$OS_NAME" == *"AlmaLinux"* ]]; then
    retry yum install -q -y zip openssl
elif [[ "$OS_NAME" == *"Red Hat Enterprise Linux"* ]]; then
    retry dnf install -q -y zip openssl
elif [[ "$OS_NAME" == *"Ubuntu"* ]]; then
    # TODO: Remove this once nvidia package repos are back online
    # Comment out nvidia repositories to prevent them from getting apt-get
    # updated, see https://github.com/pytorch/pytorch/issues/74968
    # shellcheck disable=SC2046
    sed -i 's/.*nvidia.*/# &/' $(find /etc/apt/ -type f -name "*.list")
    retry apt-get update
    retry apt-get -y install zip openssl
fi

# Version: setup.py uses $PYTORCH_BUILD_VERSION.post$PYTORCH_BUILD_NUMBER if
# PYTORCH_BUILD_NUMBER > 1
build_version="$PYTORCH_BUILD_VERSION"
build_number="$PYTORCH_BUILD_NUMBER"
if [[ -n "$OVERRIDE_PACKAGE_VERSION" ]]; then
    # This will be the *exact* version, since build_number<1
    build_version="$OVERRIDE_PACKAGE_VERSION"
    build_number=0
fi
# Fall back to defaults when nothing was supplied.
: "${build_version:=1.0.0}"
: "${build_number:=1}"
export PYTORCH_BUILD_VERSION=$build_version
export PYTORCH_BUILD_NUMBER=$build_number

export CMAKE_LIBRARY_PATH="/opt/intel/lib:/lib:$CMAKE_LIBRARY_PATH"
export CMAKE_INCLUDE_PATH="/opt/intel/include:$CMAKE_INCLUDE_PATH"

# Prefer the vendored OpenSSL build when it exists.
if [[ -e /opt/openssl ]]; then
    export OPENSSL_ROOT_DIR=/opt/openssl
    export CMAKE_INCLUDE_PATH="/opt/openssl/include:$CMAKE_INCLUDE_PATH"
fi

# If given a python version like 3.6m or 2.7mu, convert this to the
# cpXY-cpXY manylinux tag format. The binary CI jobs only ever pass a single
# python version, so DESIRED_PYTHON is assumed not to be a list here.
if [[ -n "$DESIRED_PYTHON" && "$DESIRED_PYTHON" != cp* ]]; then
    python_nodot="$(echo $DESIRED_PYTHON | tr -d m.u)"
    DESIRED_PYTHON="cp${python_nodot}-cp${python_nodot}"
fi
pydir="/opt/python/$DESIRED_PYTHON"
export PATH="$pydir/bin:$PATH"

# patchelf 0.9 mangles RPATHs; refuse to proceed with it.
export PATCHELF_BIN=/usr/local/bin/patchelf
patchelf_version=$("$PATCHELF_BIN" --version)
echo "patchelf version: " $patchelf_version
if [[ "$patchelf_version" == "patchelf 0.9" ]]; then
    echo "Your patchelf version is too old. Please use version >= 0.10."
    exit 1
fi

########################################################
# Compile wheels as well as libtorch
#######################################################
if [[ -z "$PYTORCH_ROOT" ]]; then
    echo "Need to set PYTORCH_ROOT env variable"
    exit 1
fi
pushd "$PYTORCH_ROOT"
python setup.py clean
retry pip install -qr requirements.txt
retry pip install -q numpy==2.0.1

if [[ "$DESIRED_DEVTOOLSET" == *"cxx11-abi"* ]]; then
    export _GLIBCXX_USE_CXX11_ABI=1
else
    export _GLIBCXX_USE_CXX11_ABI=0
fi

# ROCm builds hipify the sources in-place first.
if [[ "$DESIRED_CUDA" == *"rocm"* ]]; then
    echo "Calling build_amd.py at $(date)"
    python tools/amd_build/build_amd.py
    # TODO remove this work-around once pytorch sources are updated
    export ROCclr_DIR=/opt/rocm/rocclr/lib/cmake/rocclr
fi

echo "Calling setup.py install at $(date)"

if [[ $LIBTORCH_VARIANT = *"static"* ]]; then
    STATIC_CMAKE_FLAG="-DTORCH_STATIC=1"
fi
|
||||
|
||||
(
    set -x

    mkdir -p build

    # BUG FIX: a comment line used to sit in the middle of the backslash
    # continuations below. A comment after a line continuation terminates the
    # command, so CFLAGS and BUILD_LIBTORCH_CPU_WITH_DEBUG were silently
    # dropped from the environment of `setup.py install`. Comments must stay
    # outside the continued command.
    #
    # TODO: Remove the CFLAGS work-around once
    # https://github.com/pytorch/pytorch/issues/55952 is closed
    time CMAKE_ARGS=${CMAKE_ARGS[@]} \
        EXTRA_CAFFE2_CMAKE_FLAGS="${EXTRA_CAFFE2_CMAKE_FLAGS[@]} $STATIC_CMAKE_FLAG" \
        CFLAGS='-Wno-deprecated-declarations' \
        BUILD_LIBTORCH_CPU_WITH_DEBUG=1 \
        python setup.py install

    mkdir -p libtorch/{lib,bin,include,share}

    # Make debug folder separate so it doesn't get zipped up with the rest of
    # libtorch
    mkdir debug

    # Copy over all lib files
    cp -rv build/lib/* libtorch/lib/
    cp -rv build/lib*/torch/lib/* libtorch/lib/

    # Copy over all include files
    cp -rv build/include/* libtorch/include/
    cp -rv build/lib*/torch/include/* libtorch/include/

    # Copy over all of the cmake files
    cp -rv build/lib*/torch/share/* libtorch/share/

    # Split libtorch into debug / release version
    cp libtorch/lib/libtorch_cpu.so libtorch/lib/libtorch_cpu.so.dbg

    # Keep debug symbols on debug lib
    strip --only-keep-debug libtorch/lib/libtorch_cpu.so.dbg

    # Remove debug info from release lib
    strip --strip-debug libtorch/lib/libtorch_cpu.so

    # Add a debug link to the release lib to the debug lib (debuggers will then
    # search for symbols in a file called libtorch_cpu.so.dbg in some
    # predetermined locations) and embed a CRC32 of the debug library into the .so
    cd libtorch/lib

    objcopy libtorch_cpu.so --add-gnu-debuglink=libtorch_cpu.so.dbg
    cd ../..

    # Move the debug symbols to its own directory so it doesn't get processed /
    # zipped with all the other libraries
    mv libtorch/lib/libtorch_cpu.so.dbg debug/libtorch_cpu.so.dbg

    echo "${PYTORCH_BUILD_VERSION}" > libtorch/build-version
    echo "$(pushd $PYTORCH_ROOT && git rev-parse HEAD)" > libtorch/build-hash

)
|
||||
|
||||
# Archive-name prefix that distinguishes the cxx11 ABI build.
if [[ "$DESIRED_DEVTOOLSET" == *"cxx11-abi"* ]]; then
    LIBTORCH_ABI="cxx11-abi-"
else
    LIBTORCH_ABI=
fi

(
    set -x

    mkdir -p /tmp/$LIBTORCH_HOUSE_DIR

    # objcopy installed a CRC32 of the debug library above, so recover it and
    # embed it in the debug-archive name so the two artifacts can be matched.
    CRC32=$(objcopy --dump-section .gnu_debuglink=>(tail -c4 | od -t x4 -An | xargs echo) libtorch/lib/libtorch_cpu.so)

    # Zip debug symbols
    zip /tmp/$LIBTORCH_HOUSE_DIR/debug-libtorch-$LIBTORCH_ABI$LIBTORCH_VARIANT-$PYTORCH_BUILD_VERSION-$CRC32.zip debug/libtorch_cpu.so.dbg

    # Zip and copy libtorch, also keeping a "-latest" alias of the archive.
    zip -rq /tmp/$LIBTORCH_HOUSE_DIR/libtorch-$LIBTORCH_ABI$LIBTORCH_VARIANT-$PYTORCH_BUILD_VERSION.zip libtorch
    cp /tmp/$LIBTORCH_HOUSE_DIR/libtorch-$LIBTORCH_ABI$LIBTORCH_VARIANT-$PYTORCH_BUILD_VERSION.zip \
       /tmp/$LIBTORCH_HOUSE_DIR/libtorch-$LIBTORCH_ABI$LIBTORCH_VARIANT-latest.zip
)


popd

#######################################################################
# ADD DEPENDENCIES INTO THE WHEEL
#
# auditwheel repair doesn't work correctly and is buggy
# so manually do the work of copying dependency libs and patchelfing
# and fixing RECORDS entries correctly
######################################################################
|
||||
|
||||
# Produce a destination name for $1 tagged with the first 8 hex digits of its
# sha256 (libfoo.so.1 -> libfoo-<hash>.so.1). cuDNN libraries and the nvrtc
# builtins are loaded by exact name at runtime, so those paths are returned
# unchanged.
fname_with_sha256() {
    local hash dir base stem ext
    hash=$(sha256sum "$1" | cut -c1-8)
    dir=$(dirname "$1")
    base=$(basename "$1")
    case "$base" in
        libnvrtc-builtins.so|libcudnn*)
            echo "$1"
            ;;
        *)
            stem=$(echo "$base" | cut -f1 -d".")
            ext=$(echo "$base" | cut -f 2- -d".")
            echo "$dir/$stem-$hash.$ext"
            ;;
    esac
}
|
||||
|
||||
# Strip the shared-library version suffix: /x/libfoo.so.1.2 -> /x/libfoo.so.
# Paths without a ".so" component are returned untouched (matching the old
# sed 's/\.so.*/.so/' behavior).
fname_without_so_number() {
    if [[ "$1" == *.so* ]]; then
        echo "${1%%.so*}.so"
    else
        echo "$1"
    fi
}
|
||||
|
||||
# Emit one wheel RECORD line for file $1: "path,sha256=<digest>,<size>".
# The digest is urlsafe-base64 without padding, per PEP 376/427; the RECORD
# file itself gets empty hash and size fields.
make_wheel_record() {
    local fpath=$1
    if echo "$fpath" | grep RECORD >/dev/null 2>&1; then
        # The RECORD file never records its own hash.
        echo "$fpath,,"
    else
        local digest fsize
        digest=$(openssl dgst -sha256 -binary "$fpath" | openssl base64 | tr '+/' '-_' | tr -d '=')
        fsize=$(ls -nl "$fpath" | awk '{print $5}')
        echo "$fpath,sha256=$digest,$fsize"
    fi
}
|
||||
|
||||
echo 'Built this package:'
(
    set -x
    mkdir -p /$LIBTORCH_HOUSE_DIR
    mv /tmp/$LIBTORCH_HOUSE_DIR/*.zip /$LIBTORCH_HOUSE_DIR
    rm -rf /tmp/$LIBTORCH_HOUSE_DIR
)

# For each produced archive: unpack it in a scratch dir, vendor the DEPS_LIST
# libraries into libtorch/lib, rewrite SONAME references and RPATHs with
# patchelf, refresh the RECORD file, then repack the archive in place.
TMP_DIR=$(mktemp -d)
trap "rm -rf ${TMP_DIR}" EXIT
pushd "${TMP_DIR}"

for pkg in /$LIBTORCH_HOUSE_DIR/libtorch*.zip; do

    # if the glob didn't match anything
    if [[ ! -e $pkg ]]; then
        continue
    fi

    rm -rf tmp
    mkdir -p tmp
    cd tmp
    cp $pkg .

    unzip -q $(basename $pkg)
    rm -f $(basename $pkg)

    PREFIX=libtorch

    if [[ $pkg != *"without-deps"* ]]; then
        # copy over needed dependent .so files over and tag them with their hash
        patched=()
        for filepath in "${DEPS_LIST[@]}"; do
            filename=$(basename $filepath)
            destpath=$PREFIX/lib/$filename
            if [[ "$filepath" != "$destpath" ]]; then
                cp $filepath $destpath
            fi

            # ROCm keeps plain .so names; CUDA names get tagged with a hash.
            if [[ "$DESIRED_CUDA" == *"rocm"* ]]; then
                patchedpath=$(fname_without_so_number $destpath)
            else
                patchedpath=$(fname_with_sha256 $destpath)
            fi
            patchedname=$(basename $patchedpath)
            if [[ "$destpath" != "$patchedpath" ]]; then
                mv $destpath $patchedpath
            fi
            patched+=("$patchedname")
            echo "Copied $filepath to $patchedpath"
        done

        echo "patching to fix the so names to the hashed names"
        for ((i=0;i<${#DEPS_LIST[@]};++i)); do
            find $PREFIX -name '*.so*' | while read sofile; do
                origname=${DEPS_SONAME[i]}
                patchedname=${patched[i]}
                if [[ "$origname" != "$patchedname" ]] || [[ "$DESIRED_CUDA" == *"rocm"* ]]; then
                    # grep may legitimately find nothing; keep set -e from
                    # aborting the whole script on that.
                    set +e
                    origname=$($PATCHELF_BIN --print-needed $sofile | grep "$origname.*")
                    ERRCODE=$?
                    set -e
                    if [ "$ERRCODE" -eq "0" ]; then
                        echo "patching $sofile entry $origname to $patchedname"
                        $PATCHELF_BIN --replace-needed $origname $patchedname $sofile
                    fi
                fi
            done
        done

        # copy over needed auxiliary files
        for ((i=0;i<${#DEPS_AUX_SRCLIST[@]};++i)); do
            srcpath=${DEPS_AUX_SRCLIST[i]}
            dstpath=$PREFIX/${DEPS_AUX_DSTLIST[i]}
            mkdir -p $(dirname $dstpath)
            cp $srcpath $dstpath
        done
    fi

    # set RPATH of _C.so and similar to $ORIGIN, $ORIGIN/lib
    find $PREFIX -maxdepth 1 -type f -name "*.so*" | while read sofile; do
        echo "Setting rpath of $sofile to " '$ORIGIN:$ORIGIN/lib'
        $PATCHELF_BIN --set-rpath '$ORIGIN:$ORIGIN/lib' $sofile
        $PATCHELF_BIN --print-rpath $sofile
    done

    # set RPATH of lib/ files to $ORIGIN
    find $PREFIX/lib -maxdepth 1 -type f -name "*.so*" | while read sofile; do
        echo "Setting rpath of $sofile to " '$ORIGIN'
        $PATCHELF_BIN --set-rpath '$ORIGIN' $sofile
        $PATCHELF_BIN --print-rpath $sofile
    done

    # regenerate the RECORD file with new hashes
    record_file=$(echo $(basename $pkg) | sed -e 's/-cp.*$/.dist-info\/RECORD/g')
    if [[ -e $record_file ]]; then
        echo "Generating new record file $record_file"
        rm -f $record_file
        # generate records for folders in wheel
        find * -type f | while read fname; do
            echo $(make_wheel_record $fname) >>$record_file
        done
    fi

    # zip up the wheel back
    zip -rq $(basename $pkg) $PREFIX*

    # replace original wheel
    rm -f $pkg
    mv $(basename $pkg) $pkg
    cd ..
    rm -rf tmp
done

# Copy wheels to host machine for persistence before testing
if [[ -n "$PYTORCH_FINAL_PACKAGE_DIR" ]]; then
    cp /$LIBTORCH_HOUSE_DIR/libtorch*.zip "$PYTORCH_FINAL_PACKAGE_DIR"
    cp /$LIBTORCH_HOUSE_DIR/debug-libtorch*.zip "$PYTORCH_FINAL_PACKAGE_DIR"
fi
|
||||
263
.ci/manywheel/build_rocm.sh
Executable file
263
.ci/manywheel/build_rocm.sh
Executable file
@ -0,0 +1,263 @@
|
||||
#!/usr/bin/env bash

set -ex

export ROCM_HOME=/opt/rocm
export MAGMA_HOME=$ROCM_HOME/magma
# TODO: libtorch_cpu.so is broken when building with Debug info
export BUILD_DEBUG_INFO=0

# TODO Are these all used/needed?
export TH_BINARY_BUILD=1
export USE_STATIC_CUDNN=1
export USE_STATIC_NCCL=1
export ATEN_STATIC_CUDA=1
export USE_CUDA_STATIC_LINK=1
export INSTALL_TEST=0 # dont install test binaries into site-packages
# Set RPATH instead of RUNPATH when using patchelf to avoid LD_LIBRARY_PATH override
export FORCE_RPATH="--force-rpath"

# Keep an array of cmake variables to add to
if [[ -z "$CMAKE_ARGS" ]]; then
    # These are passed to tools/build_pytorch_libs.sh::build()
    CMAKE_ARGS=()
fi
if [[ -z "$EXTRA_CAFFE2_CMAKE_FLAGS" ]]; then
    # These are passed to tools/build_pytorch_libs.sh::build_caffe2()
    EXTRA_CAFFE2_CMAKE_FLAGS=()
fi

# Determine ROCm version and architectures to build for
#
# NOTE: We should first check `DESIRED_CUDA` when determining `ROCM_VERSION`
if [[ -n "$DESIRED_CUDA" ]]; then
    if ! echo "${DESIRED_CUDA}" | grep "^rocm" >/dev/null 2>/dev/null; then
        export DESIRED_CUDA="rocm${DESIRED_CUDA}"
    fi
    # rocm3.7, rocm3.5.1
    ROCM_VERSION="$DESIRED_CUDA"
    echo "Using $ROCM_VERSION as determined by DESIRED_CUDA"
else
    echo "Must set DESIRED_CUDA"
    exit 1
fi

# Package directories
WHEELHOUSE_DIR="wheelhouse$ROCM_VERSION"
LIBTORCH_HOUSE_DIR="libtorch_house$ROCM_VERSION"
if [[ -z "$PYTORCH_FINAL_PACKAGE_DIR" ]]; then
    if [[ -z "$BUILD_PYTHONLESS" ]]; then
        PYTORCH_FINAL_PACKAGE_DIR="/remote/wheelhouse$ROCM_VERSION"
    else
        PYTORCH_FINAL_PACKAGE_DIR="/remote/libtorch_house$ROCM_VERSION"
    fi
fi
mkdir -p "$PYTORCH_FINAL_PACKAGE_DIR" || true

# To make version comparison easier, create an integer representation
# (major*10000 + minor*100 + patch).
ROCM_VERSION_CLEAN=$(echo ${ROCM_VERSION} | sed s/rocm//)
save_IFS="$IFS"
IFS=. ROCM_VERSION_ARRAY=(${ROCM_VERSION_CLEAN})
IFS="$save_IFS"
if [[ ${#ROCM_VERSION_ARRAY[@]} == 2 ]]; then
    ROCM_VERSION_MAJOR=${ROCM_VERSION_ARRAY[0]}
    ROCM_VERSION_MINOR=${ROCM_VERSION_ARRAY[1]}
    ROCM_VERSION_PATCH=0
elif [[ ${#ROCM_VERSION_ARRAY[@]} == 3 ]]; then
    ROCM_VERSION_MAJOR=${ROCM_VERSION_ARRAY[0]}
    ROCM_VERSION_MINOR=${ROCM_VERSION_ARRAY[1]}
    ROCM_VERSION_PATCH=${ROCM_VERSION_ARRAY[2]}
else
    echo "Unhandled ROCM_VERSION ${ROCM_VERSION}"
    exit 1
fi
ROCM_INT=$(($ROCM_VERSION_MAJOR * 10000 + $ROCM_VERSION_MINOR * 100 + $ROCM_VERSION_PATCH))

# Required ROCm libraries
ROCM_SO_FILES=(
    "libMIOpen.so"
    "libamdhip64.so"
    "libhipblas.so"
    "libhipfft.so"
    "libhiprand.so"
    "libhipsolver.so"
    "libhipsparse.so"
    "libhsa-runtime64.so"
    "libamd_comgr.so"
    "libmagma.so"
    "librccl.so"
    "librocblas.so"
    "librocfft.so"
    "librocm_smi64.so"
    "librocrand.so"
    "librocsolver.so"
    "librocsparse.so"
    "libroctracer64.so"
    "libroctx64.so"
    "libhipblaslt.so"
    "libhiprtc.so"
)

if [[ $ROCM_INT -ge 60100 ]]; then
    ROCM_SO_FILES+=("librocprofiler-register.so")
fi

if [[ $ROCM_INT -ge 60200 ]]; then
    ROCM_SO_FILES+=("librocm-core.so")
fi

# System libraries bundled alongside the ROCm stack; locations are
# distro-specific.
OS_NAME=$(awk -F= '/^NAME/{print $2}' /etc/os-release)
if [[ "$OS_NAME" == *"CentOS Linux"* ]]; then
    LIBGOMP_PATH="/usr/lib64/libgomp.so.1"
    LIBNUMA_PATH="/usr/lib64/libnuma.so.1"
    LIBELF_PATH="/usr/lib64/libelf.so.1"
    LIBTINFO_PATH="/usr/lib64/libtinfo.so.5"
    LIBDRM_PATH="/opt/amdgpu/lib64/libdrm.so.2"
    LIBDRM_AMDGPU_PATH="/opt/amdgpu/lib64/libdrm_amdgpu.so.1"
    if [[ $ROCM_INT -ge 60100 ]]; then
        # Below libs are direct dependencies of libhipsolver
        LIBSUITESPARSE_CONFIG_PATH="/lib64/libsuitesparseconfig.so.4"
        LIBCHOLMOD_PATH="/lib64/libcholmod.so.2"
        # Below libs are direct dependencies of libcholmod
        LIBAMD_PATH="/lib64/libamd.so.2"
        LIBCAMD_PATH="/lib64/libcamd.so.2"
        LIBCCOLAMD_PATH="/lib64/libccolamd.so.2"
        LIBCOLAMD_PATH="/lib64/libcolamd.so.2"
        LIBSATLAS_PATH="/lib64/atlas/libsatlas.so.3"
        # Below libs are direct dependencies of libsatlas
        LIBGFORTRAN_PATH="/lib64/libgfortran.so.3"
        LIBQUADMATH_PATH="/lib64/libquadmath.so.0"
    fi
    MAYBE_LIB64=lib64
elif [[ "$OS_NAME" == *"Ubuntu"* ]]; then
    LIBGOMP_PATH="/usr/lib/x86_64-linux-gnu/libgomp.so.1"
    LIBNUMA_PATH="/usr/lib/x86_64-linux-gnu/libnuma.so.1"
    LIBELF_PATH="/usr/lib/x86_64-linux-gnu/libelf.so.1"
    if [[ $ROCM_INT -ge 50300 ]]; then
        LIBTINFO_PATH="/lib/x86_64-linux-gnu/libtinfo.so.6"
    else
        LIBTINFO_PATH="/lib/x86_64-linux-gnu/libtinfo.so.5"
    fi
    LIBDRM_PATH="/usr/lib/x86_64-linux-gnu/libdrm.so.2"
    LIBDRM_AMDGPU_PATH="/usr/lib/x86_64-linux-gnu/libdrm_amdgpu.so.1"
    if [[ $ROCM_INT -ge 60100 ]]; then
        # Below libs are direct dependencies of libhipsolver
        LIBCHOLMOD_PATH="/lib/x86_64-linux-gnu/libcholmod.so.3"
        # Below libs are direct dependencies of libcholmod
        LIBSUITESPARSE_CONFIG_PATH="/lib/x86_64-linux-gnu/libsuitesparseconfig.so.5"
        LIBAMD_PATH="/lib/x86_64-linux-gnu/libamd.so.2"
        LIBCAMD_PATH="/lib/x86_64-linux-gnu/libcamd.so.2"
        LIBCCOLAMD_PATH="/lib/x86_64-linux-gnu/libccolamd.so.2"
        LIBCOLAMD_PATH="/lib/x86_64-linux-gnu/libcolamd.so.2"
        LIBMETIS_PATH="/lib/x86_64-linux-gnu/libmetis.so.5"
        LIBLAPACK_PATH="/lib/x86_64-linux-gnu/liblapack.so.3"
        LIBBLAS_PATH="/lib/x86_64-linux-gnu/libblas.so.3"
        # Below libs are direct dependencies of libblas
        LIBGFORTRAN_PATH="/lib/x86_64-linux-gnu/libgfortran.so.5"
        LIBQUADMATH_PATH="/lib/x86_64-linux-gnu/libquadmath.so.0"
    fi
    MAYBE_LIB64=lib
fi
# NOTE: deliberately unquoted -- entries that are unset for this distro or
# ROCm version simply drop out of the array.
OS_SO_PATHS=($LIBGOMP_PATH $LIBNUMA_PATH\
             $LIBELF_PATH $LIBTINFO_PATH\
             $LIBDRM_PATH $LIBDRM_AMDGPU_PATH\
             $LIBSUITESPARSE_CONFIG_PATH\
             $LIBCHOLMOD_PATH $LIBAMD_PATH\
             $LIBCAMD_PATH $LIBCCOLAMD_PATH\
             $LIBCOLAMD_PATH $LIBSATLAS_PATH\
             $LIBGFORTRAN_PATH $LIBQUADMATH_PATH\
             $LIBMETIS_PATH $LIBLAPACK_PATH\
             $LIBBLAS_PATH)
OS_SO_FILES=()
for lib in "${OS_SO_PATHS[@]}"
do
    file_name="${lib##*/}" # Substring removal of path to get filename
    OS_SO_FILES[${#OS_SO_FILES[@]}]=$file_name # Append lib to array
done

# PyTorch-version specific
# AOTriton dependency only for PyTorch >= 2.4
if (( $(echo "${PYTORCH_VERSION} 2.4" | awk '{print ($1 >= $2)}') )); then
    ROCM_SO_FILES+=("libaotriton_v2.so")
fi

# rocBLAS library files
ROCBLAS_LIB_SRC=$ROCM_HOME/lib/rocblas/library
ROCBLAS_LIB_DST=lib/rocblas/library
ARCH=$(echo $PYTORCH_ROCM_ARCH | sed 's/;/|/g') # Replace ; seperated arch list to bar for grep
ARCH_SPECIFIC_FILES=$(ls $ROCBLAS_LIB_SRC | grep -E $ARCH)
OTHER_FILES=$(ls $ROCBLAS_LIB_SRC | grep -v gfx)
ROCBLAS_LIB_FILES=($ARCH_SPECIFIC_FILES $OTHER_FILES)

# hipblaslt library files
HIPBLASLT_LIB_SRC=$ROCM_HOME/lib/hipblaslt/library
HIPBLASLT_LIB_DST=lib/hipblaslt/library
ARCH_SPECIFIC_FILES=$(ls $HIPBLASLT_LIB_SRC | grep -E $ARCH)
OTHER_FILES=$(ls $HIPBLASLT_LIB_SRC | grep -v gfx)
HIPBLASLT_LIB_FILES=($ARCH_SPECIFIC_FILES $OTHER_FILES)

# Resolve each required SONAME to a full path, searching lib/, then lib64/,
# then the whole ROCM_HOME tree; fail hard if a library is missing.
ROCM_SO_PATHS=()
for lib in "${ROCM_SO_FILES[@]}"
do
    file_path=($(find $ROCM_HOME/lib/ -name "$lib")) # First search in lib
    if [[ -z $file_path ]]; then
        if [ -d "$ROCM_HOME/lib64/" ]; then
            file_path=($(find $ROCM_HOME/lib64/ -name "$lib")) # Then search in lib64
        fi
    fi
    if [[ -z $file_path ]]; then
        file_path=($(find $ROCM_HOME/ -name "$lib")) # Then search in ROCM_HOME
    fi
    if [[ -z $file_path ]]; then
        echo "Error: Library file $lib is not found." >&2
        exit 1
    fi
    ROCM_SO_PATHS[${#ROCM_SO_PATHS[@]}]="$file_path" # Append lib to array
done

DEPS_LIST=(
    ${ROCM_SO_PATHS[*]}
    ${OS_SO_PATHS[*]}
)

DEPS_SONAME=(
    ${ROCM_SO_FILES[*]}
    ${OS_SO_FILES[*]}
)

DEPS_AUX_SRCLIST=(
    "${ROCBLAS_LIB_FILES[@]/#/$ROCBLAS_LIB_SRC/}"
    "${HIPBLASLT_LIB_FILES[@]/#/$HIPBLASLT_LIB_SRC/}"
    "/opt/amdgpu/share/libdrm/amdgpu.ids"
)

DEPS_AUX_DSTLIST=(
    "${ROCBLAS_LIB_FILES[@]/#/$ROCBLAS_LIB_DST/}"
    "${HIPBLASLT_LIB_FILES[@]/#/$HIPBLASLT_LIB_DST/}"
    "share/libdrm/amdgpu.ids"
)

# MIOpen library files
MIOPEN_SHARE_SRC=$ROCM_HOME/share/miopen/db
MIOPEN_SHARE_DST=share/miopen/db
MIOPEN_SHARE_FILES=($(ls $MIOPEN_SHARE_SRC | grep -E $ARCH))
DEPS_AUX_SRCLIST+=(${MIOPEN_SHARE_FILES[@]/#/$MIOPEN_SHARE_SRC/})
DEPS_AUX_DSTLIST+=(${MIOPEN_SHARE_FILES[@]/#/$MIOPEN_SHARE_DST/})

# RCCL library files
RCCL_SHARE_SRC=$ROCM_HOME/share/rccl/msccl-algorithms
RCCL_SHARE_DST=share/rccl/msccl-algorithms
RCCL_SHARE_FILES=($(ls $RCCL_SHARE_SRC))
DEPS_AUX_SRCLIST+=(${RCCL_SHARE_FILES[@]/#/$RCCL_SHARE_SRC/})
DEPS_AUX_DSTLIST+=(${RCCL_SHARE_FILES[@]/#/$RCCL_SHARE_DST/})

echo "PYTORCH_ROCM_ARCH: ${PYTORCH_ROCM_ARCH}"

# Hand off to the shared build driver (pythonless -> libtorch-only build).
SCRIPTPATH="$( cd "$(dirname "$0")" ; pwd -P )"
if [[ -z "$BUILD_PYTHONLESS" ]]; then
    BUILD_SCRIPT=build_common.sh
else
    BUILD_SCRIPT=build_libtorch.sh
fi
source $SCRIPTPATH/${BUILD_SCRIPT}
|
||||
26
.ci/manywheel/test_wheel.sh
Executable file
26
.ci/manywheel/test_wheel.sh
Executable file
@ -0,0 +1,26 @@
|
||||
#!/usr/bin/env bash
# Bootstrap a Miniconda installation (plus conda-build tooling) on a bare
# test machine.
set -e

yum install -y wget git

# Drop any preinstalled CUDA toolkits; the packages under test bring their own.
rm -rf /usr/local/cuda*

# Install Anaconda.
# FIX: use an explicit directory test instead of `ls /py` -- `ls` is noisy
# and conflates "missing" with other failures (e.g. permission errors).
if [[ ! -d /py ]]; then
    echo "Miniconda needs to be installed"
    wget https://repo.continuum.io/miniconda/Miniconda3-latest-Linux-x86_64.sh -O ~/miniconda.sh
    bash ~/miniconda.sh -b -p /py
else
    echo "Miniconda is already installed"
fi

export PATH="/py/bin:$PATH"

# Anaconda token (sourced into the environment when provided)
if [[ -f /remote/token ]]; then
    source /remote/token
fi

conda install -y conda-build anaconda-client
|
||||
@ -205,7 +205,8 @@ fi
|
||||
if [[ "$BUILD_ENVIRONMENT" == *-clang*-asan* ]]; then
|
||||
export USE_CUDA=0
|
||||
export USE_ASAN=1
|
||||
export UBSAN_FLAGS="-fno-sanitize-recover=all;-fno-sanitize=float-divide-by-zero;-fno-sanitize=float-cast-overflow"
|
||||
export REL_WITH_DEB_INFO=1
|
||||
export UBSAN_FLAGS="-fno-sanitize-recover=all"
|
||||
unset USE_LLVM
|
||||
fi
|
||||
|
||||
@ -273,7 +274,6 @@ else
|
||||
# set only when building other architectures
|
||||
# or building non-XLA tests.
|
||||
if [[ "$BUILD_ENVIRONMENT" != *rocm* &&
|
||||
"$BUILD_ENVIRONMENT" != *s390x* &&
|
||||
"$BUILD_ENVIRONMENT" != *xla* ]]; then
|
||||
if [[ "$BUILD_ENVIRONMENT" != *py3.8* ]]; then
|
||||
# Install numpy-2.0.2 for builds which are backward compatible with 1.X
|
||||
|
||||
@ -320,7 +320,6 @@ test_inductor_distributed() {
|
||||
python test/run_test.py -i distributed/test_c10d_functional_native.py --verbose
|
||||
python test/run_test.py -i distributed/_tensor/test_dtensor_compile.py --verbose
|
||||
python test/run_test.py -i distributed/tensor/parallel/test_micro_pipeline_tp.py --verbose
|
||||
python test/run_test.py -i distributed/_composable/test_replicate_with_compiler.py --verbose
|
||||
python test/run_test.py -i distributed/_composable/fsdp/test_fully_shard_comm.py --verbose
|
||||
python test/run_test.py -i distributed/_composable/fsdp/test_fully_shard_training.py -k test_train_parity_multi_group --verbose
|
||||
python test/run_test.py -i distributed/_composable/fsdp/test_fully_shard_training.py -k test_train_parity_with_activation_checkpointing --verbose
|
||||
@ -332,7 +331,6 @@ test_inductor_distributed() {
|
||||
python test/run_test.py -i distributed/_composable/fsdp/test_fully_shard_mixed_precision.py -k test_compute_dtype --verbose
|
||||
python test/run_test.py -i distributed/_composable/fsdp/test_fully_shard_mixed_precision.py -k test_reduce_dtype --verbose
|
||||
python test/run_test.py -i distributed/_composable/fsdp/test_fully_shard_clip_grad_norm_.py -k test_clip_grad_norm_2d --verbose
|
||||
python test/run_test.py -i distributed/_composable/fsdp/test_fully_shard_compile.py --verbose
|
||||
python test/run_test.py -i distributed/fsdp/test_fsdp_tp_integration.py -k test_fsdp_tp_integration --verbose
|
||||
|
||||
# this runs on both single-gpu and multi-gpu instance. It should be smart about skipping tests that aren't supported
|
||||
|
||||
@ -46,6 +46,9 @@ python -m pip install tlparse==0.3.25
|
||||
# Install parameterized
|
||||
python -m pip install parameterized==0.8.1
|
||||
|
||||
# Install pulp for testing ilps under torch\distributed\_tools
|
||||
python -m pip install pulp==2.9.0
|
||||
|
||||
run_tests() {
|
||||
# Run nvidia-smi if available
|
||||
for path in '/c/Program Files/NVIDIA Corporation/NVSMI/nvidia-smi.exe' /c/Windows/System32/nvidia-smi.exe; do
|
||||
|
||||
2
.github/actions/setup-linux/action.yml
vendored
2
.github/actions/setup-linux/action.yml
vendored
@ -20,7 +20,7 @@ runs:
|
||||
elif [[ $runner_name_str == *"gcp"* ]]; then
|
||||
echo "Runner is from Google Cloud Platform, No info on ec2 metadata"
|
||||
else
|
||||
curl -fsSL "http://169.254.169.254/latest/meta-data/${category}"
|
||||
curl -H "X-aws-ec2-metadata-token: $(curl -s -X PUT "http://169.254.169.254/latest/api/token" -H "X-aws-ec2-metadata-token-ttl-seconds: 30")" -fsSL "http://169.254.169.254/latest/meta-data/${category}"
|
||||
fi
|
||||
}
|
||||
echo "ami-id: $(get_ec2_metadata ami-id)"
|
||||
|
||||
2
.github/actions/setup-win/action.yml
vendored
2
.github/actions/setup-win/action.yml
vendored
@ -18,7 +18,7 @@ runs:
|
||||
# Pulled from instance metadata endpoint for EC2
|
||||
# see https://docs.aws.amazon.com/AWSEC2/latest/UserGuide/instancedata-data-retrieval.html
|
||||
category=$1
|
||||
curl -fsSL "http://169.254.169.254/latest/meta-data/${category}"
|
||||
curl -H "X-aws-ec2-metadata-token: $(curl -s -X PUT "http://169.254.169.254/latest/api/token" -H "X-aws-ec2-metadata-token-ttl-seconds: 30")" -fsSL "http://169.254.169.254/latest/meta-data/${category}"
|
||||
}
|
||||
echo "ami-id: $(get_ec2_metadata ami-id)"
|
||||
echo "instance-id: $(get_ec2_metadata instance-id)"
|
||||
|
||||
2
.github/ci_commit_pins/audio.txt
vendored
2
.github/ci_commit_pins/audio.txt
vendored
@ -1 +1 @@
|
||||
3f0569939c4369bec943fc27d1c9d8dfbc828c26
|
||||
79047bf6bdec9e32c4cffd0f9835b347781fefbf
|
||||
|
||||
251
.github/lf-canary-scale-config.yml
vendored
251
.github/lf-canary-scale-config.yml
vendored
@ -1,251 +0,0 @@
|
||||
|
||||
# This file is generated by .github/scripts/validate_scale_config.py in test-infra
|
||||
# It defines runner types that will be provisioned by by LF Self-hosted runners
|
||||
|
||||
# scale-config.yml:
|
||||
# Powers what instance types are available for GHA auto-scaled
|
||||
# runners. Runners listed here will be available as self hosted
|
||||
# runners, configuration is directly pulled from the main branch.
|
||||
#
|
||||
#
|
||||
# NOTES:
|
||||
# - Linux runners are by default non-ephemeral to reduce the amount of CreateInstaces calls
|
||||
# to avoid RequestLimitExceeded issues
|
||||
# - When updating this file, run the following command to validate the YAML and to generate
|
||||
# corresponding versions of scale-config for the pytorch/pytorch repo and merge the
|
||||
# pytorch/pytorch changes before merging these changes.
|
||||
# `python .github/scripts/validate_scale_config.py --test-infra-repo-root [path_to_test-infra_root] --pytorch-repo-root [path_to_pytorch_root]``
|
||||
#
|
||||
# TODO: Add some documentation on how the auto-scaling works
|
||||
#
|
||||
# NOTE: Default values,
|
||||
#
|
||||
# runner_types:
|
||||
# runner_label:
|
||||
# instance_type: m4.large
|
||||
# os: linux
|
||||
# max_available: 20
|
||||
# disk_size: 50
|
||||
# is_ephemeral: true
|
||||
|
||||
runner_types:
|
||||
lf.c.linux.12xlarge:
|
||||
disk_size: 200
|
||||
instance_type: c5.12xlarge
|
||||
is_ephemeral: false
|
||||
max_available: 1000
|
||||
os: linux
|
||||
ami: al2023-ami-2023.5.202*-kernel-6.1-x86_64
|
||||
lf.c.linux.10xlarge.avx2:
|
||||
disk_size: 200
|
||||
instance_type: m4.10xlarge
|
||||
is_ephemeral: false
|
||||
max_available: 450
|
||||
os: linux
|
||||
ami: al2023-ami-2023.5.202*-kernel-6.1-x86_64
|
||||
lf.c.linux.24xl.spr-metal:
|
||||
disk_size: 200
|
||||
instance_type: c7i.metal-24xl
|
||||
is_ephemeral: false
|
||||
max_available: 150
|
||||
os: linux
|
||||
ami: al2023-ami-2023.5.202*-kernel-6.1-x86_64
|
||||
lf.c.linux.16xlarge.spr:
|
||||
disk_size: 200
|
||||
instance_type: c7i.16xlarge
|
||||
is_ephemeral: false
|
||||
max_available: 150
|
||||
os: linux
|
||||
ami: al2023-ami-2023.5.202*-kernel-6.1-x86_64
|
||||
lf.c.linux.9xlarge.ephemeral:
|
||||
disk_size: 200
|
||||
instance_type: c5.9xlarge
|
||||
is_ephemeral: true
|
||||
max_available: 50
|
||||
os: linux
|
||||
ami: al2023-ami-2023.5.202*-kernel-6.1-x86_64
|
||||
variants:
|
||||
am2:
|
||||
ami: amzn2-ami-hvm-2.0.20240306.2-x86_64-ebs
|
||||
lf.c.linux.12xlarge.ephemeral:
|
||||
disk_size: 200
|
||||
instance_type: c5.12xlarge
|
||||
is_ephemeral: true
|
||||
max_available: 300
|
||||
os: linux
|
||||
ami: al2023-ami-2023.5.202*-kernel-6.1-x86_64
|
||||
lf.c.linux.16xlarge.nvidia.gpu:
|
||||
disk_size: 150
|
||||
instance_type: g3.16xlarge
|
||||
is_ephemeral: false
|
||||
max_available: 150
|
||||
os: linux
|
||||
ami: al2023-ami-2023.5.202*-kernel-6.1-x86_64
|
||||
lf.c.linux.24xlarge:
|
||||
disk_size: 150
|
||||
instance_type: c5.24xlarge
|
||||
is_ephemeral: false
|
||||
max_available: 500
|
||||
os: linux
|
||||
ami: al2023-ami-2023.5.202*-kernel-6.1-x86_64
|
||||
lf.c.linux.24xlarge.ephemeral:
|
||||
disk_size: 150
|
||||
instance_type: c5.24xlarge
|
||||
is_ephemeral: true
|
||||
max_available: 200
|
||||
os: linux
|
||||
ami: al2023-ami-2023.5.202*-kernel-6.1-x86_64
|
||||
lf.c.linux.2xlarge:
|
||||
disk_size: 150
|
||||
instance_type: c5.2xlarge
|
||||
is_ephemeral: false
|
||||
max_available: 3120
|
||||
os: linux
|
||||
ami: al2023-ami-2023.5.202*-kernel-6.1-x86_64
|
||||
lf.c.linux.4xlarge:
|
||||
disk_size: 150
|
||||
instance_type: c5.4xlarge
|
||||
is_ephemeral: false
|
||||
max_available: 1000
|
||||
os: linux
|
||||
ami: al2023-ami-2023.5.202*-kernel-6.1-x86_64
|
||||
lf.c.linux.4xlarge.nvidia.gpu:
|
||||
disk_size: 150
|
||||
instance_type: g3.4xlarge
|
||||
is_ephemeral: false
|
||||
max_available: 1000
|
||||
os: linux
|
||||
ami: al2023-ami-2023.5.202*-kernel-6.1-x86_64
|
||||
lf.c.linux.8xlarge.nvidia.gpu:
|
||||
disk_size: 150
|
||||
instance_type: g3.8xlarge
|
||||
is_ephemeral: false
|
||||
max_available: 400
|
||||
os: linux
|
||||
ami: al2023-ami-2023.5.202*-kernel-6.1-x86_64
|
||||
lf.c.linux.g4dn.12xlarge.nvidia.gpu:
|
||||
disk_size: 150
|
||||
instance_type: g4dn.12xlarge
|
||||
is_ephemeral: false
|
||||
max_available: 250
|
||||
os: linux
|
||||
ami: al2023-ami-2023.5.202*-kernel-6.1-x86_64
|
||||
lf.c.linux.g4dn.metal.nvidia.gpu:
|
||||
disk_size: 150
|
||||
instance_type: g4dn.metal
|
||||
is_ephemeral: false
|
||||
max_available: 300
|
||||
os: linux
|
||||
ami: al2023-ami-2023.5.202*-kernel-6.1-x86_64
|
||||
lf.c.linux.g5.48xlarge.nvidia.gpu:
|
||||
disk_size: 150
|
||||
instance_type: g5.48xlarge
|
||||
is_ephemeral: false
|
||||
max_available: 200
|
||||
os: linux
|
||||
ami: al2023-ami-2023.5.202*-kernel-6.1-x86_64
|
||||
lf.c.linux.g5.12xlarge.nvidia.gpu:
|
||||
disk_size: 150
|
||||
instance_type: g5.12xlarge
|
||||
is_ephemeral: false
|
||||
max_available: 150
|
||||
os: linux
|
||||
ami: al2023-ami-2023.5.202*-kernel-6.1-x86_64
|
||||
lf.c.linux.g5.4xlarge.nvidia.gpu:
|
||||
disk_size: 150
|
||||
instance_type: g5.4xlarge
|
||||
is_ephemeral: false
|
||||
max_available: 2400
|
||||
os: linux
|
||||
ami: al2023-ami-2023.5.202*-kernel-6.1-x86_64
|
||||
lf.c.linux.g6.4xlarge.experimental.nvidia.gpu:
|
||||
disk_size: 150
|
||||
instance_type: g6.4xlarge
|
||||
is_ephemeral: false
|
||||
max_available: 50
|
||||
os: linux
|
||||
ami: al2023-ami-2023.5.202*-kernel-6.1-x86_64
|
||||
lf.c.linux.large:
|
||||
max_available: 1200
|
||||
disk_size: 15
|
||||
instance_type: c5.large
|
||||
is_ephemeral: false
|
||||
os: linux
|
||||
ami: al2023-ami-2023.5.202*-kernel-6.1-x86_64
|
||||
lf.c.linux.arm64.2xlarge:
|
||||
disk_size: 256
|
||||
instance_type: t4g.2xlarge
|
||||
is_ephemeral: false
|
||||
max_available: 200
|
||||
os: linux
|
||||
ami: al2023-ami-2023.5.202*-kernel-6.1-arm64
|
||||
lf.c.linux.arm64.m7g.4xlarge:
|
||||
disk_size: 256
|
||||
instance_type: m7g.4xlarge
|
||||
is_ephemeral: false
|
||||
max_available: 200
|
||||
os: linux
|
||||
ami: al2023-ami-2023.5.202*-kernel-6.1-arm64
|
||||
lf.c.linux.arm64.2xlarge.ephemeral:
|
||||
disk_size: 256
|
||||
instance_type: t4g.2xlarge
|
||||
is_ephemeral: true
|
||||
max_available: 200
|
||||
os: linux
|
||||
ami: al2023-ami-2023.5.202*-kernel-6.1-arm64
|
||||
lf.c.linux.arm64.m7g.4xlarge.ephemeral:
|
||||
disk_size: 256
|
||||
instance_type: m7g.4xlarge
|
||||
is_ephemeral: true
|
||||
max_available: 200
|
||||
os: linux
|
||||
ami: al2023-ami-2023.5.202*-kernel-6.1-arm64
|
||||
lf.c.linux.arm64.m7g.metal:
|
||||
disk_size: 256
|
||||
instance_type: m7g.metal
|
||||
is_ephemeral: false
|
||||
max_available: 100
|
||||
os: linux
|
||||
ami: al2023-ami-2023.5.202*-kernel-6.1-arm64
|
||||
lf.c.windows.g4dn.xlarge:
|
||||
disk_size: 256
|
||||
instance_type: g4dn.xlarge
|
||||
is_ephemeral: true
|
||||
max_available: 100
|
||||
os: windows
|
||||
lf.c.windows.g4dn.xlarge.nonephemeral:
|
||||
disk_size: 256
|
||||
instance_type: g4dn.xlarge
|
||||
is_ephemeral: false
|
||||
max_available: 100
|
||||
os: windows
|
||||
lf.c.windows.4xlarge:
|
||||
disk_size: 256
|
||||
instance_type: c5d.4xlarge
|
||||
is_ephemeral: true
|
||||
max_available: 420
|
||||
os: windows
|
||||
lf.c.windows.4xlarge.nonephemeral:
|
||||
disk_size: 256
|
||||
instance_type: c5d.4xlarge
|
||||
is_ephemeral: false
|
||||
max_available: 420
|
||||
os: windows
|
||||
lf.c.windows.8xlarge.nvidia.gpu:
|
||||
disk_size: 256
|
||||
instance_type: p3.2xlarge
|
||||
is_ephemeral: true
|
||||
max_available: 300
|
||||
os: windows
|
||||
lf.c.windows.8xlarge.nvidia.gpu.nonephemeral:
|
||||
disk_size: 256
|
||||
instance_type: p3.2xlarge
|
||||
is_ephemeral: false
|
||||
max_available: 150
|
||||
os: windows
|
||||
lf.c.windows.g5.4xlarge.nvidia.gpu:
|
||||
disk_size: 256
|
||||
instance_type: g5.4xlarge
|
||||
is_ephemeral: false
|
||||
max_available: 250
|
||||
os: windows
|
||||
4
.github/lf-scale-config.yml
vendored
4
.github/lf-scale-config.yml
vendored
@ -33,7 +33,7 @@ runner_types:
|
||||
disk_size: 200
|
||||
instance_type: c5.12xlarge
|
||||
is_ephemeral: false
|
||||
max_available: 1000
|
||||
max_available: 2000
|
||||
os: linux
|
||||
ami: al2023-ami-2023.5.202*-kernel-6.1-x86_64
|
||||
lf.linux.10xlarge.avx2:
|
||||
@ -241,7 +241,7 @@ runner_types:
|
||||
disk_size: 256
|
||||
instance_type: p3.2xlarge
|
||||
is_ephemeral: false
|
||||
max_available: 150
|
||||
max_available: 300
|
||||
os: windows
|
||||
lf.windows.g5.4xlarge.nvidia.gpu:
|
||||
disk_size: 256
|
||||
|
||||
1
.github/pytorch-probot.yml
vendored
1
.github/pytorch-probot.yml
vendored
@ -22,6 +22,7 @@ ciflow_push_tags:
|
||||
- ciflow/unstable
|
||||
- ciflow/xpu
|
||||
- ciflow/torchbench
|
||||
- ciflow/autoformat
|
||||
retryable_workflows:
|
||||
- pull
|
||||
- trunk
|
||||
|
||||
3
.github/scripts/lintrunner.sh
vendored
3
.github/scripts/lintrunner.sh
vendored
@ -41,7 +41,8 @@ RC=0
|
||||
if ! lintrunner --force-color --tee-json=lint.json ${ADDITIONAL_LINTRUNNER_ARGS} 2> /dev/null; then
|
||||
echo ""
|
||||
echo -e "\e[1m\e[36mYou can reproduce these results locally by using \`lintrunner -m origin/main\`. (If you don't get the same results, run \'lintrunner init\' to update your local linter)\e[0m"
|
||||
echo -e "\e[1m\e[36mSee https://github.com/pytorch/pytorch/wiki/lintrunner for setup instructions.\e[0m"
|
||||
echo -e "\e[1m\e[36mSee https://github.com/pytorch/pytorch/wiki/lintrunner for setup instructions. To apply suggested patches automatically, use the -a flag. Before pushing another commit,\e[0m"
|
||||
echo -e "\e[1m\e[36mplease verify locally and ensure everything passes.\e[0m"
|
||||
RC=1
|
||||
fi
|
||||
|
||||
|
||||
2
.github/templates/common.yml.j2
vendored
2
.github/templates/common.yml.j2
vendored
@ -25,7 +25,7 @@ concurrency:
|
||||
# Pulled from instance metadata endpoint for EC2
|
||||
# see https://docs.aws.amazon.com/AWSEC2/latest/UserGuide/instancedata-data-retrieval.html
|
||||
category=$1
|
||||
curl -fsSL "http://169.254.169.254/latest/meta-data/${category}"
|
||||
curl -H "X-aws-ec2-metadata-token: $(curl -s -X PUT "http://169.254.169.254/latest/api/token" -H "X-aws-ec2-metadata-token-ttl-seconds: 30")" -fsSL "http://169.254.169.254/latest/meta-data/${category}"
|
||||
}
|
||||
echo "ami-id: $(get_ec2_metadata ami-id)"
|
||||
echo "instance-id: $(get_ec2_metadata instance-id)"
|
||||
|
||||
2
.github/workflows/_binary-build-linux.yml
vendored
2
.github/workflows/_binary-build-linux.yml
vendored
@ -272,6 +272,8 @@ jobs:
|
||||
docker exec -t -w "${PYTORCH_ROOT}" "${container_name}" bash -c "bash .circleci/scripts/binary_populate_env.sh"
|
||||
if [[ ${BUILD_ENVIRONMENT} == *"aarch64"* ]]; then
|
||||
docker exec -t "${container_name}" bash -c "bash /builder/aarch64_linux/aarch64_ci_build.sh"
|
||||
elif [[ ${{ inputs.PACKAGE_TYPE }} == "manywheel" || ${{ inputs.PACKAGE_TYPE }} == "libtorch" ]]; then
|
||||
docker exec -t "${container_name}" bash -c "source ${BINARY_ENV_FILE} && bash /pytorch/.ci/${{ inputs.PACKAGE_TYPE }}/build.sh"
|
||||
else
|
||||
docker exec -t "${container_name}" bash -c "source ${BINARY_ENV_FILE} && bash /builder/${{ inputs.PACKAGE_TYPE }}/build.sh"
|
||||
fi
|
||||
|
||||
64
.github/workflows/generated-windows-binary-conda-nightly.yml
generated
vendored
64
.github/workflows/generated-windows-binary-conda-nightly.yml
generated
vendored
@ -64,7 +64,7 @@ jobs:
|
||||
# Pulled from instance metadata endpoint for EC2
|
||||
# see https://docs.aws.amazon.com/AWSEC2/latest/UserGuide/instancedata-data-retrieval.html
|
||||
category=$1
|
||||
curl -fsSL "http://169.254.169.254/latest/meta-data/${category}"
|
||||
curl -H "X-aws-ec2-metadata-token: $(curl -s -X PUT "http://169.254.169.254/latest/api/token" -H "X-aws-ec2-metadata-token-ttl-seconds: 30")" -fsSL "http://169.254.169.254/latest/meta-data/${category}"
|
||||
}
|
||||
echo "ami-id: $(get_ec2_metadata ami-id)"
|
||||
echo "instance-id: $(get_ec2_metadata instance-id)"
|
||||
@ -178,7 +178,7 @@ jobs:
|
||||
# Pulled from instance metadata endpoint for EC2
|
||||
# see https://docs.aws.amazon.com/AWSEC2/latest/UserGuide/instancedata-data-retrieval.html
|
||||
category=$1
|
||||
curl -fsSL "http://169.254.169.254/latest/meta-data/${category}"
|
||||
curl -H "X-aws-ec2-metadata-token: $(curl -s -X PUT "http://169.254.169.254/latest/api/token" -H "X-aws-ec2-metadata-token-ttl-seconds: 30")" -fsSL "http://169.254.169.254/latest/meta-data/${category}"
|
||||
}
|
||||
echo "ami-id: $(get_ec2_metadata ami-id)"
|
||||
echo "instance-id: $(get_ec2_metadata instance-id)"
|
||||
@ -310,7 +310,7 @@ jobs:
|
||||
# Pulled from instance metadata endpoint for EC2
|
||||
# see https://docs.aws.amazon.com/AWSEC2/latest/UserGuide/instancedata-data-retrieval.html
|
||||
category=$1
|
||||
curl -fsSL "http://169.254.169.254/latest/meta-data/${category}"
|
||||
curl -H "X-aws-ec2-metadata-token: $(curl -s -X PUT "http://169.254.169.254/latest/api/token" -H "X-aws-ec2-metadata-token-ttl-seconds: 30")" -fsSL "http://169.254.169.254/latest/meta-data/${category}"
|
||||
}
|
||||
echo "ami-id: $(get_ec2_metadata ami-id)"
|
||||
echo "instance-id: $(get_ec2_metadata instance-id)"
|
||||
@ -425,7 +425,7 @@ jobs:
|
||||
# Pulled from instance metadata endpoint for EC2
|
||||
# see https://docs.aws.amazon.com/AWSEC2/latest/UserGuide/instancedata-data-retrieval.html
|
||||
category=$1
|
||||
curl -fsSL "http://169.254.169.254/latest/meta-data/${category}"
|
||||
curl -H "X-aws-ec2-metadata-token: $(curl -s -X PUT "http://169.254.169.254/latest/api/token" -H "X-aws-ec2-metadata-token-ttl-seconds: 30")" -fsSL "http://169.254.169.254/latest/meta-data/${category}"
|
||||
}
|
||||
echo "ami-id: $(get_ec2_metadata ami-id)"
|
||||
echo "instance-id: $(get_ec2_metadata instance-id)"
|
||||
@ -558,7 +558,7 @@ jobs:
|
||||
# Pulled from instance metadata endpoint for EC2
|
||||
# see https://docs.aws.amazon.com/AWSEC2/latest/UserGuide/instancedata-data-retrieval.html
|
||||
category=$1
|
||||
curl -fsSL "http://169.254.169.254/latest/meta-data/${category}"
|
||||
curl -H "X-aws-ec2-metadata-token: $(curl -s -X PUT "http://169.254.169.254/latest/api/token" -H "X-aws-ec2-metadata-token-ttl-seconds: 30")" -fsSL "http://169.254.169.254/latest/meta-data/${category}"
|
||||
}
|
||||
echo "ami-id: $(get_ec2_metadata ami-id)"
|
||||
echo "instance-id: $(get_ec2_metadata instance-id)"
|
||||
@ -673,7 +673,7 @@ jobs:
|
||||
# Pulled from instance metadata endpoint for EC2
|
||||
# see https://docs.aws.amazon.com/AWSEC2/latest/UserGuide/instancedata-data-retrieval.html
|
||||
category=$1
|
||||
curl -fsSL "http://169.254.169.254/latest/meta-data/${category}"
|
||||
curl -H "X-aws-ec2-metadata-token: $(curl -s -X PUT "http://169.254.169.254/latest/api/token" -H "X-aws-ec2-metadata-token-ttl-seconds: 30")" -fsSL "http://169.254.169.254/latest/meta-data/${category}"
|
||||
}
|
||||
echo "ami-id: $(get_ec2_metadata ami-id)"
|
||||
echo "instance-id: $(get_ec2_metadata instance-id)"
|
||||
@ -806,7 +806,7 @@ jobs:
|
||||
# Pulled from instance metadata endpoint for EC2
|
||||
# see https://docs.aws.amazon.com/AWSEC2/latest/UserGuide/instancedata-data-retrieval.html
|
||||
category=$1
|
||||
curl -fsSL "http://169.254.169.254/latest/meta-data/${category}"
|
||||
curl -H "X-aws-ec2-metadata-token: $(curl -s -X PUT "http://169.254.169.254/latest/api/token" -H "X-aws-ec2-metadata-token-ttl-seconds: 30")" -fsSL "http://169.254.169.254/latest/meta-data/${category}"
|
||||
}
|
||||
echo "ami-id: $(get_ec2_metadata ami-id)"
|
||||
echo "instance-id: $(get_ec2_metadata instance-id)"
|
||||
@ -921,7 +921,7 @@ jobs:
|
||||
# Pulled from instance metadata endpoint for EC2
|
||||
# see https://docs.aws.amazon.com/AWSEC2/latest/UserGuide/instancedata-data-retrieval.html
|
||||
category=$1
|
||||
curl -fsSL "http://169.254.169.254/latest/meta-data/${category}"
|
||||
curl -H "X-aws-ec2-metadata-token: $(curl -s -X PUT "http://169.254.169.254/latest/api/token" -H "X-aws-ec2-metadata-token-ttl-seconds: 30")" -fsSL "http://169.254.169.254/latest/meta-data/${category}"
|
||||
}
|
||||
echo "ami-id: $(get_ec2_metadata ami-id)"
|
||||
echo "instance-id: $(get_ec2_metadata instance-id)"
|
||||
@ -1053,7 +1053,7 @@ jobs:
|
||||
# Pulled from instance metadata endpoint for EC2
|
||||
# see https://docs.aws.amazon.com/AWSEC2/latest/UserGuide/instancedata-data-retrieval.html
|
||||
category=$1
|
||||
curl -fsSL "http://169.254.169.254/latest/meta-data/${category}"
|
||||
curl -H "X-aws-ec2-metadata-token: $(curl -s -X PUT "http://169.254.169.254/latest/api/token" -H "X-aws-ec2-metadata-token-ttl-seconds: 30")" -fsSL "http://169.254.169.254/latest/meta-data/${category}"
|
||||
}
|
||||
echo "ami-id: $(get_ec2_metadata ami-id)"
|
||||
echo "instance-id: $(get_ec2_metadata instance-id)"
|
||||
@ -1167,7 +1167,7 @@ jobs:
|
||||
# Pulled from instance metadata endpoint for EC2
|
||||
# see https://docs.aws.amazon.com/AWSEC2/latest/UserGuide/instancedata-data-retrieval.html
|
||||
category=$1
|
||||
curl -fsSL "http://169.254.169.254/latest/meta-data/${category}"
|
||||
curl -H "X-aws-ec2-metadata-token: $(curl -s -X PUT "http://169.254.169.254/latest/api/token" -H "X-aws-ec2-metadata-token-ttl-seconds: 30")" -fsSL "http://169.254.169.254/latest/meta-data/${category}"
|
||||
}
|
||||
echo "ami-id: $(get_ec2_metadata ami-id)"
|
||||
echo "instance-id: $(get_ec2_metadata instance-id)"
|
||||
@ -1299,7 +1299,7 @@ jobs:
|
||||
# Pulled from instance metadata endpoint for EC2
|
||||
# see https://docs.aws.amazon.com/AWSEC2/latest/UserGuide/instancedata-data-retrieval.html
|
||||
category=$1
|
||||
curl -fsSL "http://169.254.169.254/latest/meta-data/${category}"
|
||||
curl -H "X-aws-ec2-metadata-token: $(curl -s -X PUT "http://169.254.169.254/latest/api/token" -H "X-aws-ec2-metadata-token-ttl-seconds: 30")" -fsSL "http://169.254.169.254/latest/meta-data/${category}"
|
||||
}
|
||||
echo "ami-id: $(get_ec2_metadata ami-id)"
|
||||
echo "instance-id: $(get_ec2_metadata instance-id)"
|
||||
@ -1414,7 +1414,7 @@ jobs:
|
||||
# Pulled from instance metadata endpoint for EC2
|
||||
# see https://docs.aws.amazon.com/AWSEC2/latest/UserGuide/instancedata-data-retrieval.html
|
||||
category=$1
|
||||
curl -fsSL "http://169.254.169.254/latest/meta-data/${category}"
|
||||
curl -H "X-aws-ec2-metadata-token: $(curl -s -X PUT "http://169.254.169.254/latest/api/token" -H "X-aws-ec2-metadata-token-ttl-seconds: 30")" -fsSL "http://169.254.169.254/latest/meta-data/${category}"
|
||||
}
|
||||
echo "ami-id: $(get_ec2_metadata ami-id)"
|
||||
echo "instance-id: $(get_ec2_metadata instance-id)"
|
||||
@ -1547,7 +1547,7 @@ jobs:
|
||||
# Pulled from instance metadata endpoint for EC2
|
||||
# see https://docs.aws.amazon.com/AWSEC2/latest/UserGuide/instancedata-data-retrieval.html
|
||||
category=$1
|
||||
curl -fsSL "http://169.254.169.254/latest/meta-data/${category}"
|
||||
curl -H "X-aws-ec2-metadata-token: $(curl -s -X PUT "http://169.254.169.254/latest/api/token" -H "X-aws-ec2-metadata-token-ttl-seconds: 30")" -fsSL "http://169.254.169.254/latest/meta-data/${category}"
|
||||
}
|
||||
echo "ami-id: $(get_ec2_metadata ami-id)"
|
||||
echo "instance-id: $(get_ec2_metadata instance-id)"
|
||||
@ -1662,7 +1662,7 @@ jobs:
|
||||
# Pulled from instance metadata endpoint for EC2
|
||||
# see https://docs.aws.amazon.com/AWSEC2/latest/UserGuide/instancedata-data-retrieval.html
|
||||
category=$1
|
||||
curl -fsSL "http://169.254.169.254/latest/meta-data/${category}"
|
||||
curl -H "X-aws-ec2-metadata-token: $(curl -s -X PUT "http://169.254.169.254/latest/api/token" -H "X-aws-ec2-metadata-token-ttl-seconds: 30")" -fsSL "http://169.254.169.254/latest/meta-data/${category}"
|
||||
}
|
||||
echo "ami-id: $(get_ec2_metadata ami-id)"
|
||||
echo "instance-id: $(get_ec2_metadata instance-id)"
|
||||
@ -1795,7 +1795,7 @@ jobs:
|
||||
# Pulled from instance metadata endpoint for EC2
|
||||
# see https://docs.aws.amazon.com/AWSEC2/latest/UserGuide/instancedata-data-retrieval.html
|
||||
category=$1
|
||||
curl -fsSL "http://169.254.169.254/latest/meta-data/${category}"
|
||||
curl -H "X-aws-ec2-metadata-token: $(curl -s -X PUT "http://169.254.169.254/latest/api/token" -H "X-aws-ec2-metadata-token-ttl-seconds: 30")" -fsSL "http://169.254.169.254/latest/meta-data/${category}"
|
||||
}
|
||||
echo "ami-id: $(get_ec2_metadata ami-id)"
|
||||
echo "instance-id: $(get_ec2_metadata instance-id)"
|
||||
@ -1910,7 +1910,7 @@ jobs:
|
||||
# Pulled from instance metadata endpoint for EC2
|
||||
# see https://docs.aws.amazon.com/AWSEC2/latest/UserGuide/instancedata-data-retrieval.html
|
||||
category=$1
|
||||
curl -fsSL "http://169.254.169.254/latest/meta-data/${category}"
|
||||
curl -H "X-aws-ec2-metadata-token: $(curl -s -X PUT "http://169.254.169.254/latest/api/token" -H "X-aws-ec2-metadata-token-ttl-seconds: 30")" -fsSL "http://169.254.169.254/latest/meta-data/${category}"
|
||||
}
|
||||
echo "ami-id: $(get_ec2_metadata ami-id)"
|
||||
echo "instance-id: $(get_ec2_metadata instance-id)"
|
||||
@ -2042,7 +2042,7 @@ jobs:
|
||||
# Pulled from instance metadata endpoint for EC2
|
||||
# see https://docs.aws.amazon.com/AWSEC2/latest/UserGuide/instancedata-data-retrieval.html
|
||||
category=$1
|
||||
curl -fsSL "http://169.254.169.254/latest/meta-data/${category}"
|
||||
curl -H "X-aws-ec2-metadata-token: $(curl -s -X PUT "http://169.254.169.254/latest/api/token" -H "X-aws-ec2-metadata-token-ttl-seconds: 30")" -fsSL "http://169.254.169.254/latest/meta-data/${category}"
|
||||
}
|
||||
echo "ami-id: $(get_ec2_metadata ami-id)"
|
||||
echo "instance-id: $(get_ec2_metadata instance-id)"
|
||||
@ -2156,7 +2156,7 @@ jobs:
|
||||
# Pulled from instance metadata endpoint for EC2
|
||||
# see https://docs.aws.amazon.com/AWSEC2/latest/UserGuide/instancedata-data-retrieval.html
|
||||
category=$1
|
||||
curl -fsSL "http://169.254.169.254/latest/meta-data/${category}"
|
||||
curl -H "X-aws-ec2-metadata-token: $(curl -s -X PUT "http://169.254.169.254/latest/api/token" -H "X-aws-ec2-metadata-token-ttl-seconds: 30")" -fsSL "http://169.254.169.254/latest/meta-data/${category}"
|
||||
}
|
||||
echo "ami-id: $(get_ec2_metadata ami-id)"
|
||||
echo "instance-id: $(get_ec2_metadata instance-id)"
|
||||
@ -2288,7 +2288,7 @@ jobs:
|
||||
# Pulled from instance metadata endpoint for EC2
|
||||
# see https://docs.aws.amazon.com/AWSEC2/latest/UserGuide/instancedata-data-retrieval.html
|
||||
category=$1
|
||||
curl -fsSL "http://169.254.169.254/latest/meta-data/${category}"
|
||||
curl -H "X-aws-ec2-metadata-token: $(curl -s -X PUT "http://169.254.169.254/latest/api/token" -H "X-aws-ec2-metadata-token-ttl-seconds: 30")" -fsSL "http://169.254.169.254/latest/meta-data/${category}"
|
||||
}
|
||||
echo "ami-id: $(get_ec2_metadata ami-id)"
|
||||
echo "instance-id: $(get_ec2_metadata instance-id)"
|
||||
@ -2403,7 +2403,7 @@ jobs:
|
||||
# Pulled from instance metadata endpoint for EC2
|
||||
# see https://docs.aws.amazon.com/AWSEC2/latest/UserGuide/instancedata-data-retrieval.html
|
||||
category=$1
|
||||
curl -fsSL "http://169.254.169.254/latest/meta-data/${category}"
|
||||
curl -H "X-aws-ec2-metadata-token: $(curl -s -X PUT "http://169.254.169.254/latest/api/token" -H "X-aws-ec2-metadata-token-ttl-seconds: 30")" -fsSL "http://169.254.169.254/latest/meta-data/${category}"
|
||||
}
|
||||
echo "ami-id: $(get_ec2_metadata ami-id)"
|
||||
echo "instance-id: $(get_ec2_metadata instance-id)"
|
||||
@ -2536,7 +2536,7 @@ jobs:
|
||||
# Pulled from instance metadata endpoint for EC2
|
||||
# see https://docs.aws.amazon.com/AWSEC2/latest/UserGuide/instancedata-data-retrieval.html
|
||||
category=$1
|
||||
curl -fsSL "http://169.254.169.254/latest/meta-data/${category}"
|
||||
curl -H "X-aws-ec2-metadata-token: $(curl -s -X PUT "http://169.254.169.254/latest/api/token" -H "X-aws-ec2-metadata-token-ttl-seconds: 30")" -fsSL "http://169.254.169.254/latest/meta-data/${category}"
|
||||
}
|
||||
echo "ami-id: $(get_ec2_metadata ami-id)"
|
||||
echo "instance-id: $(get_ec2_metadata instance-id)"
|
||||
@ -2651,7 +2651,7 @@ jobs:
|
||||
# Pulled from instance metadata endpoint for EC2
|
||||
# see https://docs.aws.amazon.com/AWSEC2/latest/UserGuide/instancedata-data-retrieval.html
|
||||
category=$1
|
||||
curl -fsSL "http://169.254.169.254/latest/meta-data/${category}"
|
||||
curl -H "X-aws-ec2-metadata-token: $(curl -s -X PUT "http://169.254.169.254/latest/api/token" -H "X-aws-ec2-metadata-token-ttl-seconds: 30")" -fsSL "http://169.254.169.254/latest/meta-data/${category}"
|
||||
}
|
||||
echo "ami-id: $(get_ec2_metadata ami-id)"
|
||||
echo "instance-id: $(get_ec2_metadata instance-id)"
|
||||
@ -2784,7 +2784,7 @@ jobs:
|
||||
# Pulled from instance metadata endpoint for EC2
|
||||
# see https://docs.aws.amazon.com/AWSEC2/latest/UserGuide/instancedata-data-retrieval.html
|
||||
category=$1
|
||||
curl -fsSL "http://169.254.169.254/latest/meta-data/${category}"
|
||||
curl -H "X-aws-ec2-metadata-token: $(curl -s -X PUT "http://169.254.169.254/latest/api/token" -H "X-aws-ec2-metadata-token-ttl-seconds: 30")" -fsSL "http://169.254.169.254/latest/meta-data/${category}"
|
||||
}
|
||||
echo "ami-id: $(get_ec2_metadata ami-id)"
|
||||
echo "instance-id: $(get_ec2_metadata instance-id)"
|
||||
@ -2899,7 +2899,7 @@ jobs:
|
||||
# Pulled from instance metadata endpoint for EC2
|
||||
# see https://docs.aws.amazon.com/AWSEC2/latest/UserGuide/instancedata-data-retrieval.html
|
||||
category=$1
|
||||
curl -fsSL "http://169.254.169.254/latest/meta-data/${category}"
|
||||
curl -H "X-aws-ec2-metadata-token: $(curl -s -X PUT "http://169.254.169.254/latest/api/token" -H "X-aws-ec2-metadata-token-ttl-seconds: 30")" -fsSL "http://169.254.169.254/latest/meta-data/${category}"
|
||||
}
|
||||
echo "ami-id: $(get_ec2_metadata ami-id)"
|
||||
echo "instance-id: $(get_ec2_metadata instance-id)"
|
||||
@ -3031,7 +3031,7 @@ jobs:
|
||||
# Pulled from instance metadata endpoint for EC2
|
||||
# see https://docs.aws.amazon.com/AWSEC2/latest/UserGuide/instancedata-data-retrieval.html
|
||||
category=$1
|
||||
curl -fsSL "http://169.254.169.254/latest/meta-data/${category}"
|
||||
curl -H "X-aws-ec2-metadata-token: $(curl -s -X PUT "http://169.254.169.254/latest/api/token" -H "X-aws-ec2-metadata-token-ttl-seconds: 30")" -fsSL "http://169.254.169.254/latest/meta-data/${category}"
|
||||
}
|
||||
echo "ami-id: $(get_ec2_metadata ami-id)"
|
||||
echo "instance-id: $(get_ec2_metadata instance-id)"
|
||||
@ -3145,7 +3145,7 @@ jobs:
|
||||
# Pulled from instance metadata endpoint for EC2
|
||||
# see https://docs.aws.amazon.com/AWSEC2/latest/UserGuide/instancedata-data-retrieval.html
|
||||
category=$1
|
||||
curl -fsSL "http://169.254.169.254/latest/meta-data/${category}"
|
||||
curl -H "X-aws-ec2-metadata-token: $(curl -s -X PUT "http://169.254.169.254/latest/api/token" -H "X-aws-ec2-metadata-token-ttl-seconds: 30")" -fsSL "http://169.254.169.254/latest/meta-data/${category}"
|
||||
}
|
||||
echo "ami-id: $(get_ec2_metadata ami-id)"
|
||||
echo "instance-id: $(get_ec2_metadata instance-id)"
|
||||
@ -3277,7 +3277,7 @@ jobs:
|
||||
# Pulled from instance metadata endpoint for EC2
|
||||
# see https://docs.aws.amazon.com/AWSEC2/latest/UserGuide/instancedata-data-retrieval.html
|
||||
category=$1
|
||||
curl -fsSL "http://169.254.169.254/latest/meta-data/${category}"
|
||||
curl -H "X-aws-ec2-metadata-token: $(curl -s -X PUT "http://169.254.169.254/latest/api/token" -H "X-aws-ec2-metadata-token-ttl-seconds: 30")" -fsSL "http://169.254.169.254/latest/meta-data/${category}"
|
||||
}
|
||||
echo "ami-id: $(get_ec2_metadata ami-id)"
|
||||
echo "instance-id: $(get_ec2_metadata instance-id)"
|
||||
@ -3392,7 +3392,7 @@ jobs:
|
||||
# Pulled from instance metadata endpoint for EC2
|
||||
# see https://docs.aws.amazon.com/AWSEC2/latest/UserGuide/instancedata-data-retrieval.html
|
||||
category=$1
|
||||
curl -fsSL "http://169.254.169.254/latest/meta-data/${category}"
|
||||
curl -H "X-aws-ec2-metadata-token: $(curl -s -X PUT "http://169.254.169.254/latest/api/token" -H "X-aws-ec2-metadata-token-ttl-seconds: 30")" -fsSL "http://169.254.169.254/latest/meta-data/${category}"
|
||||
}
|
||||
echo "ami-id: $(get_ec2_metadata ami-id)"
|
||||
echo "instance-id: $(get_ec2_metadata instance-id)"
|
||||
@ -3525,7 +3525,7 @@ jobs:
|
||||
# Pulled from instance metadata endpoint for EC2
|
||||
# see https://docs.aws.amazon.com/AWSEC2/latest/UserGuide/instancedata-data-retrieval.html
|
||||
category=$1
|
||||
curl -fsSL "http://169.254.169.254/latest/meta-data/${category}"
|
||||
curl -H "X-aws-ec2-metadata-token: $(curl -s -X PUT "http://169.254.169.254/latest/api/token" -H "X-aws-ec2-metadata-token-ttl-seconds: 30")" -fsSL "http://169.254.169.254/latest/meta-data/${category}"
|
||||
}
|
||||
echo "ami-id: $(get_ec2_metadata ami-id)"
|
||||
echo "instance-id: $(get_ec2_metadata instance-id)"
|
||||
@ -3640,7 +3640,7 @@ jobs:
|
||||
# Pulled from instance metadata endpoint for EC2
|
||||
# see https://docs.aws.amazon.com/AWSEC2/latest/UserGuide/instancedata-data-retrieval.html
|
||||
category=$1
|
||||
curl -fsSL "http://169.254.169.254/latest/meta-data/${category}"
|
||||
curl -H "X-aws-ec2-metadata-token: $(curl -s -X PUT "http://169.254.169.254/latest/api/token" -H "X-aws-ec2-metadata-token-ttl-seconds: 30")" -fsSL "http://169.254.169.254/latest/meta-data/${category}"
|
||||
}
|
||||
echo "ami-id: $(get_ec2_metadata ami-id)"
|
||||
echo "instance-id: $(get_ec2_metadata instance-id)"
|
||||
@ -3773,7 +3773,7 @@ jobs:
|
||||
# Pulled from instance metadata endpoint for EC2
|
||||
# see https://docs.aws.amazon.com/AWSEC2/latest/UserGuide/instancedata-data-retrieval.html
|
||||
category=$1
|
||||
curl -fsSL "http://169.254.169.254/latest/meta-data/${category}"
|
||||
curl -H "X-aws-ec2-metadata-token: $(curl -s -X PUT "http://169.254.169.254/latest/api/token" -H "X-aws-ec2-metadata-token-ttl-seconds: 30")" -fsSL "http://169.254.169.254/latest/meta-data/${category}"
|
||||
}
|
||||
echo "ami-id: $(get_ec2_metadata ami-id)"
|
||||
echo "instance-id: $(get_ec2_metadata instance-id)"
|
||||
@ -3888,7 +3888,7 @@ jobs:
|
||||
# Pulled from instance metadata endpoint for EC2
|
||||
# see https://docs.aws.amazon.com/AWSEC2/latest/UserGuide/instancedata-data-retrieval.html
|
||||
category=$1
|
||||
curl -fsSL "http://169.254.169.254/latest/meta-data/${category}"
|
||||
curl -H "X-aws-ec2-metadata-token: $(curl -s -X PUT "http://169.254.169.254/latest/api/token" -H "X-aws-ec2-metadata-token-ttl-seconds: 30")" -fsSL "http://169.254.169.254/latest/meta-data/${category}"
|
||||
}
|
||||
echo "ami-id: $(get_ec2_metadata ami-id)"
|
||||
echo "instance-id: $(get_ec2_metadata instance-id)"
|
||||
|
||||
4
.github/workflows/generated-windows-binary-libtorch-debug-main.yml
generated
vendored
4
.github/workflows/generated-windows-binary-libtorch-debug-main.yml
generated
vendored
@ -61,7 +61,7 @@ jobs:
|
||||
# Pulled from instance metadata endpoint for EC2
|
||||
# see https://docs.aws.amazon.com/AWSEC2/latest/UserGuide/instancedata-data-retrieval.html
|
||||
category=$1
|
||||
curl -fsSL "http://169.254.169.254/latest/meta-data/${category}"
|
||||
curl -H "X-aws-ec2-metadata-token: $(curl -s -X PUT "http://169.254.169.254/latest/api/token" -H "X-aws-ec2-metadata-token-ttl-seconds: 30")" -fsSL "http://169.254.169.254/latest/meta-data/${category}"
|
||||
}
|
||||
echo "ami-id: $(get_ec2_metadata ami-id)"
|
||||
echo "instance-id: $(get_ec2_metadata instance-id)"
|
||||
@ -179,7 +179,7 @@ jobs:
|
||||
# Pulled from instance metadata endpoint for EC2
|
||||
# see https://docs.aws.amazon.com/AWSEC2/latest/UserGuide/instancedata-data-retrieval.html
|
||||
category=$1
|
||||
curl -fsSL "http://169.254.169.254/latest/meta-data/${category}"
|
||||
curl -H "X-aws-ec2-metadata-token: $(curl -s -X PUT "http://169.254.169.254/latest/api/token" -H "X-aws-ec2-metadata-token-ttl-seconds: 30")" -fsSL "http://169.254.169.254/latest/meta-data/${category}"
|
||||
}
|
||||
echo "ami-id: $(get_ec2_metadata ami-id)"
|
||||
echo "instance-id: $(get_ec2_metadata instance-id)"
|
||||
|
||||
16
.github/workflows/generated-windows-binary-libtorch-debug-nightly.yml
generated
vendored
16
.github/workflows/generated-windows-binary-libtorch-debug-nightly.yml
generated
vendored
@ -68,7 +68,7 @@ jobs:
|
||||
# Pulled from instance metadata endpoint for EC2
|
||||
# see https://docs.aws.amazon.com/AWSEC2/latest/UserGuide/instancedata-data-retrieval.html
|
||||
category=$1
|
||||
curl -fsSL "http://169.254.169.254/latest/meta-data/${category}"
|
||||
curl -H "X-aws-ec2-metadata-token: $(curl -s -X PUT "http://169.254.169.254/latest/api/token" -H "X-aws-ec2-metadata-token-ttl-seconds: 30")" -fsSL "http://169.254.169.254/latest/meta-data/${category}"
|
||||
}
|
||||
echo "ami-id: $(get_ec2_metadata ami-id)"
|
||||
echo "instance-id: $(get_ec2_metadata instance-id)"
|
||||
@ -186,7 +186,7 @@ jobs:
|
||||
# Pulled from instance metadata endpoint for EC2
|
||||
# see https://docs.aws.amazon.com/AWSEC2/latest/UserGuide/instancedata-data-retrieval.html
|
||||
category=$1
|
||||
curl -fsSL "http://169.254.169.254/latest/meta-data/${category}"
|
||||
curl -H "X-aws-ec2-metadata-token: $(curl -s -X PUT "http://169.254.169.254/latest/api/token" -H "X-aws-ec2-metadata-token-ttl-seconds: 30")" -fsSL "http://169.254.169.254/latest/meta-data/${category}"
|
||||
}
|
||||
echo "ami-id: $(get_ec2_metadata ami-id)"
|
||||
echo "instance-id: $(get_ec2_metadata instance-id)"
|
||||
@ -326,7 +326,7 @@ jobs:
|
||||
# Pulled from instance metadata endpoint for EC2
|
||||
# see https://docs.aws.amazon.com/AWSEC2/latest/UserGuide/instancedata-data-retrieval.html
|
||||
category=$1
|
||||
curl -fsSL "http://169.254.169.254/latest/meta-data/${category}"
|
||||
curl -H "X-aws-ec2-metadata-token: $(curl -s -X PUT "http://169.254.169.254/latest/api/token" -H "X-aws-ec2-metadata-token-ttl-seconds: 30")" -fsSL "http://169.254.169.254/latest/meta-data/${category}"
|
||||
}
|
||||
echo "ami-id: $(get_ec2_metadata ami-id)"
|
||||
echo "instance-id: $(get_ec2_metadata instance-id)"
|
||||
@ -445,7 +445,7 @@ jobs:
|
||||
# Pulled from instance metadata endpoint for EC2
|
||||
# see https://docs.aws.amazon.com/AWSEC2/latest/UserGuide/instancedata-data-retrieval.html
|
||||
category=$1
|
||||
curl -fsSL "http://169.254.169.254/latest/meta-data/${category}"
|
||||
curl -H "X-aws-ec2-metadata-token: $(curl -s -X PUT "http://169.254.169.254/latest/api/token" -H "X-aws-ec2-metadata-token-ttl-seconds: 30")" -fsSL "http://169.254.169.254/latest/meta-data/${category}"
|
||||
}
|
||||
echo "ami-id: $(get_ec2_metadata ami-id)"
|
||||
echo "instance-id: $(get_ec2_metadata instance-id)"
|
||||
@ -586,7 +586,7 @@ jobs:
|
||||
# Pulled from instance metadata endpoint for EC2
|
||||
# see https://docs.aws.amazon.com/AWSEC2/latest/UserGuide/instancedata-data-retrieval.html
|
||||
category=$1
|
||||
curl -fsSL "http://169.254.169.254/latest/meta-data/${category}"
|
||||
curl -H "X-aws-ec2-metadata-token: $(curl -s -X PUT "http://169.254.169.254/latest/api/token" -H "X-aws-ec2-metadata-token-ttl-seconds: 30")" -fsSL "http://169.254.169.254/latest/meta-data/${category}"
|
||||
}
|
||||
echo "ami-id: $(get_ec2_metadata ami-id)"
|
||||
echo "instance-id: $(get_ec2_metadata instance-id)"
|
||||
@ -705,7 +705,7 @@ jobs:
|
||||
# Pulled from instance metadata endpoint for EC2
|
||||
# see https://docs.aws.amazon.com/AWSEC2/latest/UserGuide/instancedata-data-retrieval.html
|
||||
category=$1
|
||||
curl -fsSL "http://169.254.169.254/latest/meta-data/${category}"
|
||||
curl -H "X-aws-ec2-metadata-token: $(curl -s -X PUT "http://169.254.169.254/latest/api/token" -H "X-aws-ec2-metadata-token-ttl-seconds: 30")" -fsSL "http://169.254.169.254/latest/meta-data/${category}"
|
||||
}
|
||||
echo "ami-id: $(get_ec2_metadata ami-id)"
|
||||
echo "instance-id: $(get_ec2_metadata instance-id)"
|
||||
@ -846,7 +846,7 @@ jobs:
|
||||
# Pulled from instance metadata endpoint for EC2
|
||||
# see https://docs.aws.amazon.com/AWSEC2/latest/UserGuide/instancedata-data-retrieval.html
|
||||
category=$1
|
||||
curl -fsSL "http://169.254.169.254/latest/meta-data/${category}"
|
||||
curl -H "X-aws-ec2-metadata-token: $(curl -s -X PUT "http://169.254.169.254/latest/api/token" -H "X-aws-ec2-metadata-token-ttl-seconds: 30")" -fsSL "http://169.254.169.254/latest/meta-data/${category}"
|
||||
}
|
||||
echo "ami-id: $(get_ec2_metadata ami-id)"
|
||||
echo "instance-id: $(get_ec2_metadata instance-id)"
|
||||
@ -965,7 +965,7 @@ jobs:
|
||||
# Pulled from instance metadata endpoint for EC2
|
||||
# see https://docs.aws.amazon.com/AWSEC2/latest/UserGuide/instancedata-data-retrieval.html
|
||||
category=$1
|
||||
curl -fsSL "http://169.254.169.254/latest/meta-data/${category}"
|
||||
curl -H "X-aws-ec2-metadata-token: $(curl -s -X PUT "http://169.254.169.254/latest/api/token" -H "X-aws-ec2-metadata-token-ttl-seconds: 30")" -fsSL "http://169.254.169.254/latest/meta-data/${category}"
|
||||
}
|
||||
echo "ami-id: $(get_ec2_metadata ami-id)"
|
||||
echo "instance-id: $(get_ec2_metadata instance-id)"
|
||||
|
||||
4
.github/workflows/generated-windows-binary-libtorch-release-main.yml
generated
vendored
4
.github/workflows/generated-windows-binary-libtorch-release-main.yml
generated
vendored
@ -61,7 +61,7 @@ jobs:
|
||||
# Pulled from instance metadata endpoint for EC2
|
||||
# see https://docs.aws.amazon.com/AWSEC2/latest/UserGuide/instancedata-data-retrieval.html
|
||||
category=$1
|
||||
curl -fsSL "http://169.254.169.254/latest/meta-data/${category}"
|
||||
curl -H "X-aws-ec2-metadata-token: $(curl -s -X PUT "http://169.254.169.254/latest/api/token" -H "X-aws-ec2-metadata-token-ttl-seconds: 30")" -fsSL "http://169.254.169.254/latest/meta-data/${category}"
|
||||
}
|
||||
echo "ami-id: $(get_ec2_metadata ami-id)"
|
||||
echo "instance-id: $(get_ec2_metadata instance-id)"
|
||||
@ -179,7 +179,7 @@ jobs:
|
||||
# Pulled from instance metadata endpoint for EC2
|
||||
# see https://docs.aws.amazon.com/AWSEC2/latest/UserGuide/instancedata-data-retrieval.html
|
||||
category=$1
|
||||
curl -fsSL "http://169.254.169.254/latest/meta-data/${category}"
|
||||
curl -H "X-aws-ec2-metadata-token: $(curl -s -X PUT "http://169.254.169.254/latest/api/token" -H "X-aws-ec2-metadata-token-ttl-seconds: 30")" -fsSL "http://169.254.169.254/latest/meta-data/${category}"
|
||||
}
|
||||
echo "ami-id: $(get_ec2_metadata ami-id)"
|
||||
echo "instance-id: $(get_ec2_metadata instance-id)"
|
||||
|
||||
16
.github/workflows/generated-windows-binary-libtorch-release-nightly.yml
generated
vendored
16
.github/workflows/generated-windows-binary-libtorch-release-nightly.yml
generated
vendored
@ -68,7 +68,7 @@ jobs:
|
||||
# Pulled from instance metadata endpoint for EC2
|
||||
# see https://docs.aws.amazon.com/AWSEC2/latest/UserGuide/instancedata-data-retrieval.html
|
||||
category=$1
|
||||
curl -fsSL "http://169.254.169.254/latest/meta-data/${category}"
|
||||
curl -H "X-aws-ec2-metadata-token: $(curl -s -X PUT "http://169.254.169.254/latest/api/token" -H "X-aws-ec2-metadata-token-ttl-seconds: 30")" -fsSL "http://169.254.169.254/latest/meta-data/${category}"
|
||||
}
|
||||
echo "ami-id: $(get_ec2_metadata ami-id)"
|
||||
echo "instance-id: $(get_ec2_metadata instance-id)"
|
||||
@ -186,7 +186,7 @@ jobs:
|
||||
# Pulled from instance metadata endpoint for EC2
|
||||
# see https://docs.aws.amazon.com/AWSEC2/latest/UserGuide/instancedata-data-retrieval.html
|
||||
category=$1
|
||||
curl -fsSL "http://169.254.169.254/latest/meta-data/${category}"
|
||||
curl -H "X-aws-ec2-metadata-token: $(curl -s -X PUT "http://169.254.169.254/latest/api/token" -H "X-aws-ec2-metadata-token-ttl-seconds: 30")" -fsSL "http://169.254.169.254/latest/meta-data/${category}"
|
||||
}
|
||||
echo "ami-id: $(get_ec2_metadata ami-id)"
|
||||
echo "instance-id: $(get_ec2_metadata instance-id)"
|
||||
@ -326,7 +326,7 @@ jobs:
|
||||
# Pulled from instance metadata endpoint for EC2
|
||||
# see https://docs.aws.amazon.com/AWSEC2/latest/UserGuide/instancedata-data-retrieval.html
|
||||
category=$1
|
||||
curl -fsSL "http://169.254.169.254/latest/meta-data/${category}"
|
||||
curl -H "X-aws-ec2-metadata-token: $(curl -s -X PUT "http://169.254.169.254/latest/api/token" -H "X-aws-ec2-metadata-token-ttl-seconds: 30")" -fsSL "http://169.254.169.254/latest/meta-data/${category}"
|
||||
}
|
||||
echo "ami-id: $(get_ec2_metadata ami-id)"
|
||||
echo "instance-id: $(get_ec2_metadata instance-id)"
|
||||
@ -445,7 +445,7 @@ jobs:
|
||||
# Pulled from instance metadata endpoint for EC2
|
||||
# see https://docs.aws.amazon.com/AWSEC2/latest/UserGuide/instancedata-data-retrieval.html
|
||||
category=$1
|
||||
curl -fsSL "http://169.254.169.254/latest/meta-data/${category}"
|
||||
curl -H "X-aws-ec2-metadata-token: $(curl -s -X PUT "http://169.254.169.254/latest/api/token" -H "X-aws-ec2-metadata-token-ttl-seconds: 30")" -fsSL "http://169.254.169.254/latest/meta-data/${category}"
|
||||
}
|
||||
echo "ami-id: $(get_ec2_metadata ami-id)"
|
||||
echo "instance-id: $(get_ec2_metadata instance-id)"
|
||||
@ -586,7 +586,7 @@ jobs:
|
||||
# Pulled from instance metadata endpoint for EC2
|
||||
# see https://docs.aws.amazon.com/AWSEC2/latest/UserGuide/instancedata-data-retrieval.html
|
||||
category=$1
|
||||
curl -fsSL "http://169.254.169.254/latest/meta-data/${category}"
|
||||
curl -H "X-aws-ec2-metadata-token: $(curl -s -X PUT "http://169.254.169.254/latest/api/token" -H "X-aws-ec2-metadata-token-ttl-seconds: 30")" -fsSL "http://169.254.169.254/latest/meta-data/${category}"
|
||||
}
|
||||
echo "ami-id: $(get_ec2_metadata ami-id)"
|
||||
echo "instance-id: $(get_ec2_metadata instance-id)"
|
||||
@ -705,7 +705,7 @@ jobs:
|
||||
# Pulled from instance metadata endpoint for EC2
|
||||
# see https://docs.aws.amazon.com/AWSEC2/latest/UserGuide/instancedata-data-retrieval.html
|
||||
category=$1
|
||||
curl -fsSL "http://169.254.169.254/latest/meta-data/${category}"
|
||||
curl -H "X-aws-ec2-metadata-token: $(curl -s -X PUT "http://169.254.169.254/latest/api/token" -H "X-aws-ec2-metadata-token-ttl-seconds: 30")" -fsSL "http://169.254.169.254/latest/meta-data/${category}"
|
||||
}
|
||||
echo "ami-id: $(get_ec2_metadata ami-id)"
|
||||
echo "instance-id: $(get_ec2_metadata instance-id)"
|
||||
@ -846,7 +846,7 @@ jobs:
|
||||
# Pulled from instance metadata endpoint for EC2
|
||||
# see https://docs.aws.amazon.com/AWSEC2/latest/UserGuide/instancedata-data-retrieval.html
|
||||
category=$1
|
||||
curl -fsSL "http://169.254.169.254/latest/meta-data/${category}"
|
||||
curl -H "X-aws-ec2-metadata-token: $(curl -s -X PUT "http://169.254.169.254/latest/api/token" -H "X-aws-ec2-metadata-token-ttl-seconds: 30")" -fsSL "http://169.254.169.254/latest/meta-data/${category}"
|
||||
}
|
||||
echo "ami-id: $(get_ec2_metadata ami-id)"
|
||||
echo "instance-id: $(get_ec2_metadata instance-id)"
|
||||
@ -965,7 +965,7 @@ jobs:
|
||||
# Pulled from instance metadata endpoint for EC2
|
||||
# see https://docs.aws.amazon.com/AWSEC2/latest/UserGuide/instancedata-data-retrieval.html
|
||||
category=$1
|
||||
curl -fsSL "http://169.254.169.254/latest/meta-data/${category}"
|
||||
curl -H "X-aws-ec2-metadata-token: $(curl -s -X PUT "http://169.254.169.254/latest/api/token" -H "X-aws-ec2-metadata-token-ttl-seconds: 30")" -fsSL "http://169.254.169.254/latest/meta-data/${category}"
|
||||
}
|
||||
echo "ami-id: $(get_ec2_metadata ami-id)"
|
||||
echo "instance-id: $(get_ec2_metadata instance-id)"
|
||||
|
||||
80
.github/workflows/generated-windows-binary-wheel-nightly.yml
generated
vendored
80
.github/workflows/generated-windows-binary-wheel-nightly.yml
generated
vendored
@ -65,7 +65,7 @@ jobs:
|
||||
# Pulled from instance metadata endpoint for EC2
|
||||
# see https://docs.aws.amazon.com/AWSEC2/latest/UserGuide/instancedata-data-retrieval.html
|
||||
category=$1
|
||||
curl -fsSL "http://169.254.169.254/latest/meta-data/${category}"
|
||||
curl -H "X-aws-ec2-metadata-token: $(curl -s -X PUT "http://169.254.169.254/latest/api/token" -H "X-aws-ec2-metadata-token-ttl-seconds: 30")" -fsSL "http://169.254.169.254/latest/meta-data/${category}"
|
||||
}
|
||||
echo "ami-id: $(get_ec2_metadata ami-id)"
|
||||
echo "instance-id: $(get_ec2_metadata instance-id)"
|
||||
@ -179,7 +179,7 @@ jobs:
|
||||
# Pulled from instance metadata endpoint for EC2
|
||||
# see https://docs.aws.amazon.com/AWSEC2/latest/UserGuide/instancedata-data-retrieval.html
|
||||
category=$1
|
||||
curl -fsSL "http://169.254.169.254/latest/meta-data/${category}"
|
||||
curl -H "X-aws-ec2-metadata-token: $(curl -s -X PUT "http://169.254.169.254/latest/api/token" -H "X-aws-ec2-metadata-token-ttl-seconds: 30")" -fsSL "http://169.254.169.254/latest/meta-data/${category}"
|
||||
}
|
||||
echo "ami-id: $(get_ec2_metadata ami-id)"
|
||||
echo "instance-id: $(get_ec2_metadata instance-id)"
|
||||
@ -312,7 +312,7 @@ jobs:
|
||||
# Pulled from instance metadata endpoint for EC2
|
||||
# see https://docs.aws.amazon.com/AWSEC2/latest/UserGuide/instancedata-data-retrieval.html
|
||||
category=$1
|
||||
curl -fsSL "http://169.254.169.254/latest/meta-data/${category}"
|
||||
curl -H "X-aws-ec2-metadata-token: $(curl -s -X PUT "http://169.254.169.254/latest/api/token" -H "X-aws-ec2-metadata-token-ttl-seconds: 30")" -fsSL "http://169.254.169.254/latest/meta-data/${category}"
|
||||
}
|
||||
echo "ami-id: $(get_ec2_metadata ami-id)"
|
||||
echo "instance-id: $(get_ec2_metadata instance-id)"
|
||||
@ -427,7 +427,7 @@ jobs:
|
||||
# Pulled from instance metadata endpoint for EC2
|
||||
# see https://docs.aws.amazon.com/AWSEC2/latest/UserGuide/instancedata-data-retrieval.html
|
||||
category=$1
|
||||
curl -fsSL "http://169.254.169.254/latest/meta-data/${category}"
|
||||
curl -H "X-aws-ec2-metadata-token: $(curl -s -X PUT "http://169.254.169.254/latest/api/token" -H "X-aws-ec2-metadata-token-ttl-seconds: 30")" -fsSL "http://169.254.169.254/latest/meta-data/${category}"
|
||||
}
|
||||
echo "ami-id: $(get_ec2_metadata ami-id)"
|
||||
echo "instance-id: $(get_ec2_metadata instance-id)"
|
||||
@ -561,7 +561,7 @@ jobs:
|
||||
# Pulled from instance metadata endpoint for EC2
|
||||
# see https://docs.aws.amazon.com/AWSEC2/latest/UserGuide/instancedata-data-retrieval.html
|
||||
category=$1
|
||||
curl -fsSL "http://169.254.169.254/latest/meta-data/${category}"
|
||||
curl -H "X-aws-ec2-metadata-token: $(curl -s -X PUT "http://169.254.169.254/latest/api/token" -H "X-aws-ec2-metadata-token-ttl-seconds: 30")" -fsSL "http://169.254.169.254/latest/meta-data/${category}"
|
||||
}
|
||||
echo "ami-id: $(get_ec2_metadata ami-id)"
|
||||
echo "instance-id: $(get_ec2_metadata instance-id)"
|
||||
@ -676,7 +676,7 @@ jobs:
|
||||
# Pulled from instance metadata endpoint for EC2
|
||||
# see https://docs.aws.amazon.com/AWSEC2/latest/UserGuide/instancedata-data-retrieval.html
|
||||
category=$1
|
||||
curl -fsSL "http://169.254.169.254/latest/meta-data/${category}"
|
||||
curl -H "X-aws-ec2-metadata-token: $(curl -s -X PUT "http://169.254.169.254/latest/api/token" -H "X-aws-ec2-metadata-token-ttl-seconds: 30")" -fsSL "http://169.254.169.254/latest/meta-data/${category}"
|
||||
}
|
||||
echo "ami-id: $(get_ec2_metadata ami-id)"
|
||||
echo "instance-id: $(get_ec2_metadata instance-id)"
|
||||
@ -810,7 +810,7 @@ jobs:
|
||||
# Pulled from instance metadata endpoint for EC2
|
||||
# see https://docs.aws.amazon.com/AWSEC2/latest/UserGuide/instancedata-data-retrieval.html
|
||||
category=$1
|
||||
curl -fsSL "http://169.254.169.254/latest/meta-data/${category}"
|
||||
curl -H "X-aws-ec2-metadata-token: $(curl -s -X PUT "http://169.254.169.254/latest/api/token" -H "X-aws-ec2-metadata-token-ttl-seconds: 30")" -fsSL "http://169.254.169.254/latest/meta-data/${category}"
|
||||
}
|
||||
echo "ami-id: $(get_ec2_metadata ami-id)"
|
||||
echo "instance-id: $(get_ec2_metadata instance-id)"
|
||||
@ -925,7 +925,7 @@ jobs:
|
||||
# Pulled from instance metadata endpoint for EC2
|
||||
# see https://docs.aws.amazon.com/AWSEC2/latest/UserGuide/instancedata-data-retrieval.html
|
||||
category=$1
|
||||
curl -fsSL "http://169.254.169.254/latest/meta-data/${category}"
|
||||
curl -H "X-aws-ec2-metadata-token: $(curl -s -X PUT "http://169.254.169.254/latest/api/token" -H "X-aws-ec2-metadata-token-ttl-seconds: 30")" -fsSL "http://169.254.169.254/latest/meta-data/${category}"
|
||||
}
|
||||
echo "ami-id: $(get_ec2_metadata ami-id)"
|
||||
echo "instance-id: $(get_ec2_metadata instance-id)"
|
||||
@ -1057,7 +1057,7 @@ jobs:
|
||||
# Pulled from instance metadata endpoint for EC2
|
||||
# see https://docs.aws.amazon.com/AWSEC2/latest/UserGuide/instancedata-data-retrieval.html
|
||||
category=$1
|
||||
curl -fsSL "http://169.254.169.254/latest/meta-data/${category}"
|
||||
curl -H "X-aws-ec2-metadata-token: $(curl -s -X PUT "http://169.254.169.254/latest/api/token" -H "X-aws-ec2-metadata-token-ttl-seconds: 30")" -fsSL "http://169.254.169.254/latest/meta-data/${category}"
|
||||
}
|
||||
echo "ami-id: $(get_ec2_metadata ami-id)"
|
||||
echo "instance-id: $(get_ec2_metadata instance-id)"
|
||||
@ -1171,7 +1171,7 @@ jobs:
|
||||
# Pulled from instance metadata endpoint for EC2
|
||||
# see https://docs.aws.amazon.com/AWSEC2/latest/UserGuide/instancedata-data-retrieval.html
|
||||
category=$1
|
||||
curl -fsSL "http://169.254.169.254/latest/meta-data/${category}"
|
||||
curl -H "X-aws-ec2-metadata-token: $(curl -s -X PUT "http://169.254.169.254/latest/api/token" -H "X-aws-ec2-metadata-token-ttl-seconds: 30")" -fsSL "http://169.254.169.254/latest/meta-data/${category}"
|
||||
}
|
||||
echo "ami-id: $(get_ec2_metadata ami-id)"
|
||||
echo "instance-id: $(get_ec2_metadata instance-id)"
|
||||
@ -1303,7 +1303,7 @@ jobs:
|
||||
# Pulled from instance metadata endpoint for EC2
|
||||
# see https://docs.aws.amazon.com/AWSEC2/latest/UserGuide/instancedata-data-retrieval.html
|
||||
category=$1
|
||||
curl -fsSL "http://169.254.169.254/latest/meta-data/${category}"
|
||||
curl -H "X-aws-ec2-metadata-token: $(curl -s -X PUT "http://169.254.169.254/latest/api/token" -H "X-aws-ec2-metadata-token-ttl-seconds: 30")" -fsSL "http://169.254.169.254/latest/meta-data/${category}"
|
||||
}
|
||||
echo "ami-id: $(get_ec2_metadata ami-id)"
|
||||
echo "instance-id: $(get_ec2_metadata instance-id)"
|
||||
@ -1417,7 +1417,7 @@ jobs:
|
||||
# Pulled from instance metadata endpoint for EC2
|
||||
# see https://docs.aws.amazon.com/AWSEC2/latest/UserGuide/instancedata-data-retrieval.html
|
||||
category=$1
|
||||
curl -fsSL "http://169.254.169.254/latest/meta-data/${category}"
|
||||
curl -H "X-aws-ec2-metadata-token: $(curl -s -X PUT "http://169.254.169.254/latest/api/token" -H "X-aws-ec2-metadata-token-ttl-seconds: 30")" -fsSL "http://169.254.169.254/latest/meta-data/${category}"
|
||||
}
|
||||
echo "ami-id: $(get_ec2_metadata ami-id)"
|
||||
echo "instance-id: $(get_ec2_metadata instance-id)"
|
||||
@ -1550,7 +1550,7 @@ jobs:
|
||||
# Pulled from instance metadata endpoint for EC2
|
||||
# see https://docs.aws.amazon.com/AWSEC2/latest/UserGuide/instancedata-data-retrieval.html
|
||||
category=$1
|
||||
curl -fsSL "http://169.254.169.254/latest/meta-data/${category}"
|
||||
curl -H "X-aws-ec2-metadata-token: $(curl -s -X PUT "http://169.254.169.254/latest/api/token" -H "X-aws-ec2-metadata-token-ttl-seconds: 30")" -fsSL "http://169.254.169.254/latest/meta-data/${category}"
|
||||
}
|
||||
echo "ami-id: $(get_ec2_metadata ami-id)"
|
||||
echo "instance-id: $(get_ec2_metadata instance-id)"
|
||||
@ -1665,7 +1665,7 @@ jobs:
|
||||
# Pulled from instance metadata endpoint for EC2
|
||||
# see https://docs.aws.amazon.com/AWSEC2/latest/UserGuide/instancedata-data-retrieval.html
|
||||
category=$1
|
||||
curl -fsSL "http://169.254.169.254/latest/meta-data/${category}"
|
||||
curl -H "X-aws-ec2-metadata-token: $(curl -s -X PUT "http://169.254.169.254/latest/api/token" -H "X-aws-ec2-metadata-token-ttl-seconds: 30")" -fsSL "http://169.254.169.254/latest/meta-data/${category}"
|
||||
}
|
||||
echo "ami-id: $(get_ec2_metadata ami-id)"
|
||||
echo "instance-id: $(get_ec2_metadata instance-id)"
|
||||
@ -1799,7 +1799,7 @@ jobs:
|
||||
# Pulled from instance metadata endpoint for EC2
|
||||
# see https://docs.aws.amazon.com/AWSEC2/latest/UserGuide/instancedata-data-retrieval.html
|
||||
category=$1
|
||||
curl -fsSL "http://169.254.169.254/latest/meta-data/${category}"
|
||||
curl -H "X-aws-ec2-metadata-token: $(curl -s -X PUT "http://169.254.169.254/latest/api/token" -H "X-aws-ec2-metadata-token-ttl-seconds: 30")" -fsSL "http://169.254.169.254/latest/meta-data/${category}"
|
||||
}
|
||||
echo "ami-id: $(get_ec2_metadata ami-id)"
|
||||
echo "instance-id: $(get_ec2_metadata instance-id)"
|
||||
@ -1914,7 +1914,7 @@ jobs:
|
||||
# Pulled from instance metadata endpoint for EC2
|
||||
# see https://docs.aws.amazon.com/AWSEC2/latest/UserGuide/instancedata-data-retrieval.html
|
||||
category=$1
|
||||
curl -fsSL "http://169.254.169.254/latest/meta-data/${category}"
|
||||
curl -H "X-aws-ec2-metadata-token: $(curl -s -X PUT "http://169.254.169.254/latest/api/token" -H "X-aws-ec2-metadata-token-ttl-seconds: 30")" -fsSL "http://169.254.169.254/latest/meta-data/${category}"
|
||||
}
|
||||
echo "ami-id: $(get_ec2_metadata ami-id)"
|
||||
echo "instance-id: $(get_ec2_metadata instance-id)"
|
||||
@ -2048,7 +2048,7 @@ jobs:
|
||||
# Pulled from instance metadata endpoint for EC2
|
||||
# see https://docs.aws.amazon.com/AWSEC2/latest/UserGuide/instancedata-data-retrieval.html
|
||||
category=$1
|
||||
curl -fsSL "http://169.254.169.254/latest/meta-data/${category}"
|
||||
curl -H "X-aws-ec2-metadata-token: $(curl -s -X PUT "http://169.254.169.254/latest/api/token" -H "X-aws-ec2-metadata-token-ttl-seconds: 30")" -fsSL "http://169.254.169.254/latest/meta-data/${category}"
|
||||
}
|
||||
echo "ami-id: $(get_ec2_metadata ami-id)"
|
||||
echo "instance-id: $(get_ec2_metadata instance-id)"
|
||||
@ -2163,7 +2163,7 @@ jobs:
|
||||
# Pulled from instance metadata endpoint for EC2
|
||||
# see https://docs.aws.amazon.com/AWSEC2/latest/UserGuide/instancedata-data-retrieval.html
|
||||
category=$1
|
||||
curl -fsSL "http://169.254.169.254/latest/meta-data/${category}"
|
||||
curl -H "X-aws-ec2-metadata-token: $(curl -s -X PUT "http://169.254.169.254/latest/api/token" -H "X-aws-ec2-metadata-token-ttl-seconds: 30")" -fsSL "http://169.254.169.254/latest/meta-data/${category}"
|
||||
}
|
||||
echo "ami-id: $(get_ec2_metadata ami-id)"
|
||||
echo "instance-id: $(get_ec2_metadata instance-id)"
|
||||
@ -2295,7 +2295,7 @@ jobs:
|
||||
# Pulled from instance metadata endpoint for EC2
|
||||
# see https://docs.aws.amazon.com/AWSEC2/latest/UserGuide/instancedata-data-retrieval.html
|
||||
category=$1
|
||||
curl -fsSL "http://169.254.169.254/latest/meta-data/${category}"
|
||||
curl -H "X-aws-ec2-metadata-token: $(curl -s -X PUT "http://169.254.169.254/latest/api/token" -H "X-aws-ec2-metadata-token-ttl-seconds: 30")" -fsSL "http://169.254.169.254/latest/meta-data/${category}"
|
||||
}
|
||||
echo "ami-id: $(get_ec2_metadata ami-id)"
|
||||
echo "instance-id: $(get_ec2_metadata instance-id)"
|
||||
@ -2409,7 +2409,7 @@ jobs:
|
||||
# Pulled from instance metadata endpoint for EC2
|
||||
# see https://docs.aws.amazon.com/AWSEC2/latest/UserGuide/instancedata-data-retrieval.html
|
||||
category=$1
|
||||
curl -fsSL "http://169.254.169.254/latest/meta-data/${category}"
|
||||
curl -H "X-aws-ec2-metadata-token: $(curl -s -X PUT "http://169.254.169.254/latest/api/token" -H "X-aws-ec2-metadata-token-ttl-seconds: 30")" -fsSL "http://169.254.169.254/latest/meta-data/${category}"
|
||||
}
|
||||
echo "ami-id: $(get_ec2_metadata ami-id)"
|
||||
echo "instance-id: $(get_ec2_metadata instance-id)"
|
||||
@ -2541,7 +2541,7 @@ jobs:
|
||||
# Pulled from instance metadata endpoint for EC2
|
||||
# see https://docs.aws.amazon.com/AWSEC2/latest/UserGuide/instancedata-data-retrieval.html
|
||||
category=$1
|
||||
curl -fsSL "http://169.254.169.254/latest/meta-data/${category}"
|
||||
curl -H "X-aws-ec2-metadata-token: $(curl -s -X PUT "http://169.254.169.254/latest/api/token" -H "X-aws-ec2-metadata-token-ttl-seconds: 30")" -fsSL "http://169.254.169.254/latest/meta-data/${category}"
|
||||
}
|
||||
echo "ami-id: $(get_ec2_metadata ami-id)"
|
||||
echo "instance-id: $(get_ec2_metadata instance-id)"
|
||||
@ -2655,7 +2655,7 @@ jobs:
|
||||
# Pulled from instance metadata endpoint for EC2
|
||||
# see https://docs.aws.amazon.com/AWSEC2/latest/UserGuide/instancedata-data-retrieval.html
|
||||
category=$1
|
||||
curl -fsSL "http://169.254.169.254/latest/meta-data/${category}"
|
||||
curl -H "X-aws-ec2-metadata-token: $(curl -s -X PUT "http://169.254.169.254/latest/api/token" -H "X-aws-ec2-metadata-token-ttl-seconds: 30")" -fsSL "http://169.254.169.254/latest/meta-data/${category}"
|
||||
}
|
||||
echo "ami-id: $(get_ec2_metadata ami-id)"
|
||||
echo "instance-id: $(get_ec2_metadata instance-id)"
|
||||
@ -2788,7 +2788,7 @@ jobs:
|
||||
# Pulled from instance metadata endpoint for EC2
|
||||
# see https://docs.aws.amazon.com/AWSEC2/latest/UserGuide/instancedata-data-retrieval.html
|
||||
category=$1
|
||||
curl -fsSL "http://169.254.169.254/latest/meta-data/${category}"
|
||||
curl -H "X-aws-ec2-metadata-token: $(curl -s -X PUT "http://169.254.169.254/latest/api/token" -H "X-aws-ec2-metadata-token-ttl-seconds: 30")" -fsSL "http://169.254.169.254/latest/meta-data/${category}"
|
||||
}
|
||||
echo "ami-id: $(get_ec2_metadata ami-id)"
|
||||
echo "instance-id: $(get_ec2_metadata instance-id)"
|
||||
@ -2903,7 +2903,7 @@ jobs:
|
||||
# Pulled from instance metadata endpoint for EC2
|
||||
# see https://docs.aws.amazon.com/AWSEC2/latest/UserGuide/instancedata-data-retrieval.html
|
||||
category=$1
|
||||
curl -fsSL "http://169.254.169.254/latest/meta-data/${category}"
|
||||
curl -H "X-aws-ec2-metadata-token: $(curl -s -X PUT "http://169.254.169.254/latest/api/token" -H "X-aws-ec2-metadata-token-ttl-seconds: 30")" -fsSL "http://169.254.169.254/latest/meta-data/${category}"
|
||||
}
|
||||
echo "ami-id: $(get_ec2_metadata ami-id)"
|
||||
echo "instance-id: $(get_ec2_metadata instance-id)"
|
||||
@ -3037,7 +3037,7 @@ jobs:
|
||||
# Pulled from instance metadata endpoint for EC2
|
||||
# see https://docs.aws.amazon.com/AWSEC2/latest/UserGuide/instancedata-data-retrieval.html
|
||||
category=$1
|
||||
curl -fsSL "http://169.254.169.254/latest/meta-data/${category}"
|
||||
curl -H "X-aws-ec2-metadata-token: $(curl -s -X PUT "http://169.254.169.254/latest/api/token" -H "X-aws-ec2-metadata-token-ttl-seconds: 30")" -fsSL "http://169.254.169.254/latest/meta-data/${category}"
|
||||
}
|
||||
echo "ami-id: $(get_ec2_metadata ami-id)"
|
||||
echo "instance-id: $(get_ec2_metadata instance-id)"
|
||||
@ -3152,7 +3152,7 @@ jobs:
|
||||
# Pulled from instance metadata endpoint for EC2
|
||||
# see https://docs.aws.amazon.com/AWSEC2/latest/UserGuide/instancedata-data-retrieval.html
|
||||
category=$1
|
||||
curl -fsSL "http://169.254.169.254/latest/meta-data/${category}"
|
||||
curl -H "X-aws-ec2-metadata-token: $(curl -s -X PUT "http://169.254.169.254/latest/api/token" -H "X-aws-ec2-metadata-token-ttl-seconds: 30")" -fsSL "http://169.254.169.254/latest/meta-data/${category}"
|
||||
}
|
||||
echo "ami-id: $(get_ec2_metadata ami-id)"
|
||||
echo "instance-id: $(get_ec2_metadata instance-id)"
|
||||
@ -3286,7 +3286,7 @@ jobs:
|
||||
# Pulled from instance metadata endpoint for EC2
|
||||
# see https://docs.aws.amazon.com/AWSEC2/latest/UserGuide/instancedata-data-retrieval.html
|
||||
category=$1
|
||||
curl -fsSL "http://169.254.169.254/latest/meta-data/${category}"
|
||||
curl -H "X-aws-ec2-metadata-token: $(curl -s -X PUT "http://169.254.169.254/latest/api/token" -H "X-aws-ec2-metadata-token-ttl-seconds: 30")" -fsSL "http://169.254.169.254/latest/meta-data/${category}"
|
||||
}
|
||||
echo "ami-id: $(get_ec2_metadata ami-id)"
|
||||
echo "instance-id: $(get_ec2_metadata instance-id)"
|
||||
@ -3401,7 +3401,7 @@ jobs:
|
||||
# Pulled from instance metadata endpoint for EC2
|
||||
# see https://docs.aws.amazon.com/AWSEC2/latest/UserGuide/instancedata-data-retrieval.html
|
||||
category=$1
|
||||
curl -fsSL "http://169.254.169.254/latest/meta-data/${category}"
|
||||
curl -H "X-aws-ec2-metadata-token: $(curl -s -X PUT "http://169.254.169.254/latest/api/token" -H "X-aws-ec2-metadata-token-ttl-seconds: 30")" -fsSL "http://169.254.169.254/latest/meta-data/${category}"
|
||||
}
|
||||
echo "ami-id: $(get_ec2_metadata ami-id)"
|
||||
echo "instance-id: $(get_ec2_metadata instance-id)"
|
||||
@ -3533,7 +3533,7 @@ jobs:
|
||||
# Pulled from instance metadata endpoint for EC2
|
||||
# see https://docs.aws.amazon.com/AWSEC2/latest/UserGuide/instancedata-data-retrieval.html
|
||||
category=$1
|
||||
curl -fsSL "http://169.254.169.254/latest/meta-data/${category}"
|
||||
curl -H "X-aws-ec2-metadata-token: $(curl -s -X PUT "http://169.254.169.254/latest/api/token" -H "X-aws-ec2-metadata-token-ttl-seconds: 30")" -fsSL "http://169.254.169.254/latest/meta-data/${category}"
|
||||
}
|
||||
echo "ami-id: $(get_ec2_metadata ami-id)"
|
||||
echo "instance-id: $(get_ec2_metadata instance-id)"
|
||||
@ -3647,7 +3647,7 @@ jobs:
|
||||
# Pulled from instance metadata endpoint for EC2
|
||||
# see https://docs.aws.amazon.com/AWSEC2/latest/UserGuide/instancedata-data-retrieval.html
|
||||
category=$1
|
||||
curl -fsSL "http://169.254.169.254/latest/meta-data/${category}"
|
||||
curl -H "X-aws-ec2-metadata-token: $(curl -s -X PUT "http://169.254.169.254/latest/api/token" -H "X-aws-ec2-metadata-token-ttl-seconds: 30")" -fsSL "http://169.254.169.254/latest/meta-data/${category}"
|
||||
}
|
||||
echo "ami-id: $(get_ec2_metadata ami-id)"
|
||||
echo "instance-id: $(get_ec2_metadata instance-id)"
|
||||
@ -3779,7 +3779,7 @@ jobs:
|
||||
# Pulled from instance metadata endpoint for EC2
|
||||
# see https://docs.aws.amazon.com/AWSEC2/latest/UserGuide/instancedata-data-retrieval.html
|
||||
category=$1
|
||||
curl -fsSL "http://169.254.169.254/latest/meta-data/${category}"
|
||||
curl -H "X-aws-ec2-metadata-token: $(curl -s -X PUT "http://169.254.169.254/latest/api/token" -H "X-aws-ec2-metadata-token-ttl-seconds: 30")" -fsSL "http://169.254.169.254/latest/meta-data/${category}"
|
||||
}
|
||||
echo "ami-id: $(get_ec2_metadata ami-id)"
|
||||
echo "instance-id: $(get_ec2_metadata instance-id)"
|
||||
@ -3893,7 +3893,7 @@ jobs:
|
||||
# Pulled from instance metadata endpoint for EC2
|
||||
# see https://docs.aws.amazon.com/AWSEC2/latest/UserGuide/instancedata-data-retrieval.html
|
||||
category=$1
|
||||
curl -fsSL "http://169.254.169.254/latest/meta-data/${category}"
|
||||
curl -H "X-aws-ec2-metadata-token: $(curl -s -X PUT "http://169.254.169.254/latest/api/token" -H "X-aws-ec2-metadata-token-ttl-seconds: 30")" -fsSL "http://169.254.169.254/latest/meta-data/${category}"
|
||||
}
|
||||
echo "ami-id: $(get_ec2_metadata ami-id)"
|
||||
echo "instance-id: $(get_ec2_metadata instance-id)"
|
||||
@ -4026,7 +4026,7 @@ jobs:
|
||||
# Pulled from instance metadata endpoint for EC2
|
||||
# see https://docs.aws.amazon.com/AWSEC2/latest/UserGuide/instancedata-data-retrieval.html
|
||||
category=$1
|
||||
curl -fsSL "http://169.254.169.254/latest/meta-data/${category}"
|
||||
curl -H "X-aws-ec2-metadata-token: $(curl -s -X PUT "http://169.254.169.254/latest/api/token" -H "X-aws-ec2-metadata-token-ttl-seconds: 30")" -fsSL "http://169.254.169.254/latest/meta-data/${category}"
|
||||
}
|
||||
echo "ami-id: $(get_ec2_metadata ami-id)"
|
||||
echo "instance-id: $(get_ec2_metadata instance-id)"
|
||||
@ -4141,7 +4141,7 @@ jobs:
|
||||
# Pulled from instance metadata endpoint for EC2
|
||||
# see https://docs.aws.amazon.com/AWSEC2/latest/UserGuide/instancedata-data-retrieval.html
|
||||
category=$1
|
||||
curl -fsSL "http://169.254.169.254/latest/meta-data/${category}"
|
||||
curl -H "X-aws-ec2-metadata-token: $(curl -s -X PUT "http://169.254.169.254/latest/api/token" -H "X-aws-ec2-metadata-token-ttl-seconds: 30")" -fsSL "http://169.254.169.254/latest/meta-data/${category}"
|
||||
}
|
||||
echo "ami-id: $(get_ec2_metadata ami-id)"
|
||||
echo "instance-id: $(get_ec2_metadata instance-id)"
|
||||
@ -4275,7 +4275,7 @@ jobs:
|
||||
# Pulled from instance metadata endpoint for EC2
|
||||
# see https://docs.aws.amazon.com/AWSEC2/latest/UserGuide/instancedata-data-retrieval.html
|
||||
category=$1
|
||||
curl -fsSL "http://169.254.169.254/latest/meta-data/${category}"
|
||||
curl -H "X-aws-ec2-metadata-token: $(curl -s -X PUT "http://169.254.169.254/latest/api/token" -H "X-aws-ec2-metadata-token-ttl-seconds: 30")" -fsSL "http://169.254.169.254/latest/meta-data/${category}"
|
||||
}
|
||||
echo "ami-id: $(get_ec2_metadata ami-id)"
|
||||
echo "instance-id: $(get_ec2_metadata instance-id)"
|
||||
@ -4390,7 +4390,7 @@ jobs:
|
||||
# Pulled from instance metadata endpoint for EC2
|
||||
# see https://docs.aws.amazon.com/AWSEC2/latest/UserGuide/instancedata-data-retrieval.html
|
||||
category=$1
|
||||
curl -fsSL "http://169.254.169.254/latest/meta-data/${category}"
|
||||
curl -H "X-aws-ec2-metadata-token: $(curl -s -X PUT "http://169.254.169.254/latest/api/token" -H "X-aws-ec2-metadata-token-ttl-seconds: 30")" -fsSL "http://169.254.169.254/latest/meta-data/${category}"
|
||||
}
|
||||
echo "ami-id: $(get_ec2_metadata ami-id)"
|
||||
echo "instance-id: $(get_ec2_metadata instance-id)"
|
||||
@ -4524,7 +4524,7 @@ jobs:
|
||||
# Pulled from instance metadata endpoint for EC2
|
||||
# see https://docs.aws.amazon.com/AWSEC2/latest/UserGuide/instancedata-data-retrieval.html
|
||||
category=$1
|
||||
curl -fsSL "http://169.254.169.254/latest/meta-data/${category}"
|
||||
curl -H "X-aws-ec2-metadata-token: $(curl -s -X PUT "http://169.254.169.254/latest/api/token" -H "X-aws-ec2-metadata-token-ttl-seconds: 30")" -fsSL "http://169.254.169.254/latest/meta-data/${category}"
|
||||
}
|
||||
echo "ami-id: $(get_ec2_metadata ami-id)"
|
||||
echo "instance-id: $(get_ec2_metadata instance-id)"
|
||||
@ -4639,7 +4639,7 @@ jobs:
|
||||
# Pulled from instance metadata endpoint for EC2
|
||||
# see https://docs.aws.amazon.com/AWSEC2/latest/UserGuide/instancedata-data-retrieval.html
|
||||
category=$1
|
||||
curl -fsSL "http://169.254.169.254/latest/meta-data/${category}"
|
||||
curl -H "X-aws-ec2-metadata-token: $(curl -s -X PUT "http://169.254.169.254/latest/api/token" -H "X-aws-ec2-metadata-token-ttl-seconds: 30")" -fsSL "http://169.254.169.254/latest/meta-data/${category}"
|
||||
}
|
||||
echo "ami-id: $(get_ec2_metadata ami-id)"
|
||||
echo "instance-id: $(get_ec2_metadata instance-id)"
|
||||
@ -4771,7 +4771,7 @@ jobs:
|
||||
# Pulled from instance metadata endpoint for EC2
|
||||
# see https://docs.aws.amazon.com/AWSEC2/latest/UserGuide/instancedata-data-retrieval.html
|
||||
category=$1
|
||||
curl -fsSL "http://169.254.169.254/latest/meta-data/${category}"
|
||||
curl -H "X-aws-ec2-metadata-token: $(curl -s -X PUT "http://169.254.169.254/latest/api/token" -H "X-aws-ec2-metadata-token-ttl-seconds: 30")" -fsSL "http://169.254.169.254/latest/meta-data/${category}"
|
||||
}
|
||||
echo "ami-id: $(get_ec2_metadata ami-id)"
|
||||
echo "instance-id: $(get_ec2_metadata instance-id)"
|
||||
@ -4885,7 +4885,7 @@ jobs:
|
||||
# Pulled from instance metadata endpoint for EC2
|
||||
# see https://docs.aws.amazon.com/AWSEC2/latest/UserGuide/instancedata-data-retrieval.html
|
||||
category=$1
|
||||
curl -fsSL "http://169.254.169.254/latest/meta-data/${category}"
|
||||
curl -H "X-aws-ec2-metadata-token: $(curl -s -X PUT "http://169.254.169.254/latest/api/token" -H "X-aws-ec2-metadata-token-ttl-seconds: 30")" -fsSL "http://169.254.169.254/latest/meta-data/${category}"
|
||||
}
|
||||
echo "ami-id: $(get_ec2_metadata ami-id)"
|
||||
echo "instance-id: $(get_ec2_metadata instance-id)"
|
||||
|
||||
6
.github/workflows/lint-autoformat.yml
vendored
6
.github/workflows/lint-autoformat.yml
vendored
@ -2,9 +2,9 @@ name: Apply lint suggestions
|
||||
|
||||
on:
|
||||
|
||||
pull_request:
|
||||
types: [opened, synchronize, reopened]
|
||||
branches: [main]
|
||||
push:
|
||||
tags:
|
||||
- ciflow/autoformat/*
|
||||
|
||||
jobs:
|
||||
lintrunner-autoformat:
|
||||
|
||||
4
.github/workflows/update-viablestrict.yml
vendored
4
.github/workflows/update-viablestrict.yml
vendored
@ -25,7 +25,9 @@ jobs:
|
||||
stable-branch: viable/strict
|
||||
requires: '[\"pull\", \"trunk\", \"lint\", \"linux-binary\"]'
|
||||
secret-bot-token: ${{ secrets.MERGEBOT_TOKEN }}
|
||||
rockset-api-key: ${{ secrets.ROCKSET_API_KEY }}
|
||||
clickhouse-url: ${{ secrets.CLICKHOUSE_URL }}
|
||||
clickhouse-username: ${{ secrets.CLICKHOUSE_VIABLESTRICT_USERNAME }}
|
||||
clickhouse-password: ${{ secrets.CLICKHOUSE_VIABLESTRICT_PASSWORD }}
|
||||
|
||||
- name: Authenticate to AWS with OIDC
|
||||
uses: aws-actions/configure-aws-credentials@v4
|
||||
|
||||
@ -341,19 +341,6 @@ cmake_dependent_option(
|
||||
cmake_dependent_option(USE_SYSTEM_UCC "Use system-wide UCC" OFF "USE_UCC" OFF)
|
||||
cmake_dependent_option(USE_C10D_UCC "USE C10D UCC" ON "USE_DISTRIBUTED;USE_UCC"
|
||||
OFF)
|
||||
cmake_dependent_option(
|
||||
USE_GLOO "Use Gloo. Only available if USE_DISTRIBUTED is on." ON
|
||||
"USE_DISTRIBUTED" OFF)
|
||||
cmake_dependent_option(
|
||||
USE_GLOO_WITH_OPENSSL
|
||||
"Use Gloo with OpenSSL. Only available if USE_GLOO is on." OFF
|
||||
"USE_GLOO AND LINUX AND NOT INTERN_BUILD_MOBILE" OFF)
|
||||
cmake_dependent_option(USE_C10D_GLOO "USE C10D GLOO" ON
|
||||
"USE_DISTRIBUTED;USE_GLOO" OFF)
|
||||
cmake_dependent_option(USE_C10D_NCCL "USE C10D NCCL" ON
|
||||
"USE_DISTRIBUTED;USE_NCCL" OFF)
|
||||
cmake_dependent_option(USE_C10D_MPI "USE C10D MPI" ON "USE_DISTRIBUTED;USE_MPI"
|
||||
OFF)
|
||||
cmake_dependent_option(
|
||||
USE_GLOO "Use Gloo. Only available if USE_DISTRIBUTED is on." ON
|
||||
"USE_DISTRIBUTED" OFF)
|
||||
@ -469,6 +456,7 @@ option(USE_SYSTEM_FXDIV "Use system-provided fxdiv." OFF)
|
||||
option(USE_SYSTEM_BENCHMARK "Use system-provided google benchmark." OFF)
|
||||
option(USE_SYSTEM_ONNX "Use system-provided onnx." OFF)
|
||||
option(USE_SYSTEM_XNNPACK "Use system-provided xnnpack." OFF)
|
||||
OPTION(USE_SYSTEM_NVTX "Use system-provided nvtx." OFF)
|
||||
option(USE_GOLD_LINKER "Use ld.gold to link" OFF)
|
||||
if(USE_SYSTEM_LIBS)
|
||||
set(USE_SYSTEM_CPUINFO ON)
|
||||
@ -487,6 +475,7 @@ if(USE_SYSTEM_LIBS)
|
||||
if(USE_NCCL)
|
||||
set(USE_SYSTEM_NCCL ON)
|
||||
endif()
|
||||
set(USE_SYSTEM_NVTX ON)
|
||||
endif()
|
||||
|
||||
# /Z7 override option When generating debug symbols, CMake default to use the
|
||||
@ -1096,6 +1085,10 @@ if(NOT MSVC)
|
||||
append_cxx_flag_if_supported("-fno-math-errno" CMAKE_CXX_FLAGS)
|
||||
append_cxx_flag_if_supported("-fno-trapping-math" CMAKE_CXX_FLAGS)
|
||||
append_cxx_flag_if_supported("-Werror=format" CMAKE_CXX_FLAGS)
|
||||
if(CMAKE_COMPILER_IS_GNUCXX AND CMAKE_CXX_COMPILER_VERSION VERSION_GREATER_EQUAL 13)
|
||||
append_cxx_flag_if_supported("-Wno-error=dangling-reference" CMAKE_CXX_FLAGS)
|
||||
append_cxx_flag_if_supported("-Wno-error=redundant-move" CMAKE_CXX_FLAGS)
|
||||
endif()
|
||||
else()
|
||||
# skip unwanted includes from windows.h
|
||||
add_compile_definitions(WIN32_LEAN_AND_MEAN)
|
||||
|
||||
@ -116,10 +116,10 @@ aten/src/ATen/detail/MTIAHooksInterface.h @egienvalue
|
||||
torch/csrc/mtia/ @egienvalue
|
||||
|
||||
# Profiler
|
||||
torch/csrc/autograd/profiler* @aaronenyeshi @sraikund16
|
||||
torch/autograd/profiler* @aaronenyeshi @sraikund16
|
||||
torch/csrc/profiler/ @aaronenyeshi @sraikund16
|
||||
torch/profiler/ @aaronenyeshi @sraikund16
|
||||
torch/csrc/autograd/profiler* @sraikund16
|
||||
torch/autograd/profiler* @sraikund16
|
||||
torch/csrc/profiler/ @sraikund16
|
||||
torch/profiler/ @sraikund16
|
||||
|
||||
# AOTDispatch tests
|
||||
test/functorch/test_aotdispatch.py @ezyang @Chillee
|
||||
|
||||
@ -39,8 +39,8 @@ class TORCH_API Context {
|
||||
|
||||
const Generator& defaultGenerator(Device device) {
|
||||
c10::DeviceType device_type = device.type();
|
||||
initCUDAIfNeeded(device_type);
|
||||
initHIPIfNeeded(device_type);
|
||||
lazyInitDevice(device_type);
|
||||
|
||||
if (device_type == at::kCPU) {
|
||||
return at::detail::getDefaultCPUGenerator();
|
||||
} else if (device_type == at::kCUDA) {
|
||||
@ -58,6 +58,7 @@ class TORCH_API Context {
|
||||
AT_ERROR(c10::DeviceTypeName(device_type), " device type not enabled.");
|
||||
}
|
||||
}
|
||||
|
||||
const AcceleratorHooksInterface& getAcceleratorHooksInterface(
|
||||
std::optional<c10::DeviceType> opt_device_type = std::nullopt) {
|
||||
c10::DeviceType device_type = opt_device_type.has_value()
|
||||
@ -80,16 +81,17 @@ class TORCH_API Context {
|
||||
c10::DeviceTypeName(device_type), " device type not an accelerator.");
|
||||
}
|
||||
}
|
||||
|
||||
Device getDeviceFromPtr(void* data, c10::DeviceType device_type) {
|
||||
initCUDAIfNeeded(device_type);
|
||||
initHIPIfNeeded(device_type);
|
||||
initXPUIfNeeded(device_type);
|
||||
lazyInitDevice(device_type);
|
||||
|
||||
if (device_type == at::kCPU) {
|
||||
return c10::DeviceType::CPU;
|
||||
} else {
|
||||
return getAcceleratorHooksInterface(device_type).getDeviceFromPtr(data);
|
||||
}
|
||||
}
|
||||
|
||||
bool isPinnedPtr(
|
||||
const void* data,
|
||||
std::optional<c10::DeviceType> device_type = std::nullopt) {
|
||||
@ -102,10 +104,20 @@ class TORCH_API Context {
|
||||
}
|
||||
return getAcceleratorHooksInterface(opt_device_type).isPinnedPtr(data);
|
||||
}
|
||||
|
||||
Allocator* getPinnedMemoryAllocator(
|
||||
std::optional<c10::DeviceType> device_type = std::nullopt) {
|
||||
return getAcceleratorHooksInterface(device_type).getPinnedMemoryAllocator();
|
||||
}
|
||||
|
||||
void lazyInitDevice(c10::DeviceType device_type) {
|
||||
if (device_type != at::kCPU) {
|
||||
c10::call_once(init_[static_cast<int8_t>(device_type)], [&] {
|
||||
getAcceleratorHooksInterface(device_type).init();
|
||||
});
|
||||
}
|
||||
}
|
||||
|
||||
static bool hasOpenMP();
|
||||
static bool hasMKL();
|
||||
static bool hasLAPACK();
|
||||
@ -158,27 +170,6 @@ class TORCH_API Context {
|
||||
static bool hasMAIA() {
|
||||
return c10::impl::hasDeviceGuardImpl(c10::DeviceType::MAIA);
|
||||
}
|
||||
// defined in header so that getNonVariableType has ability to inline
|
||||
// call_once check. getNonVariableType is called fairly frequently
|
||||
void lazyInitCUDA() {
|
||||
c10::call_once(thc_init, [&] { detail::getCUDAHooks().initCUDA(); });
|
||||
}
|
||||
void lazyInitHIP() {
|
||||
c10::call_once(thh_init, [&] { detail::getHIPHooks().initHIP(); });
|
||||
}
|
||||
void lazyInitXPU() {
|
||||
c10::call_once(thx_init, [&] { detail::getXPUHooks().initXPU(); });
|
||||
}
|
||||
void lazyInitMTIA() {
|
||||
c10::call_once(th_mtia_init, [&] { detail::getMTIAHooks().initMTIA(); });
|
||||
}
|
||||
void lazyInitPrivateUse1() {
|
||||
c10::call_once(thp_init, [&] {
|
||||
if (isPrivateUse1HooksRegistered()) {
|
||||
at::detail::getPrivateUse1Hooks().initPrivateUse1();
|
||||
}
|
||||
});
|
||||
}
|
||||
static const at::cuda::NVRTC& getNVRTC() {
|
||||
return detail::getCUDAHooks().nvrtc();
|
||||
}
|
||||
@ -353,28 +344,26 @@ class TORCH_API Context {
|
||||
bool allowFP16ReductionCPU() const;
|
||||
void setAllowFP16ReductionCPU(bool);
|
||||
|
||||
// Preserved for BC
|
||||
void lazyInitCUDA() {
|
||||
lazyInitDevice(at::kCUDA);
|
||||
}
|
||||
void lazyInitHIP() {
|
||||
lazyInitDevice(at::kHIP);
|
||||
}
|
||||
void lazyInitXPU() {
|
||||
lazyInitDevice(at::kXPU);
|
||||
}
|
||||
void lazyInitMTIA() {
|
||||
lazyInitDevice(at::kMTIA);
|
||||
}
|
||||
void lazyInitPrivateUse1() {
|
||||
lazyInitDevice(at::kPrivateUse1);
|
||||
}
|
||||
|
||||
private:
|
||||
void initCUDAIfNeeded(c10::DeviceType p) {
|
||||
if (p == c10::DeviceType::CUDA) {
|
||||
lazyInitCUDA();
|
||||
}
|
||||
}
|
||||
void initHIPIfNeeded(c10::DeviceType p) {
|
||||
if (p == c10::DeviceType::HIP) {
|
||||
lazyInitHIP();
|
||||
}
|
||||
}
|
||||
void initXPUIfNeeded(c10::DeviceType p) {
|
||||
if (p == c10::DeviceType::XPU) {
|
||||
lazyInitXPU();
|
||||
}
|
||||
}
|
||||
static bool checkCuBLASConfigDeterministic();
|
||||
c10::once_flag thc_init;
|
||||
c10::once_flag thh_init;
|
||||
c10::once_flag thx_init;
|
||||
c10::once_flag th_mtia_init;
|
||||
c10::once_flag thp_init;
|
||||
std::array<c10::once_flag, at::COMPILE_TIME_MAX_DEVICE_TYPES> init_;
|
||||
bool enabled_cudnn = true;
|
||||
bool deterministic_cudnn = false;
|
||||
bool deterministic_mkldnn = false;
|
||||
|
||||
@ -22,6 +22,13 @@ DLDataType getDLDataType(const Tensor& t) {
|
||||
case ScalarType::UInt64:
|
||||
dtype.code = DLDataTypeCode::kDLUInt;
|
||||
break;
|
||||
case ScalarType::Int1:
|
||||
case ScalarType::Int2:
|
||||
case ScalarType::Int3:
|
||||
case ScalarType::Int4:
|
||||
case ScalarType::Int5:
|
||||
case ScalarType::Int6:
|
||||
case ScalarType::Int7:
|
||||
case ScalarType::Char:
|
||||
dtype.code = DLDataTypeCode::kDLInt;
|
||||
break;
|
||||
@ -49,11 +56,7 @@ DLDataType getDLDataType(const Tensor& t) {
|
||||
dtype.code = DLDataTypeCode::kDLBool;
|
||||
break;
|
||||
case ScalarType::ComplexHalf:
|
||||
dtype.code = DLDataTypeCode::kDLComplex;
|
||||
break;
|
||||
case ScalarType::ComplexFloat:
|
||||
dtype.code = DLDataTypeCode::kDLComplex;
|
||||
break;
|
||||
case ScalarType::ComplexDouble:
|
||||
dtype.code = DLDataTypeCode::kDLComplex;
|
||||
break;
|
||||
@ -90,7 +93,7 @@ DLDataType getDLDataType(const Tensor& t) {
|
||||
|
||||
static DLDevice getDLDevice(const Tensor& tensor, c10::DeviceIndex device_id) {
|
||||
DLDevice ctx;
|
||||
ctx.device_id = static_cast<int32_t>(device_id);
|
||||
ctx.device_id = static_cast<int32_t>(static_cast<unsigned char>(device_id));
|
||||
switch (tensor.device().type()) {
|
||||
case DeviceType::CPU:
|
||||
ctx.device_type = DLDeviceType::kDLCPU;
|
||||
@ -253,10 +256,12 @@ ScalarType toScalarType(const DLDataType& dtype) {
|
||||
}
|
||||
|
||||
// NOLINTNEXTLINE(cppcoreguidelines-pro-type-member-init)
|
||||
namespace {
|
||||
struct ATenDLMTensor {
|
||||
Tensor handle;
|
||||
DLManagedTensor tensor;
|
||||
DLManagedTensor tensor{};
|
||||
};
|
||||
} // namespace
|
||||
|
||||
static void deleter(DLManagedTensor* arg) {
|
||||
delete static_cast<ATenDLMTensor*>(arg->manager_ctx);
|
||||
|
||||
@ -78,8 +78,8 @@ TORCH_API void record_kernel_function_dtype(std::string name);
|
||||
AT_PRIVATE_CHECK_SELECTIVE_BUILD(enum_type); \
|
||||
using scalar_t = scalar_type; \
|
||||
using underlying_t C10_UNUSED = typename scalar_t::underlying; \
|
||||
const auto& SCALAR_TYPE C10_UNUSED = enum_type; \
|
||||
const auto& UNDERLYING_TYPE C10_UNUSED = toUnderlying(enum_type); \
|
||||
C10_UNUSED const auto& SCALAR_TYPE = enum_type; \
|
||||
C10_UNUSED const auto& UNDERLYING_TYPE = toUnderlying(enum_type); \
|
||||
return __VA_ARGS__(); \
|
||||
}
|
||||
|
||||
@ -89,8 +89,8 @@ TORCH_API void record_kernel_function_dtype(std::string name);
|
||||
AT_PRIVATE_CHECK_SELECTIVE_BUILD(enum_type); \
|
||||
using scalar_t = scalar_type; \
|
||||
using underlying_t C10_UNUSED = typename scalar_t::underlying; \
|
||||
const auto& SCALAR_TYPE C10_UNUSED = enum_type; \
|
||||
const auto& UNDERLYING_TYPE C10_UNUSED = toUnderlying(enum_type); \
|
||||
C10_UNUSED const auto& SCALAR_TYPE = enum_type; \
|
||||
C10_UNUSED const auto& UNDERLYING_TYPE = toUnderlying(enum_type); \
|
||||
C10_UNUSED int bit_width = bitwidth; \
|
||||
C10_UNUSED int64_t quant_min = qmin; \
|
||||
C10_UNUSED int64_t quant_max = qmax; \
|
||||
|
||||
@ -112,12 +112,12 @@
|
||||
|
||||
// Ensure we never have too many scalar types for the expansion here to
|
||||
// support. To bump this, you must regenerate the macros below.
|
||||
static_assert(static_cast<int>(c10::ScalarType::NumOptions) < 45);
|
||||
static_assert(static_cast<int>(c10::ScalarType::NumOptions) < 60);
|
||||
|
||||
// Python code to regenerate generate code below:
|
||||
#if 0
|
||||
|
||||
num_args = 45
|
||||
num_args = 60
|
||||
|
||||
nums = ', '.join(str(i) for i in reversed(range(num_args+1)))
|
||||
args = ', '.join(f'_{i}' for i in range(1, num_args+1))
|
||||
@ -135,8 +135,8 @@ for i in range(1, num_args+1):
|
||||
// Begin generated code
|
||||
// clang-format off
|
||||
|
||||
#define AT_NUM_ARGS(...) AT_EXPAND(AT_NUM_ARGS_AUX(__VA_ARGS__, 45, 44, 43, 42, 41, 40, 39, 38, 37, 36, 35, 34, 33, 32, 31, 30, 29, 28, 27, 26, 25, 24, 23, 22, 21, 20, 19, 18, 17, 16, 15, 14, 13, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1, 0))
|
||||
#define AT_NUM_ARGS_AUX(_1, _2, _3, _4, _5, _6, _7, _8, _9, _10, _11, _12, _13, _14, _15, _16, _17, _18, _19, _20, _21, _22, _23, _24, _25, _26, _27, _28, _29, _30, _31, _32, _33, _34, _35, _36, _37, _38, _39, _40, _41, _42, _43, _44, _45, N, ...) N
|
||||
#define AT_NUM_ARGS(...) AT_EXPAND(AT_NUM_ARGS_AUX(__VA_ARGS__, 60, 59, 58, 57, 56, 55, 54, 53, 52, 51, 50, 49, 48, 47, 46, 45, 44, 43, 42, 41, 40, 39, 38, 37, 36, 35, 34, 33, 32, 31, 30, 29, 28, 27, 26, 25, 24, 23, 22, 21, 20, 19, 18, 17, 16, 15, 14, 13, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1, 0))
|
||||
#define AT_NUM_ARGS_AUX(_1, _2, _3, _4, _5, _6, _7, _8, _9, _10, _11, _12, _13, _14, _15, _16, _17, _18, _19, _20, _21, _22, _23, _24, _25, _26, _27, _28, _29, _30, _31, _32, _33, _34, _35, _36, _37, _38, _39, _40, _41, _42, _43, _44, _45, _46, _47, _48, _49, _50, _51, _52, _53, _54, _55, _56, _57, _58, _59, _60, N, ...) N
|
||||
#define AT_AP1(N, _1) AT_DISPATCH_CASE(_1, N)
|
||||
#define AT_AP2(N, _1, _2) AT_DISPATCH_CASE(_1, N) AT_DISPATCH_CASE(_2, N)
|
||||
#define AT_AP3(N, _1, _2, _3) AT_DISPATCH_CASE(_1, N) AT_DISPATCH_CASE(_2, N) AT_DISPATCH_CASE(_3, N)
|
||||
@ -182,5 +182,21 @@ for i in range(1, num_args+1):
|
||||
#define AT_AP43(N, _1, _2, _3, _4, _5, _6, _7, _8, _9, _10, _11, _12, _13, _14, _15, _16, _17, _18, _19, _20, _21, _22, _23, _24, _25, _26, _27, _28, _29, _30, _31, _32, _33, _34, _35, _36, _37, _38, _39, _40, _41, _42, _43) AT_DISPATCH_CASE(_1, N) AT_DISPATCH_CASE(_2, N) AT_DISPATCH_CASE(_3, N) AT_DISPATCH_CASE(_4, N) AT_DISPATCH_CASE(_5, N) AT_DISPATCH_CASE(_6, N) AT_DISPATCH_CASE(_7, N) AT_DISPATCH_CASE(_8, N) AT_DISPATCH_CASE(_9, N) AT_DISPATCH_CASE(_10, N) AT_DISPATCH_CASE(_11, N) AT_DISPATCH_CASE(_12, N) AT_DISPATCH_CASE(_13, N) AT_DISPATCH_CASE(_14, N) AT_DISPATCH_CASE(_15, N) AT_DISPATCH_CASE(_16, N) AT_DISPATCH_CASE(_17, N) AT_DISPATCH_CASE(_18, N) AT_DISPATCH_CASE(_19, N) AT_DISPATCH_CASE(_20, N) AT_DISPATCH_CASE(_21, N) AT_DISPATCH_CASE(_22, N) AT_DISPATCH_CASE(_23, N) AT_DISPATCH_CASE(_24, N) AT_DISPATCH_CASE(_25, N) AT_DISPATCH_CASE(_26, N) AT_DISPATCH_CASE(_27, N) AT_DISPATCH_CASE(_28, N) AT_DISPATCH_CASE(_29, N) AT_DISPATCH_CASE(_30, N) AT_DISPATCH_CASE(_31, N) AT_DISPATCH_CASE(_32, N) AT_DISPATCH_CASE(_33, N) AT_DISPATCH_CASE(_34, N) AT_DISPATCH_CASE(_35, N) AT_DISPATCH_CASE(_36, N) AT_DISPATCH_CASE(_37, N) AT_DISPATCH_CASE(_38, N) AT_DISPATCH_CASE(_39, N) AT_DISPATCH_CASE(_40, N) AT_DISPATCH_CASE(_41, N) AT_DISPATCH_CASE(_42, N) AT_DISPATCH_CASE(_43, N)
|
||||
#define AT_AP44(N, _1, _2, _3, _4, _5, _6, _7, _8, _9, _10, _11, _12, _13, _14, _15, _16, _17, _18, _19, _20, _21, _22, _23, _24, _25, _26, _27, _28, _29, _30, _31, _32, _33, _34, _35, _36, _37, _38, _39, _40, _41, _42, _43, _44) AT_DISPATCH_CASE(_1, N) AT_DISPATCH_CASE(_2, N) AT_DISPATCH_CASE(_3, N) AT_DISPATCH_CASE(_4, N) AT_DISPATCH_CASE(_5, N) AT_DISPATCH_CASE(_6, N) AT_DISPATCH_CASE(_7, N) AT_DISPATCH_CASE(_8, N) AT_DISPATCH_CASE(_9, N) AT_DISPATCH_CASE(_10, N) AT_DISPATCH_CASE(_11, N) AT_DISPATCH_CASE(_12, N) AT_DISPATCH_CASE(_13, N) AT_DISPATCH_CASE(_14, N) AT_DISPATCH_CASE(_15, N) AT_DISPATCH_CASE(_16, N) AT_DISPATCH_CASE(_17, N) AT_DISPATCH_CASE(_18, N) AT_DISPATCH_CASE(_19, N) AT_DISPATCH_CASE(_20, N) AT_DISPATCH_CASE(_21, N) AT_DISPATCH_CASE(_22, N) AT_DISPATCH_CASE(_23, N) AT_DISPATCH_CASE(_24, N) AT_DISPATCH_CASE(_25, N) AT_DISPATCH_CASE(_26, N) AT_DISPATCH_CASE(_27, N) AT_DISPATCH_CASE(_28, N) AT_DISPATCH_CASE(_29, N) AT_DISPATCH_CASE(_30, N) AT_DISPATCH_CASE(_31, N) AT_DISPATCH_CASE(_32, N) AT_DISPATCH_CASE(_33, N) AT_DISPATCH_CASE(_34, N) AT_DISPATCH_CASE(_35, N) AT_DISPATCH_CASE(_36, N) AT_DISPATCH_CASE(_37, N) AT_DISPATCH_CASE(_38, N) AT_DISPATCH_CASE(_39, N) AT_DISPATCH_CASE(_40, N) AT_DISPATCH_CASE(_41, N) AT_DISPATCH_CASE(_42, N) AT_DISPATCH_CASE(_43, N) AT_DISPATCH_CASE(_44, N)
|
||||
#define AT_AP45(N, _1, _2, _3, _4, _5, _6, _7, _8, _9, _10, _11, _12, _13, _14, _15, _16, _17, _18, _19, _20, _21, _22, _23, _24, _25, _26, _27, _28, _29, _30, _31, _32, _33, _34, _35, _36, _37, _38, _39, _40, _41, _42, _43, _44, _45) AT_DISPATCH_CASE(_1, N) AT_DISPATCH_CASE(_2, N) AT_DISPATCH_CASE(_3, N) AT_DISPATCH_CASE(_4, N) AT_DISPATCH_CASE(_5, N) AT_DISPATCH_CASE(_6, N) AT_DISPATCH_CASE(_7, N) AT_DISPATCH_CASE(_8, N) AT_DISPATCH_CASE(_9, N) AT_DISPATCH_CASE(_10, N) AT_DISPATCH_CASE(_11, N) AT_DISPATCH_CASE(_12, N) AT_DISPATCH_CASE(_13, N) AT_DISPATCH_CASE(_14, N) AT_DISPATCH_CASE(_15, N) AT_DISPATCH_CASE(_16, N) AT_DISPATCH_CASE(_17, N) AT_DISPATCH_CASE(_18, N) AT_DISPATCH_CASE(_19, N) AT_DISPATCH_CASE(_20, N) AT_DISPATCH_CASE(_21, N) AT_DISPATCH_CASE(_22, N) AT_DISPATCH_CASE(_23, N) AT_DISPATCH_CASE(_24, N) AT_DISPATCH_CASE(_25, N) AT_DISPATCH_CASE(_26, N) AT_DISPATCH_CASE(_27, N) AT_DISPATCH_CASE(_28, N) AT_DISPATCH_CASE(_29, N) AT_DISPATCH_CASE(_30, N) AT_DISPATCH_CASE(_31, N) AT_DISPATCH_CASE(_32, N) AT_DISPATCH_CASE(_33, N) AT_DISPATCH_CASE(_34, N) AT_DISPATCH_CASE(_35, N) AT_DISPATCH_CASE(_36, N) AT_DISPATCH_CASE(_37, N) AT_DISPATCH_CASE(_38, N) AT_DISPATCH_CASE(_39, N) AT_DISPATCH_CASE(_40, N) AT_DISPATCH_CASE(_41, N) AT_DISPATCH_CASE(_42, N) AT_DISPATCH_CASE(_43, N) AT_DISPATCH_CASE(_44, N) AT_DISPATCH_CASE(_45, N)
|
||||
#define AT_AP46(N, _1, _2, _3, _4, _5, _6, _7, _8, _9, _10, _11, _12, _13, _14, _15, _16, _17, _18, _19, _20, _21, _22, _23, _24, _25, _26, _27, _28, _29, _30, _31, _32, _33, _34, _35, _36, _37, _38, _39, _40, _41, _42, _43, _44, _45, _46) AT_DISPATCH_CASE(_1, N) AT_DISPATCH_CASE(_2, N) AT_DISPATCH_CASE(_3, N) AT_DISPATCH_CASE(_4, N) AT_DISPATCH_CASE(_5, N) AT_DISPATCH_CASE(_6, N) AT_DISPATCH_CASE(_7, N) AT_DISPATCH_CASE(_8, N) AT_DISPATCH_CASE(_9, N) AT_DISPATCH_CASE(_10, N) AT_DISPATCH_CASE(_11, N) AT_DISPATCH_CASE(_12, N) AT_DISPATCH_CASE(_13, N) AT_DISPATCH_CASE(_14, N) AT_DISPATCH_CASE(_15, N) AT_DISPATCH_CASE(_16, N) AT_DISPATCH_CASE(_17, N) AT_DISPATCH_CASE(_18, N) AT_DISPATCH_CASE(_19, N) AT_DISPATCH_CASE(_20, N) AT_DISPATCH_CASE(_21, N) AT_DISPATCH_CASE(_22, N) AT_DISPATCH_CASE(_23, N) AT_DISPATCH_CASE(_24, N) AT_DISPATCH_CASE(_25, N) AT_DISPATCH_CASE(_26, N) AT_DISPATCH_CASE(_27, N) AT_DISPATCH_CASE(_28, N) AT_DISPATCH_CASE(_29, N) AT_DISPATCH_CASE(_30, N) AT_DISPATCH_CASE(_31, N) AT_DISPATCH_CASE(_32, N) AT_DISPATCH_CASE(_33, N) AT_DISPATCH_CASE(_34, N) AT_DISPATCH_CASE(_35, N) AT_DISPATCH_CASE(_36, N) AT_DISPATCH_CASE(_37, N) AT_DISPATCH_CASE(_38, N) AT_DISPATCH_CASE(_39, N) AT_DISPATCH_CASE(_40, N) AT_DISPATCH_CASE(_41, N) AT_DISPATCH_CASE(_42, N) AT_DISPATCH_CASE(_43, N) AT_DISPATCH_CASE(_44, N) AT_DISPATCH_CASE(_45, N) AT_DISPATCH_CASE(_46, N)
|
||||
#define AT_AP47(N, _1, _2, _3, _4, _5, _6, _7, _8, _9, _10, _11, _12, _13, _14, _15, _16, _17, _18, _19, _20, _21, _22, _23, _24, _25, _26, _27, _28, _29, _30, _31, _32, _33, _34, _35, _36, _37, _38, _39, _40, _41, _42, _43, _44, _45, _46, _47) AT_DISPATCH_CASE(_1, N) AT_DISPATCH_CASE(_2, N) AT_DISPATCH_CASE(_3, N) AT_DISPATCH_CASE(_4, N) AT_DISPATCH_CASE(_5, N) AT_DISPATCH_CASE(_6, N) AT_DISPATCH_CASE(_7, N) AT_DISPATCH_CASE(_8, N) AT_DISPATCH_CASE(_9, N) AT_DISPATCH_CASE(_10, N) AT_DISPATCH_CASE(_11, N) AT_DISPATCH_CASE(_12, N) AT_DISPATCH_CASE(_13, N) AT_DISPATCH_CASE(_14, N) AT_DISPATCH_CASE(_15, N) AT_DISPATCH_CASE(_16, N) AT_DISPATCH_CASE(_17, N) AT_DISPATCH_CASE(_18, N) AT_DISPATCH_CASE(_19, N) AT_DISPATCH_CASE(_20, N) AT_DISPATCH_CASE(_21, N) AT_DISPATCH_CASE(_22, N) AT_DISPATCH_CASE(_23, N) AT_DISPATCH_CASE(_24, N) AT_DISPATCH_CASE(_25, N) AT_DISPATCH_CASE(_26, N) AT_DISPATCH_CASE(_27, N) AT_DISPATCH_CASE(_28, N) AT_DISPATCH_CASE(_29, N) AT_DISPATCH_CASE(_30, N) AT_DISPATCH_CASE(_31, N) AT_DISPATCH_CASE(_32, N) AT_DISPATCH_CASE(_33, N) AT_DISPATCH_CASE(_34, N) AT_DISPATCH_CASE(_35, N) AT_DISPATCH_CASE(_36, N) AT_DISPATCH_CASE(_37, N) AT_DISPATCH_CASE(_38, N) AT_DISPATCH_CASE(_39, N) AT_DISPATCH_CASE(_40, N) AT_DISPATCH_CASE(_41, N) AT_DISPATCH_CASE(_42, N) AT_DISPATCH_CASE(_43, N) AT_DISPATCH_CASE(_44, N) AT_DISPATCH_CASE(_45, N) AT_DISPATCH_CASE(_46, N) AT_DISPATCH_CASE(_47, N)
|
||||
#define AT_AP48(N, _1, _2, _3, _4, _5, _6, _7, _8, _9, _10, _11, _12, _13, _14, _15, _16, _17, _18, _19, _20, _21, _22, _23, _24, _25, _26, _27, _28, _29, _30, _31, _32, _33, _34, _35, _36, _37, _38, _39, _40, _41, _42, _43, _44, _45, _46, _47, _48) AT_DISPATCH_CASE(_1, N) AT_DISPATCH_CASE(_2, N) AT_DISPATCH_CASE(_3, N) AT_DISPATCH_CASE(_4, N) AT_DISPATCH_CASE(_5, N) AT_DISPATCH_CASE(_6, N) AT_DISPATCH_CASE(_7, N) AT_DISPATCH_CASE(_8, N) AT_DISPATCH_CASE(_9, N) AT_DISPATCH_CASE(_10, N) AT_DISPATCH_CASE(_11, N) AT_DISPATCH_CASE(_12, N) AT_DISPATCH_CASE(_13, N) AT_DISPATCH_CASE(_14, N) AT_DISPATCH_CASE(_15, N) AT_DISPATCH_CASE(_16, N) AT_DISPATCH_CASE(_17, N) AT_DISPATCH_CASE(_18, N) AT_DISPATCH_CASE(_19, N) AT_DISPATCH_CASE(_20, N) AT_DISPATCH_CASE(_21, N) AT_DISPATCH_CASE(_22, N) AT_DISPATCH_CASE(_23, N) AT_DISPATCH_CASE(_24, N) AT_DISPATCH_CASE(_25, N) AT_DISPATCH_CASE(_26, N) AT_DISPATCH_CASE(_27, N) AT_DISPATCH_CASE(_28, N) AT_DISPATCH_CASE(_29, N) AT_DISPATCH_CASE(_30, N) AT_DISPATCH_CASE(_31, N) AT_DISPATCH_CASE(_32, N) AT_DISPATCH_CASE(_33, N) AT_DISPATCH_CASE(_34, N) AT_DISPATCH_CASE(_35, N) AT_DISPATCH_CASE(_36, N) AT_DISPATCH_CASE(_37, N) AT_DISPATCH_CASE(_38, N) AT_DISPATCH_CASE(_39, N) AT_DISPATCH_CASE(_40, N) AT_DISPATCH_CASE(_41, N) AT_DISPATCH_CASE(_42, N) AT_DISPATCH_CASE(_43, N) AT_DISPATCH_CASE(_44, N) AT_DISPATCH_CASE(_45, N) AT_DISPATCH_CASE(_46, N) AT_DISPATCH_CASE(_47, N) AT_DISPATCH_CASE(_48, N)
|
||||
#define AT_AP49(N, _1, _2, _3, _4, _5, _6, _7, _8, _9, _10, _11, _12, _13, _14, _15, _16, _17, _18, _19, _20, _21, _22, _23, _24, _25, _26, _27, _28, _29, _30, _31, _32, _33, _34, _35, _36, _37, _38, _39, _40, _41, _42, _43, _44, _45, _46, _47, _48, _49) AT_DISPATCH_CASE(_1, N) AT_DISPATCH_CASE(_2, N) AT_DISPATCH_CASE(_3, N) AT_DISPATCH_CASE(_4, N) AT_DISPATCH_CASE(_5, N) AT_DISPATCH_CASE(_6, N) AT_DISPATCH_CASE(_7, N) AT_DISPATCH_CASE(_8, N) AT_DISPATCH_CASE(_9, N) AT_DISPATCH_CASE(_10, N) AT_DISPATCH_CASE(_11, N) AT_DISPATCH_CASE(_12, N) AT_DISPATCH_CASE(_13, N) AT_DISPATCH_CASE(_14, N) AT_DISPATCH_CASE(_15, N) AT_DISPATCH_CASE(_16, N) AT_DISPATCH_CASE(_17, N) AT_DISPATCH_CASE(_18, N) AT_DISPATCH_CASE(_19, N) AT_DISPATCH_CASE(_20, N) AT_DISPATCH_CASE(_21, N) AT_DISPATCH_CASE(_22, N) AT_DISPATCH_CASE(_23, N) AT_DISPATCH_CASE(_24, N) AT_DISPATCH_CASE(_25, N) AT_DISPATCH_CASE(_26, N) AT_DISPATCH_CASE(_27, N) AT_DISPATCH_CASE(_28, N) AT_DISPATCH_CASE(_29, N) AT_DISPATCH_CASE(_30, N) AT_DISPATCH_CASE(_31, N) AT_DISPATCH_CASE(_32, N) AT_DISPATCH_CASE(_33, N) AT_DISPATCH_CASE(_34, N) AT_DISPATCH_CASE(_35, N) AT_DISPATCH_CASE(_36, N) AT_DISPATCH_CASE(_37, N) AT_DISPATCH_CASE(_38, N) AT_DISPATCH_CASE(_39, N) AT_DISPATCH_CASE(_40, N) AT_DISPATCH_CASE(_41, N) AT_DISPATCH_CASE(_42, N) AT_DISPATCH_CASE(_43, N) AT_DISPATCH_CASE(_44, N) AT_DISPATCH_CASE(_45, N) AT_DISPATCH_CASE(_46, N) AT_DISPATCH_CASE(_47, N) AT_DISPATCH_CASE(_48, N) AT_DISPATCH_CASE(_49, N)
|
||||
#define AT_AP50(N, _1, _2, _3, _4, _5, _6, _7, _8, _9, _10, _11, _12, _13, _14, _15, _16, _17, _18, _19, _20, _21, _22, _23, _24, _25, _26, _27, _28, _29, _30, _31, _32, _33, _34, _35, _36, _37, _38, _39, _40, _41, _42, _43, _44, _45, _46, _47, _48, _49, _50) AT_DISPATCH_CASE(_1, N) AT_DISPATCH_CASE(_2, N) AT_DISPATCH_CASE(_3, N) AT_DISPATCH_CASE(_4, N) AT_DISPATCH_CASE(_5, N) AT_DISPATCH_CASE(_6, N) AT_DISPATCH_CASE(_7, N) AT_DISPATCH_CASE(_8, N) AT_DISPATCH_CASE(_9, N) AT_DISPATCH_CASE(_10, N) AT_DISPATCH_CASE(_11, N) AT_DISPATCH_CASE(_12, N) AT_DISPATCH_CASE(_13, N) AT_DISPATCH_CASE(_14, N) AT_DISPATCH_CASE(_15, N) AT_DISPATCH_CASE(_16, N) AT_DISPATCH_CASE(_17, N) AT_DISPATCH_CASE(_18, N) AT_DISPATCH_CASE(_19, N) AT_DISPATCH_CASE(_20, N) AT_DISPATCH_CASE(_21, N) AT_DISPATCH_CASE(_22, N) AT_DISPATCH_CASE(_23, N) AT_DISPATCH_CASE(_24, N) AT_DISPATCH_CASE(_25, N) AT_DISPATCH_CASE(_26, N) AT_DISPATCH_CASE(_27, N) AT_DISPATCH_CASE(_28, N) AT_DISPATCH_CASE(_29, N) AT_DISPATCH_CASE(_30, N) AT_DISPATCH_CASE(_31, N) AT_DISPATCH_CASE(_32, N) AT_DISPATCH_CASE(_33, N) AT_DISPATCH_CASE(_34, N) AT_DISPATCH_CASE(_35, N) AT_DISPATCH_CASE(_36, N) AT_DISPATCH_CASE(_37, N) AT_DISPATCH_CASE(_38, N) AT_DISPATCH_CASE(_39, N) AT_DISPATCH_CASE(_40, N) AT_DISPATCH_CASE(_41, N) AT_DISPATCH_CASE(_42, N) AT_DISPATCH_CASE(_43, N) AT_DISPATCH_CASE(_44, N) AT_DISPATCH_CASE(_45, N) AT_DISPATCH_CASE(_46, N) AT_DISPATCH_CASE(_47, N) AT_DISPATCH_CASE(_48, N) AT_DISPATCH_CASE(_49, N) AT_DISPATCH_CASE(_50, N)
|
||||
#define AT_AP51(N, _1, _2, _3, _4, _5, _6, _7, _8, _9, _10, _11, _12, _13, _14, _15, _16, _17, _18, _19, _20, _21, _22, _23, _24, _25, _26, _27, _28, _29, _30, _31, _32, _33, _34, _35, _36, _37, _38, _39, _40, _41, _42, _43, _44, _45, _46, _47, _48, _49, _50, _51) AT_DISPATCH_CASE(_1, N) AT_DISPATCH_CASE(_2, N) AT_DISPATCH_CASE(_3, N) AT_DISPATCH_CASE(_4, N) AT_DISPATCH_CASE(_5, N) AT_DISPATCH_CASE(_6, N) AT_DISPATCH_CASE(_7, N) AT_DISPATCH_CASE(_8, N) AT_DISPATCH_CASE(_9, N) AT_DISPATCH_CASE(_10, N) AT_DISPATCH_CASE(_11, N) AT_DISPATCH_CASE(_12, N) AT_DISPATCH_CASE(_13, N) AT_DISPATCH_CASE(_14, N) AT_DISPATCH_CASE(_15, N) AT_DISPATCH_CASE(_16, N) AT_DISPATCH_CASE(_17, N) AT_DISPATCH_CASE(_18, N) AT_DISPATCH_CASE(_19, N) AT_DISPATCH_CASE(_20, N) AT_DISPATCH_CASE(_21, N) AT_DISPATCH_CASE(_22, N) AT_DISPATCH_CASE(_23, N) AT_DISPATCH_CASE(_24, N) AT_DISPATCH_CASE(_25, N) AT_DISPATCH_CASE(_26, N) AT_DISPATCH_CASE(_27, N) AT_DISPATCH_CASE(_28, N) AT_DISPATCH_CASE(_29, N) AT_DISPATCH_CASE(_30, N) AT_DISPATCH_CASE(_31, N) AT_DISPATCH_CASE(_32, N) AT_DISPATCH_CASE(_33, N) AT_DISPATCH_CASE(_34, N) AT_DISPATCH_CASE(_35, N) AT_DISPATCH_CASE(_36, N) AT_DISPATCH_CASE(_37, N) AT_DISPATCH_CASE(_38, N) AT_DISPATCH_CASE(_39, N) AT_DISPATCH_CASE(_40, N) AT_DISPATCH_CASE(_41, N) AT_DISPATCH_CASE(_42, N) AT_DISPATCH_CASE(_43, N) AT_DISPATCH_CASE(_44, N) AT_DISPATCH_CASE(_45, N) AT_DISPATCH_CASE(_46, N) AT_DISPATCH_CASE(_47, N) AT_DISPATCH_CASE(_48, N) AT_DISPATCH_CASE(_49, N) AT_DISPATCH_CASE(_50, N) AT_DISPATCH_CASE(_51, N)
|
||||
#define AT_AP52(N, _1, _2, _3, _4, _5, _6, _7, _8, _9, _10, _11, _12, _13, _14, _15, _16, _17, _18, _19, _20, _21, _22, _23, _24, _25, _26, _27, _28, _29, _30, _31, _32, _33, _34, _35, _36, _37, _38, _39, _40, _41, _42, _43, _44, _45, _46, _47, _48, _49, _50, _51, _52) AT_DISPATCH_CASE(_1, N) AT_DISPATCH_CASE(_2, N) AT_DISPATCH_CASE(_3, N) AT_DISPATCH_CASE(_4, N) AT_DISPATCH_CASE(_5, N) AT_DISPATCH_CASE(_6, N) AT_DISPATCH_CASE(_7, N) AT_DISPATCH_CASE(_8, N) AT_DISPATCH_CASE(_9, N) AT_DISPATCH_CASE(_10, N) AT_DISPATCH_CASE(_11, N) AT_DISPATCH_CASE(_12, N) AT_DISPATCH_CASE(_13, N) AT_DISPATCH_CASE(_14, N) AT_DISPATCH_CASE(_15, N) AT_DISPATCH_CASE(_16, N) AT_DISPATCH_CASE(_17, N) AT_DISPATCH_CASE(_18, N) AT_DISPATCH_CASE(_19, N) AT_DISPATCH_CASE(_20, N) AT_DISPATCH_CASE(_21, N) AT_DISPATCH_CASE(_22, N) AT_DISPATCH_CASE(_23, N) AT_DISPATCH_CASE(_24, N) AT_DISPATCH_CASE(_25, N) AT_DISPATCH_CASE(_26, N) AT_DISPATCH_CASE(_27, N) AT_DISPATCH_CASE(_28, N) AT_DISPATCH_CASE(_29, N) AT_DISPATCH_CASE(_30, N) AT_DISPATCH_CASE(_31, N) AT_DISPATCH_CASE(_32, N) AT_DISPATCH_CASE(_33, N) AT_DISPATCH_CASE(_34, N) AT_DISPATCH_CASE(_35, N) AT_DISPATCH_CASE(_36, N) AT_DISPATCH_CASE(_37, N) AT_DISPATCH_CASE(_38, N) AT_DISPATCH_CASE(_39, N) AT_DISPATCH_CASE(_40, N) AT_DISPATCH_CASE(_41, N) AT_DISPATCH_CASE(_42, N) AT_DISPATCH_CASE(_43, N) AT_DISPATCH_CASE(_44, N) AT_DISPATCH_CASE(_45, N) AT_DISPATCH_CASE(_46, N) AT_DISPATCH_CASE(_47, N) AT_DISPATCH_CASE(_48, N) AT_DISPATCH_CASE(_49, N) AT_DISPATCH_CASE(_50, N) AT_DISPATCH_CASE(_51, N) AT_DISPATCH_CASE(_52, N)
|
||||
#define AT_AP53(N, _1, _2, _3, _4, _5, _6, _7, _8, _9, _10, _11, _12, _13, _14, _15, _16, _17, _18, _19, _20, _21, _22, _23, _24, _25, _26, _27, _28, _29, _30, _31, _32, _33, _34, _35, _36, _37, _38, _39, _40, _41, _42, _43, _44, _45, _46, _47, _48, _49, _50, _51, _52, _53) AT_DISPATCH_CASE(_1, N) AT_DISPATCH_CASE(_2, N) AT_DISPATCH_CASE(_3, N) AT_DISPATCH_CASE(_4, N) AT_DISPATCH_CASE(_5, N) AT_DISPATCH_CASE(_6, N) AT_DISPATCH_CASE(_7, N) AT_DISPATCH_CASE(_8, N) AT_DISPATCH_CASE(_9, N) AT_DISPATCH_CASE(_10, N) AT_DISPATCH_CASE(_11, N) AT_DISPATCH_CASE(_12, N) AT_DISPATCH_CASE(_13, N) AT_DISPATCH_CASE(_14, N) AT_DISPATCH_CASE(_15, N) AT_DISPATCH_CASE(_16, N) AT_DISPATCH_CASE(_17, N) AT_DISPATCH_CASE(_18, N) AT_DISPATCH_CASE(_19, N) AT_DISPATCH_CASE(_20, N) AT_DISPATCH_CASE(_21, N) AT_DISPATCH_CASE(_22, N) AT_DISPATCH_CASE(_23, N) AT_DISPATCH_CASE(_24, N) AT_DISPATCH_CASE(_25, N) AT_DISPATCH_CASE(_26, N) AT_DISPATCH_CASE(_27, N) AT_DISPATCH_CASE(_28, N) AT_DISPATCH_CASE(_29, N) AT_DISPATCH_CASE(_30, N) AT_DISPATCH_CASE(_31, N) AT_DISPATCH_CASE(_32, N) AT_DISPATCH_CASE(_33, N) AT_DISPATCH_CASE(_34, N) AT_DISPATCH_CASE(_35, N) AT_DISPATCH_CASE(_36, N) AT_DISPATCH_CASE(_37, N) AT_DISPATCH_CASE(_38, N) AT_DISPATCH_CASE(_39, N) AT_DISPATCH_CASE(_40, N) AT_DISPATCH_CASE(_41, N) AT_DISPATCH_CASE(_42, N) AT_DISPATCH_CASE(_43, N) AT_DISPATCH_CASE(_44, N) AT_DISPATCH_CASE(_45, N) AT_DISPATCH_CASE(_46, N) AT_DISPATCH_CASE(_47, N) AT_DISPATCH_CASE(_48, N) AT_DISPATCH_CASE(_49, N) AT_DISPATCH_CASE(_50, N) AT_DISPATCH_CASE(_51, N) AT_DISPATCH_CASE(_52, N) AT_DISPATCH_CASE(_53, N)
|
||||
#define AT_AP54(N, _1, _2, _3, _4, _5, _6, _7, _8, _9, _10, _11, _12, _13, _14, _15, _16, _17, _18, _19, _20, _21, _22, _23, _24, _25, _26, _27, _28, _29, _30, _31, _32, _33, _34, _35, _36, _37, _38, _39, _40, _41, _42, _43, _44, _45, _46, _47, _48, _49, _50, _51, _52, _53, _54) AT_DISPATCH_CASE(_1, N) AT_DISPATCH_CASE(_2, N) AT_DISPATCH_CASE(_3, N) AT_DISPATCH_CASE(_4, N) AT_DISPATCH_CASE(_5, N) AT_DISPATCH_CASE(_6, N) AT_DISPATCH_CASE(_7, N) AT_DISPATCH_CASE(_8, N) AT_DISPATCH_CASE(_9, N) AT_DISPATCH_CASE(_10, N) AT_DISPATCH_CASE(_11, N) AT_DISPATCH_CASE(_12, N) AT_DISPATCH_CASE(_13, N) AT_DISPATCH_CASE(_14, N) AT_DISPATCH_CASE(_15, N) AT_DISPATCH_CASE(_16, N) AT_DISPATCH_CASE(_17, N) AT_DISPATCH_CASE(_18, N) AT_DISPATCH_CASE(_19, N) AT_DISPATCH_CASE(_20, N) AT_DISPATCH_CASE(_21, N) AT_DISPATCH_CASE(_22, N) AT_DISPATCH_CASE(_23, N) AT_DISPATCH_CASE(_24, N) AT_DISPATCH_CASE(_25, N) AT_DISPATCH_CASE(_26, N) AT_DISPATCH_CASE(_27, N) AT_DISPATCH_CASE(_28, N) AT_DISPATCH_CASE(_29, N) AT_DISPATCH_CASE(_30, N) AT_DISPATCH_CASE(_31, N) AT_DISPATCH_CASE(_32, N) AT_DISPATCH_CASE(_33, N) AT_DISPATCH_CASE(_34, N) AT_DISPATCH_CASE(_35, N) AT_DISPATCH_CASE(_36, N) AT_DISPATCH_CASE(_37, N) AT_DISPATCH_CASE(_38, N) AT_DISPATCH_CASE(_39, N) AT_DISPATCH_CASE(_40, N) AT_DISPATCH_CASE(_41, N) AT_DISPATCH_CASE(_42, N) AT_DISPATCH_CASE(_43, N) AT_DISPATCH_CASE(_44, N) AT_DISPATCH_CASE(_45, N) AT_DISPATCH_CASE(_46, N) AT_DISPATCH_CASE(_47, N) AT_DISPATCH_CASE(_48, N) AT_DISPATCH_CASE(_49, N) AT_DISPATCH_CASE(_50, N) AT_DISPATCH_CASE(_51, N) AT_DISPATCH_CASE(_52, N) AT_DISPATCH_CASE(_53, N) AT_DISPATCH_CASE(_54, N)
|
||||
#define AT_AP55(N, _1, _2, _3, _4, _5, _6, _7, _8, _9, _10, _11, _12, _13, _14, _15, _16, _17, _18, _19, _20, _21, _22, _23, _24, _25, _26, _27, _28, _29, _30, _31, _32, _33, _34, _35, _36, _37, _38, _39, _40, _41, _42, _43, _44, _45, _46, _47, _48, _49, _50, _51, _52, _53, _54, _55) AT_DISPATCH_CASE(_1, N) AT_DISPATCH_CASE(_2, N) AT_DISPATCH_CASE(_3, N) AT_DISPATCH_CASE(_4, N) AT_DISPATCH_CASE(_5, N) AT_DISPATCH_CASE(_6, N) AT_DISPATCH_CASE(_7, N) AT_DISPATCH_CASE(_8, N) AT_DISPATCH_CASE(_9, N) AT_DISPATCH_CASE(_10, N) AT_DISPATCH_CASE(_11, N) AT_DISPATCH_CASE(_12, N) AT_DISPATCH_CASE(_13, N) AT_DISPATCH_CASE(_14, N) AT_DISPATCH_CASE(_15, N) AT_DISPATCH_CASE(_16, N) AT_DISPATCH_CASE(_17, N) AT_DISPATCH_CASE(_18, N) AT_DISPATCH_CASE(_19, N) AT_DISPATCH_CASE(_20, N) AT_DISPATCH_CASE(_21, N) AT_DISPATCH_CASE(_22, N) AT_DISPATCH_CASE(_23, N) AT_DISPATCH_CASE(_24, N) AT_DISPATCH_CASE(_25, N) AT_DISPATCH_CASE(_26, N) AT_DISPATCH_CASE(_27, N) AT_DISPATCH_CASE(_28, N) AT_DISPATCH_CASE(_29, N) AT_DISPATCH_CASE(_30, N) AT_DISPATCH_CASE(_31, N) AT_DISPATCH_CASE(_32, N) AT_DISPATCH_CASE(_33, N) AT_DISPATCH_CASE(_34, N) AT_DISPATCH_CASE(_35, N) AT_DISPATCH_CASE(_36, N) AT_DISPATCH_CASE(_37, N) AT_DISPATCH_CASE(_38, N) AT_DISPATCH_CASE(_39, N) AT_DISPATCH_CASE(_40, N) AT_DISPATCH_CASE(_41, N) AT_DISPATCH_CASE(_42, N) AT_DISPATCH_CASE(_43, N) AT_DISPATCH_CASE(_44, N) AT_DISPATCH_CASE(_45, N) AT_DISPATCH_CASE(_46, N) AT_DISPATCH_CASE(_47, N) AT_DISPATCH_CASE(_48, N) AT_DISPATCH_CASE(_49, N) AT_DISPATCH_CASE(_50, N) AT_DISPATCH_CASE(_51, N) AT_DISPATCH_CASE(_52, N) AT_DISPATCH_CASE(_53, N) AT_DISPATCH_CASE(_54, N) AT_DISPATCH_CASE(_55, N)
|
||||
#define AT_AP56(N, _1, _2, _3, _4, _5, _6, _7, _8, _9, _10, _11, _12, _13, _14, _15, _16, _17, _18, _19, _20, _21, _22, _23, _24, _25, _26, _27, _28, _29, _30, _31, _32, _33, _34, _35, _36, _37, _38, _39, _40, _41, _42, _43, _44, _45, _46, _47, _48, _49, _50, _51, _52, _53, _54, _55, _56) AT_DISPATCH_CASE(_1, N) AT_DISPATCH_CASE(_2, N) AT_DISPATCH_CASE(_3, N) AT_DISPATCH_CASE(_4, N) AT_DISPATCH_CASE(_5, N) AT_DISPATCH_CASE(_6, N) AT_DISPATCH_CASE(_7, N) AT_DISPATCH_CASE(_8, N) AT_DISPATCH_CASE(_9, N) AT_DISPATCH_CASE(_10, N) AT_DISPATCH_CASE(_11, N) AT_DISPATCH_CASE(_12, N) AT_DISPATCH_CASE(_13, N) AT_DISPATCH_CASE(_14, N) AT_DISPATCH_CASE(_15, N) AT_DISPATCH_CASE(_16, N) AT_DISPATCH_CASE(_17, N) AT_DISPATCH_CASE(_18, N) AT_DISPATCH_CASE(_19, N) AT_DISPATCH_CASE(_20, N) AT_DISPATCH_CASE(_21, N) AT_DISPATCH_CASE(_22, N) AT_DISPATCH_CASE(_23, N) AT_DISPATCH_CASE(_24, N) AT_DISPATCH_CASE(_25, N) AT_DISPATCH_CASE(_26, N) AT_DISPATCH_CASE(_27, N) AT_DISPATCH_CASE(_28, N) AT_DISPATCH_CASE(_29, N) AT_DISPATCH_CASE(_30, N) AT_DISPATCH_CASE(_31, N) AT_DISPATCH_CASE(_32, N) AT_DISPATCH_CASE(_33, N) AT_DISPATCH_CASE(_34, N) AT_DISPATCH_CASE(_35, N) AT_DISPATCH_CASE(_36, N) AT_DISPATCH_CASE(_37, N) AT_DISPATCH_CASE(_38, N) AT_DISPATCH_CASE(_39, N) AT_DISPATCH_CASE(_40, N) AT_DISPATCH_CASE(_41, N) AT_DISPATCH_CASE(_42, N) AT_DISPATCH_CASE(_43, N) AT_DISPATCH_CASE(_44, N) AT_DISPATCH_CASE(_45, N) AT_DISPATCH_CASE(_46, N) AT_DISPATCH_CASE(_47, N) AT_DISPATCH_CASE(_48, N) AT_DISPATCH_CASE(_49, N) AT_DISPATCH_CASE(_50, N) AT_DISPATCH_CASE(_51, N) AT_DISPATCH_CASE(_52, N) AT_DISPATCH_CASE(_53, N) AT_DISPATCH_CASE(_54, N) AT_DISPATCH_CASE(_55, N) AT_DISPATCH_CASE(_56, N)
|
||||
#define AT_AP57(N, _1, _2, _3, _4, _5, _6, _7, _8, _9, _10, _11, _12, _13, _14, _15, _16, _17, _18, _19, _20, _21, _22, _23, _24, _25, _26, _27, _28, _29, _30, _31, _32, _33, _34, _35, _36, _37, _38, _39, _40, _41, _42, _43, _44, _45, _46, _47, _48, _49, _50, _51, _52, _53, _54, _55, _56, _57) AT_DISPATCH_CASE(_1, N) AT_DISPATCH_CASE(_2, N) AT_DISPATCH_CASE(_3, N) AT_DISPATCH_CASE(_4, N) AT_DISPATCH_CASE(_5, N) AT_DISPATCH_CASE(_6, N) AT_DISPATCH_CASE(_7, N) AT_DISPATCH_CASE(_8, N) AT_DISPATCH_CASE(_9, N) AT_DISPATCH_CASE(_10, N) AT_DISPATCH_CASE(_11, N) AT_DISPATCH_CASE(_12, N) AT_DISPATCH_CASE(_13, N) AT_DISPATCH_CASE(_14, N) AT_DISPATCH_CASE(_15, N) AT_DISPATCH_CASE(_16, N) AT_DISPATCH_CASE(_17, N) AT_DISPATCH_CASE(_18, N) AT_DISPATCH_CASE(_19, N) AT_DISPATCH_CASE(_20, N) AT_DISPATCH_CASE(_21, N) AT_DISPATCH_CASE(_22, N) AT_DISPATCH_CASE(_23, N) AT_DISPATCH_CASE(_24, N) AT_DISPATCH_CASE(_25, N) AT_DISPATCH_CASE(_26, N) AT_DISPATCH_CASE(_27, N) AT_DISPATCH_CASE(_28, N) AT_DISPATCH_CASE(_29, N) AT_DISPATCH_CASE(_30, N) AT_DISPATCH_CASE(_31, N) AT_DISPATCH_CASE(_32, N) AT_DISPATCH_CASE(_33, N) AT_DISPATCH_CASE(_34, N) AT_DISPATCH_CASE(_35, N) AT_DISPATCH_CASE(_36, N) AT_DISPATCH_CASE(_37, N) AT_DISPATCH_CASE(_38, N) AT_DISPATCH_CASE(_39, N) AT_DISPATCH_CASE(_40, N) AT_DISPATCH_CASE(_41, N) AT_DISPATCH_CASE(_42, N) AT_DISPATCH_CASE(_43, N) AT_DISPATCH_CASE(_44, N) AT_DISPATCH_CASE(_45, N) AT_DISPATCH_CASE(_46, N) AT_DISPATCH_CASE(_47, N) AT_DISPATCH_CASE(_48, N) AT_DISPATCH_CASE(_49, N) AT_DISPATCH_CASE(_50, N) AT_DISPATCH_CASE(_51, N) AT_DISPATCH_CASE(_52, N) AT_DISPATCH_CASE(_53, N) AT_DISPATCH_CASE(_54, N) AT_DISPATCH_CASE(_55, N) AT_DISPATCH_CASE(_56, N) AT_DISPATCH_CASE(_57, N)
|
||||
#define AT_AP58(N, _1, _2, _3, _4, _5, _6, _7, _8, _9, _10, _11, _12, _13, _14, _15, _16, _17, _18, _19, _20, _21, _22, _23, _24, _25, _26, _27, _28, _29, _30, _31, _32, _33, _34, _35, _36, _37, _38, _39, _40, _41, _42, _43, _44, _45, _46, _47, _48, _49, _50, _51, _52, _53, _54, _55, _56, _57, _58) AT_DISPATCH_CASE(_1, N) AT_DISPATCH_CASE(_2, N) AT_DISPATCH_CASE(_3, N) AT_DISPATCH_CASE(_4, N) AT_DISPATCH_CASE(_5, N) AT_DISPATCH_CASE(_6, N) AT_DISPATCH_CASE(_7, N) AT_DISPATCH_CASE(_8, N) AT_DISPATCH_CASE(_9, N) AT_DISPATCH_CASE(_10, N) AT_DISPATCH_CASE(_11, N) AT_DISPATCH_CASE(_12, N) AT_DISPATCH_CASE(_13, N) AT_DISPATCH_CASE(_14, N) AT_DISPATCH_CASE(_15, N) AT_DISPATCH_CASE(_16, N) AT_DISPATCH_CASE(_17, N) AT_DISPATCH_CASE(_18, N) AT_DISPATCH_CASE(_19, N) AT_DISPATCH_CASE(_20, N) AT_DISPATCH_CASE(_21, N) AT_DISPATCH_CASE(_22, N) AT_DISPATCH_CASE(_23, N) AT_DISPATCH_CASE(_24, N) AT_DISPATCH_CASE(_25, N) AT_DISPATCH_CASE(_26, N) AT_DISPATCH_CASE(_27, N) AT_DISPATCH_CASE(_28, N) AT_DISPATCH_CASE(_29, N) AT_DISPATCH_CASE(_30, N) AT_DISPATCH_CASE(_31, N) AT_DISPATCH_CASE(_32, N) AT_DISPATCH_CASE(_33, N) AT_DISPATCH_CASE(_34, N) AT_DISPATCH_CASE(_35, N) AT_DISPATCH_CASE(_36, N) AT_DISPATCH_CASE(_37, N) AT_DISPATCH_CASE(_38, N) AT_DISPATCH_CASE(_39, N) AT_DISPATCH_CASE(_40, N) AT_DISPATCH_CASE(_41, N) AT_DISPATCH_CASE(_42, N) AT_DISPATCH_CASE(_43, N) AT_DISPATCH_CASE(_44, N) AT_DISPATCH_CASE(_45, N) AT_DISPATCH_CASE(_46, N) AT_DISPATCH_CASE(_47, N) AT_DISPATCH_CASE(_48, N) AT_DISPATCH_CASE(_49, N) AT_DISPATCH_CASE(_50, N) AT_DISPATCH_CASE(_51, N) AT_DISPATCH_CASE(_52, N) AT_DISPATCH_CASE(_53, N) AT_DISPATCH_CASE(_54, N) AT_DISPATCH_CASE(_55, N) AT_DISPATCH_CASE(_56, N) AT_DISPATCH_CASE(_57, N) AT_DISPATCH_CASE(_58, N)
|
||||
#define AT_AP59(N, _1, _2, _3, _4, _5, _6, _7, _8, _9, _10, _11, _12, _13, _14, _15, _16, _17, _18, _19, _20, _21, _22, _23, _24, _25, _26, _27, _28, _29, _30, _31, _32, _33, _34, _35, _36, _37, _38, _39, _40, _41, _42, _43, _44, _45, _46, _47, _48, _49, _50, _51, _52, _53, _54, _55, _56, _57, _58, _59) AT_DISPATCH_CASE(_1, N) AT_DISPATCH_CASE(_2, N) AT_DISPATCH_CASE(_3, N) AT_DISPATCH_CASE(_4, N) AT_DISPATCH_CASE(_5, N) AT_DISPATCH_CASE(_6, N) AT_DISPATCH_CASE(_7, N) AT_DISPATCH_CASE(_8, N) AT_DISPATCH_CASE(_9, N) AT_DISPATCH_CASE(_10, N) AT_DISPATCH_CASE(_11, N) AT_DISPATCH_CASE(_12, N) AT_DISPATCH_CASE(_13, N) AT_DISPATCH_CASE(_14, N) AT_DISPATCH_CASE(_15, N) AT_DISPATCH_CASE(_16, N) AT_DISPATCH_CASE(_17, N) AT_DISPATCH_CASE(_18, N) AT_DISPATCH_CASE(_19, N) AT_DISPATCH_CASE(_20, N) AT_DISPATCH_CASE(_21, N) AT_DISPATCH_CASE(_22, N) AT_DISPATCH_CASE(_23, N) AT_DISPATCH_CASE(_24, N) AT_DISPATCH_CASE(_25, N) AT_DISPATCH_CASE(_26, N) AT_DISPATCH_CASE(_27, N) AT_DISPATCH_CASE(_28, N) AT_DISPATCH_CASE(_29, N) AT_DISPATCH_CASE(_30, N) AT_DISPATCH_CASE(_31, N) AT_DISPATCH_CASE(_32, N) AT_DISPATCH_CASE(_33, N) AT_DISPATCH_CASE(_34, N) AT_DISPATCH_CASE(_35, N) AT_DISPATCH_CASE(_36, N) AT_DISPATCH_CASE(_37, N) AT_DISPATCH_CASE(_38, N) AT_DISPATCH_CASE(_39, N) AT_DISPATCH_CASE(_40, N) AT_DISPATCH_CASE(_41, N) AT_DISPATCH_CASE(_42, N) AT_DISPATCH_CASE(_43, N) AT_DISPATCH_CASE(_44, N) AT_DISPATCH_CASE(_45, N) AT_DISPATCH_CASE(_46, N) AT_DISPATCH_CASE(_47, N) AT_DISPATCH_CASE(_48, N) AT_DISPATCH_CASE(_49, N) AT_DISPATCH_CASE(_50, N) AT_DISPATCH_CASE(_51, N) AT_DISPATCH_CASE(_52, N) AT_DISPATCH_CASE(_53, N) AT_DISPATCH_CASE(_54, N) AT_DISPATCH_CASE(_55, N) AT_DISPATCH_CASE(_56, N) AT_DISPATCH_CASE(_57, N) AT_DISPATCH_CASE(_58, N) AT_DISPATCH_CASE(_59, N)
|
||||
#define AT_AP60(N, _1, _2, _3, _4, _5, _6, _7, _8, _9, _10, _11, _12, _13, _14, _15, _16, _17, _18, _19, _20, _21, _22, _23, _24, _25, _26, _27, _28, _29, _30, _31, _32, _33, _34, _35, _36, _37, _38, _39, _40, _41, _42, _43, _44, _45, _46, _47, _48, _49, _50, _51, _52, _53, _54, _55, _56, _57, _58, _59, _60) AT_DISPATCH_CASE(_1, N) AT_DISPATCH_CASE(_2, N) AT_DISPATCH_CASE(_3, N) AT_DISPATCH_CASE(_4, N) AT_DISPATCH_CASE(_5, N) AT_DISPATCH_CASE(_6, N) AT_DISPATCH_CASE(_7, N) AT_DISPATCH_CASE(_8, N) AT_DISPATCH_CASE(_9, N) AT_DISPATCH_CASE(_10, N) AT_DISPATCH_CASE(_11, N) AT_DISPATCH_CASE(_12, N) AT_DISPATCH_CASE(_13, N) AT_DISPATCH_CASE(_14, N) AT_DISPATCH_CASE(_15, N) AT_DISPATCH_CASE(_16, N) AT_DISPATCH_CASE(_17, N) AT_DISPATCH_CASE(_18, N) AT_DISPATCH_CASE(_19, N) AT_DISPATCH_CASE(_20, N) AT_DISPATCH_CASE(_21, N) AT_DISPATCH_CASE(_22, N) AT_DISPATCH_CASE(_23, N) AT_DISPATCH_CASE(_24, N) AT_DISPATCH_CASE(_25, N) AT_DISPATCH_CASE(_26, N) AT_DISPATCH_CASE(_27, N) AT_DISPATCH_CASE(_28, N) AT_DISPATCH_CASE(_29, N) AT_DISPATCH_CASE(_30, N) AT_DISPATCH_CASE(_31, N) AT_DISPATCH_CASE(_32, N) AT_DISPATCH_CASE(_33, N) AT_DISPATCH_CASE(_34, N) AT_DISPATCH_CASE(_35, N) AT_DISPATCH_CASE(_36, N) AT_DISPATCH_CASE(_37, N) AT_DISPATCH_CASE(_38, N) AT_DISPATCH_CASE(_39, N) AT_DISPATCH_CASE(_40, N) AT_DISPATCH_CASE(_41, N) AT_DISPATCH_CASE(_42, N) AT_DISPATCH_CASE(_43, N) AT_DISPATCH_CASE(_44, N) AT_DISPATCH_CASE(_45, N) AT_DISPATCH_CASE(_46, N) AT_DISPATCH_CASE(_47, N) AT_DISPATCH_CASE(_48, N) AT_DISPATCH_CASE(_49, N) AT_DISPATCH_CASE(_50, N) AT_DISPATCH_CASE(_51, N) AT_DISPATCH_CASE(_52, N) AT_DISPATCH_CASE(_53, N) AT_DISPATCH_CASE(_54, N) AT_DISPATCH_CASE(_55, N) AT_DISPATCH_CASE(_56, N) AT_DISPATCH_CASE(_57, N) AT_DISPATCH_CASE(_58, N) AT_DISPATCH_CASE(_59, N) AT_DISPATCH_CASE(_60, N)
|
||||
|
||||
// End generated code
|
||||
// clang-format on
|
||||
|
||||
@ -66,8 +66,7 @@ namespace at {
|
||||
} else if (ivalue.isTensorList()) {
|
||||
auto tensors = std::move(ivalue).toTensorList();
|
||||
for(const auto j : c10::irange(tensors.size())) {
|
||||
const auto& tensor_ref = tensors[j];
|
||||
const Tensor& tensor = tensor_ref;
|
||||
const Tensor& tensor = tensors[j];
|
||||
if (tensor._is_zerotensor()) {
|
||||
// TODO: assert requires_grad=False
|
||||
//_like should not propagate zerotensor dispatch key
|
||||
|
||||
@ -314,7 +314,7 @@ public:
|
||||
*
|
||||
* @return The number of elements removed. This is either '1' if an element with the key existed, or '0' if it didn't.
|
||||
*/
|
||||
C10_NODISCARD size_t erase(const Key& key) const;
|
||||
[[nodiscard]] size_t erase(const Key& key) const;
|
||||
|
||||
/**
|
||||
* Returns the mapped value of the element with key equivalent to key.
|
||||
|
||||
@ -142,8 +142,8 @@ void Dict<Key, Value>::erase(iterator iter) const {
|
||||
impl_->dict.erase(iter.entryRef_.iterator_);
|
||||
}
|
||||
|
||||
template<class Key, class Value>
|
||||
C10_NODISCARD size_t Dict<Key, Value>::erase(const Key& key) const {
|
||||
template <class Key, class Value>
|
||||
[[nodiscard]] size_t Dict<Key, Value>::erase(const Key& key) const {
|
||||
return impl_->dict.erase(key);
|
||||
}
|
||||
|
||||
|
||||
@ -168,8 +168,7 @@ class IListRefTagImpl<IListRefTag::Boxed, at::OptionalTensorRef>
|
||||
*/
|
||||
static IListRefConstRef<at::OptionalTensorRef> iterator_get(
|
||||
const typename list_type::const_iterator& it) {
|
||||
const auto& elem = *it;
|
||||
const auto& ivalue = elem.get();
|
||||
const auto& ivalue = (*it).get();
|
||||
if (!ivalue.isNone()) {
|
||||
const auto& tensor = ivalue.toTensor();
|
||||
return (tensor.defined()) ? tensor : at::OptionalTensorRef{};
|
||||
|
||||
@ -151,9 +151,7 @@ public:
|
||||
// no safe toTensorRef method, alas)
|
||||
ks = ks | ivalue.unsafeToTensorImpl()->key_set();
|
||||
} else if (C10_UNLIKELY(ivalue.isTensorList())) {
|
||||
const c10::List<at::Tensor> tensorlist = ivalue.toTensorList();
|
||||
for (const auto& tensor_ref : tensorlist) {
|
||||
const at::Tensor& tensor = tensor_ref;
|
||||
for (const at::Tensor& tensor : ivalue.toTensorList()) {
|
||||
ks = ks | tensor.key_set();
|
||||
}
|
||||
}
|
||||
|
||||
@ -108,7 +108,7 @@ struct TORCH_API Argument {
|
||||
return is_out_;
|
||||
}
|
||||
|
||||
C10_NODISCARD const AliasInfo* alias_info() const {
|
||||
[[nodiscard]] const AliasInfo* alias_info() const {
|
||||
return alias_info_.get();
|
||||
}
|
||||
|
||||
|
||||
@ -522,7 +522,7 @@ struct TORCH_API IValue final {
|
||||
}
|
||||
c10::intrusive_ptr<ivalue::Tuple> toTuple() &&;
|
||||
c10::intrusive_ptr<ivalue::Tuple> toTuple() const&;
|
||||
C10_NODISCARD ivalue::Tuple& toTupleRef() const;
|
||||
[[nodiscard]] ivalue::Tuple& toTupleRef() const;
|
||||
|
||||
// Double
|
||||
IValue(double d) : tag(Tag::Double) {
|
||||
|
||||
@ -500,7 +500,7 @@ struct TORCH_API TupleElements {
|
||||
return *this;
|
||||
}
|
||||
|
||||
C10_NODISCARD c10::ArrayRef<IValue> asArrayRef() const {
|
||||
[[nodiscard]] c10::ArrayRef<IValue> asArrayRef() const {
|
||||
if (inlineSize_) {
|
||||
return c10::ArrayRef<IValue>(elementsInline_, inlineSize_);
|
||||
} else {
|
||||
@ -527,15 +527,15 @@ struct TORCH_API TupleElements {
|
||||
}
|
||||
}
|
||||
|
||||
C10_NODISCARD bool empty() const {
|
||||
[[nodiscard]] bool empty() const {
|
||||
return inlineSize_ ? false : elementsVector_.empty();
|
||||
}
|
||||
|
||||
C10_NODISCARD size_t size() const {
|
||||
[[nodiscard]] size_t size() const {
|
||||
return inlineSize_ ? inlineSize_ : elementsVector_.size();
|
||||
}
|
||||
|
||||
C10_NODISCARD IValue& operator[](size_t idx) {
|
||||
[[nodiscard]] IValue& operator[](size_t idx) {
|
||||
if (inlineSize_) {
|
||||
return elementsInline_[idx];
|
||||
} else {
|
||||
@ -543,7 +543,7 @@ struct TORCH_API TupleElements {
|
||||
}
|
||||
}
|
||||
|
||||
C10_NODISCARD const IValue& operator[](size_t idx) const {
|
||||
[[nodiscard]] const IValue& operator[](size_t idx) const {
|
||||
if (inlineSize_) {
|
||||
return elementsInline_[idx];
|
||||
} else {
|
||||
@ -551,7 +551,7 @@ struct TORCH_API TupleElements {
|
||||
}
|
||||
}
|
||||
|
||||
C10_NODISCARD IValue& at(size_t idx) {
|
||||
[[nodiscard]] IValue& at(size_t idx) {
|
||||
if (inlineSize_) {
|
||||
TORCH_INTERNAL_ASSERT_DEBUG_ONLY(inlineSize_ <= 3);
|
||||
TORCH_CHECK(idx < inlineSize_, "TupleElements: invalid index Index = ", idx, "; Length = ", inlineSize_);
|
||||
@ -561,7 +561,7 @@ struct TORCH_API TupleElements {
|
||||
}
|
||||
}
|
||||
|
||||
C10_NODISCARD const IValue& at(size_t idx) const {
|
||||
[[nodiscard]] const IValue& at(size_t idx) const {
|
||||
if (inlineSize_) {
|
||||
TORCH_INTERNAL_ASSERT_DEBUG_ONLY(inlineSize_ <= 3);
|
||||
TORCH_CHECK(idx < inlineSize_, "TupleElements: invalid index Index = ", idx, "; Length = ", inlineSize_);
|
||||
@ -572,7 +572,7 @@ struct TORCH_API TupleElements {
|
||||
}
|
||||
}
|
||||
|
||||
C10_NODISCARD iterator begin() {
|
||||
[[nodiscard]] iterator begin() {
|
||||
if (inlineSize_) {
|
||||
return elementsInline_;
|
||||
} else {
|
||||
@ -580,7 +580,7 @@ struct TORCH_API TupleElements {
|
||||
}
|
||||
}
|
||||
|
||||
C10_NODISCARD iterator end() {
|
||||
[[nodiscard]] iterator end() {
|
||||
if (inlineSize_) {
|
||||
return elementsInline_ + inlineSize_;
|
||||
} else {
|
||||
@ -588,7 +588,7 @@ struct TORCH_API TupleElements {
|
||||
}
|
||||
}
|
||||
|
||||
C10_NODISCARD const_iterator begin() const {
|
||||
[[nodiscard]] const_iterator begin() const {
|
||||
if (inlineSize_) {
|
||||
return elementsInline_;
|
||||
} else {
|
||||
@ -596,7 +596,7 @@ struct TORCH_API TupleElements {
|
||||
}
|
||||
}
|
||||
|
||||
C10_NODISCARD const_iterator end() const {
|
||||
[[nodiscard]] const_iterator end() const {
|
||||
if (inlineSize_) {
|
||||
return elementsInline_ + inlineSize_;
|
||||
} else {
|
||||
@ -604,27 +604,27 @@ struct TORCH_API TupleElements {
|
||||
}
|
||||
}
|
||||
|
||||
C10_NODISCARD const_iterator cbegin() const {
|
||||
[[nodiscard]] const_iterator cbegin() const {
|
||||
return begin();
|
||||
}
|
||||
|
||||
C10_NODISCARD const_iterator cend() const {
|
||||
[[nodiscard]] const_iterator cend() const {
|
||||
return end();
|
||||
}
|
||||
|
||||
C10_NODISCARD std::vector<IValue> vec() const & {
|
||||
[[nodiscard]] std::vector<IValue> vec() const& {
|
||||
return asArrayRef().vec();
|
||||
}
|
||||
|
||||
C10_NODISCARD IValue& back() {
|
||||
[[nodiscard]] IValue& back() {
|
||||
return *(end() - 1);
|
||||
}
|
||||
|
||||
C10_NODISCARD const IValue& back() const {
|
||||
[[nodiscard]] const IValue& back() const {
|
||||
return *(end() - 1);
|
||||
}
|
||||
|
||||
C10_NODISCARD std::vector<IValue> vec() && {
|
||||
[[nodiscard]] std::vector<IValue> vec() && {
|
||||
std::vector<IValue> result;
|
||||
result.reserve(size());
|
||||
for (auto&& iv : *this) {
|
||||
|
||||
@ -271,9 +271,9 @@ struct VecConvert<
|
||||
1,
|
||||
int64_t,
|
||||
2,
|
||||
typename std::enable_if<
|
||||
std::enable_if_t<
|
||||
std::is_same_v<dst_t, int8_t> ||
|
||||
std::is_same_v<dst_t, uint8_t>>::type> {
|
||||
std::is_same_v<dst_t, uint8_t>>> {
|
||||
static inline VectorizedN<dst_t, 1> apply(
|
||||
const VectorizedN<int64_t, 2>& src) {
|
||||
return VecConvert<dst_t, 1, int32_t, 1>::apply(
|
||||
|
||||
@ -1190,8 +1190,8 @@ struct Vectorized<T, std::enable_if_t<is_zarch_implemented<T>()>> {
|
||||
typename U = T,
|
||||
std::enable_if_t<std::is_same<U, double>::value, int> = 0>
|
||||
Vectorized<T> swapped() const {
|
||||
vtype v0 = vec_permi(_vec0, _vec0, 2);
|
||||
vtype v1 = vec_permi(_vec1, _vec1, 2);
|
||||
vtype v0 = {_vec0[1], _vec0[0]};
|
||||
vtype v1 = {_vec1[1], _vec1[0]};
|
||||
return {v0, v1};
|
||||
}
|
||||
|
||||
@ -1685,6 +1685,7 @@ std::pair<Vectorized<V>, Vectorized<V>> unpack(const Vectorized<T>& x) {
|
||||
return {Vectorized<V>{vec0, vec1}, Vectorized<V>{vec2, vec3}};
|
||||
}
|
||||
|
||||
C10_DIAGNOSTIC_PUSH_AND_IGNORED_IF_DEFINED("-Wunused-function")
|
||||
template <>
|
||||
std::pair<Vectorized<int16_t>, Vectorized<int16_t>> unpack<uint8_t, int16_t>(
|
||||
const Vectorized<uint8_t>& x) {
|
||||
@ -1702,6 +1703,7 @@ std::pair<Vectorized<int16_t>, Vectorized<int16_t>> unpack<uint8_t, int16_t>(
|
||||
cast_zvector<uint16_t, int16_t>(Vectorized<uint16_t>{vec0, vec1}),
|
||||
cast_zvector<uint16_t, int16_t>(Vectorized<uint16_t>{vec2, vec3})};
|
||||
}
|
||||
C10_DIAGNOSTIC_POP()
|
||||
|
||||
template <typename T, typename V = typename pack_type<T>::type>
|
||||
Vectorized<V> pack(const Vectorized<T>& first, const Vectorized<T>& second) {
|
||||
@ -1710,6 +1712,7 @@ Vectorized<V> pack(const Vectorized<T>& first, const Vectorized<T>& second) {
|
||||
return Vectorized<V>{vec0, vec1};
|
||||
}
|
||||
|
||||
C10_DIAGNOSTIC_PUSH_AND_IGNORED_IF_DEFINED("-Wunused-function")
|
||||
template <>
|
||||
Vectorized<uint8_t> pack(
|
||||
const Vectorized<int16_t>& first,
|
||||
@ -1718,6 +1721,7 @@ Vectorized<uint8_t> pack(
|
||||
auto vec1 = vec_packsu(second.vec0(), second.vec1());
|
||||
return Vectorized<uint8_t>{vec0, vec1};
|
||||
}
|
||||
C10_DIAGNOSTIC_POP()
|
||||
|
||||
} /* unnamed namespace */
|
||||
|
||||
@ -1735,7 +1739,7 @@ struct Vectorized<T, std::enable_if_t<is_zarch_implemented_quant<T>()>> {
|
||||
return VECTOR_WIDTH / sizeof(value_type);
|
||||
}
|
||||
|
||||
static constexpr size_t float_num_vecs() {
|
||||
static constexpr int float_num_vecs() {
|
||||
return size() / Vectorized<float>::size();
|
||||
}
|
||||
static constexpr int int_num_vecs() {
|
||||
@ -2419,8 +2423,8 @@ struct Vectorized<T, std::enable_if_t<is_zarch_implemented_complex<T>()>> {
|
||||
static typename Vectorized<T>::vinner_type real_neg(const typename Vectorized<T>::vinner_type &a)
|
||||
{
|
||||
auto a_neg = a.neg();
|
||||
auto v0 = vec_permi(a_neg.vec0(), a.vec0(), 1);
|
||||
auto v1 = vec_permi(a_neg.vec1(), a.vec1(), 1);
|
||||
vtype v0 = {a_neg.vec0()[0], a.vec0()[1]};
|
||||
vtype v1 = {a_neg.vec1()[0], a.vec1()[1]};
|
||||
return { v0, v1 };
|
||||
}
|
||||
|
||||
@ -2732,10 +2736,10 @@ std::pair<Vectorized<T>, Vectorized<T>> inline inner_interleave2(
|
||||
// a = {a0, a1, a2, a3}
|
||||
// b = {b0, b1, b2, b3}
|
||||
using vtype = typename Vectorized<T>::vtype;
|
||||
vtype ab00 = vec_permi(a.vec0(), b.vec0(), 0);
|
||||
vtype ab11 = vec_permi(a.vec0(), b.vec0(), 3);
|
||||
vtype ab2_00 = vec_permi(a.vec1(), b.vec1(), 0);
|
||||
vtype ab2_11 = vec_permi(a.vec1(), b.vec1(), 3);
|
||||
vtype ab00 = {a.vec0()[0], b.vec0()[0]};
|
||||
vtype ab11 = {a.vec0()[1], b.vec0()[1]};
|
||||
vtype ab2_00 = {a.vec1()[0], b.vec1()[0]};
|
||||
vtype ab2_11 = {a.vec1()[1], b.vec1()[1]};
|
||||
// return {a0, b0, a1, b1}
|
||||
// {a2, b2, a3, b3}
|
||||
return std::make_pair(
|
||||
@ -2750,11 +2754,11 @@ std::pair<Vectorized<T>, Vectorized<T>> inline inner_deinterleave2(
|
||||
// a = {a0, b0, a1, b1}
|
||||
// b = {a2, b2, a3, b3}
|
||||
using vtype = typename Vectorized<T>::vtype;
|
||||
vtype aa01 = vec_permi(a.vec0(), a.vec1(), 0);
|
||||
vtype aa23 = vec_permi(b.vec0(), b.vec1(), 0);
|
||||
vtype aa01 = {a.vec0()[0], a.vec1()[0]};
|
||||
vtype aa23 = {b.vec0()[0], b.vec1()[0]};
|
||||
|
||||
vtype bb_01 = vec_permi(a.vec0(), a.vec1(), 3);
|
||||
vtype bb_23 = vec_permi(b.vec0(), b.vec1(), 3);
|
||||
vtype bb_01 = {a.vec0()[1], a.vec1()[1]};
|
||||
vtype bb_23 = {b.vec0()[1], b.vec1()[1]};
|
||||
|
||||
// swap lanes:
|
||||
// return {a0, a1, a2, a3}
|
||||
@ -2868,7 +2872,7 @@ std::pair<Vectorized<int64_t>, Vectorized<int64_t>> inline deinterleave2<
|
||||
}
|
||||
|
||||
template <typename T>
|
||||
typename std::enable_if<std::is_same<T, uint8_t>::value, at::vec::Vectorized<float>>::type
|
||||
std::enable_if_t<std::is_same_v<T, uint8_t>, at::vec::Vectorized<float>>
|
||||
inline convert_int8_to_float(const Vectorized<T> &src) {
|
||||
// Note: this function only convert inputs number of elements equal to at::vec::Vectorized<float>.size()
|
||||
// Only handle first 64 bits
|
||||
@ -2878,7 +2882,7 @@ inline convert_int8_to_float(const Vectorized<T> &src) {
|
||||
}
|
||||
|
||||
template <typename T>
|
||||
typename std::enable_if<std::is_same<T, uint8_t>::value, at::vec::Vectorized<T>>::type
|
||||
std::enable_if_t<std::is_same_v<T, uint8_t>, at::vec::Vectorized<T>>
|
||||
inline convert_float_to_int8(const Vectorized<float> &src) {
|
||||
constexpr auto min_val = std::numeric_limits<T>::min();
|
||||
constexpr auto max_val = std::numeric_limits<T>::max();
|
||||
|
||||
@ -281,9 +281,9 @@ struct VecConvert<
|
||||
1,
|
||||
int64_t,
|
||||
2,
|
||||
typename std::enable_if<
|
||||
std::enable_if_t<
|
||||
std::is_same_v<dst_t, int8_t> ||
|
||||
std::is_same_v<dst_t, uint8_t>>::type> {
|
||||
std::is_same_v<dst_t, uint8_t>>> {
|
||||
static inline VectorizedN<dst_t, 1> apply(
|
||||
const VectorizedN<int64_t, 2>& src) {
|
||||
return VecConvert<dst_t, 1, int32_t, 1>::apply(
|
||||
|
||||
@ -84,9 +84,9 @@ struct VecMaskLoad<
|
||||
dst_n,
|
||||
mask_t,
|
||||
dst_n,
|
||||
typename std::enable_if<
|
||||
std::enable_if_t<
|
||||
std::is_same_v<data_t, BFloat16> ||
|
||||
std::is_same_v<data_t, Half>>::type> {
|
||||
std::is_same_v<data_t, Half>>> {
|
||||
static inline VectorizedN<data_t, dst_n> apply(
|
||||
const data_t* ptr,
|
||||
const VecMask<mask_t, dst_n>& vec_mask) {
|
||||
@ -151,9 +151,9 @@ struct VecMaskLoad<
|
||||
1,
|
||||
mask_t,
|
||||
1,
|
||||
typename std::enable_if<
|
||||
std::enable_if_t<
|
||||
std::is_same_v<data_t, int8_t> ||
|
||||
std::is_same_v<data_t, uint8_t>>::type> {
|
||||
std::is_same_v<data_t, uint8_t>>> {
|
||||
static inline VectorizedN<data_t, 1> apply(
|
||||
const data_t* ptr,
|
||||
const VecMask<mask_t, 1>& vec_mask) {
|
||||
@ -173,9 +173,9 @@ struct VecMaskLoad<
|
||||
2,
|
||||
mask_t,
|
||||
1,
|
||||
typename std::enable_if<
|
||||
std::enable_if_t<
|
||||
std::is_same_v<data_t, int64_t> ||
|
||||
std::is_same_v<data_t, double>>::type> {
|
||||
std::is_same_v<data_t, double>>> {
|
||||
static inline VectorizedN<data_t, 2> apply(
|
||||
const data_t* ptr,
|
||||
const VecMask<mask_t, 1>& vec_mask) {
|
||||
|
||||
@ -10,7 +10,7 @@ TensorBase empty_cuda(
|
||||
ScalarType dtype,
|
||||
std::optional<Device> device_opt,
|
||||
std::optional<c10::MemoryFormat> memory_format_opt) {
|
||||
at::globalContext().lazyInitCUDA();
|
||||
at::globalContext().lazyInitDevice(c10::DeviceType::CUDA);
|
||||
const auto device = device_or_default(device_opt);
|
||||
TORCH_INTERNAL_ASSERT(device.is_cuda());
|
||||
const DeviceGuard device_guard(device);
|
||||
@ -50,7 +50,7 @@ TensorBase empty_strided_cuda(
|
||||
IntArrayRef stride,
|
||||
ScalarType dtype,
|
||||
std::optional<Device> device_opt) {
|
||||
at::globalContext().lazyInitCUDA();
|
||||
at::globalContext().lazyInitDevice(c10::DeviceType::CUDA);
|
||||
const auto device = device_or_default(device_opt);
|
||||
TORCH_INTERNAL_ASSERT(device.is_cuda());
|
||||
const DeviceGuard device_guard(device);
|
||||
|
||||
@ -162,7 +162,7 @@ constexpr const char* _cusolver_backend_suggestion = \
|
||||
CUresult __err = EXPR; \
|
||||
if (__err != CUDA_SUCCESS) { \
|
||||
const char* err_str; \
|
||||
CUresult get_error_str_err C10_UNUSED = at::globalContext().getNVRTC().cuGetErrorString(__err, &err_str); \
|
||||
C10_UNUSED CUresult get_error_str_err = at::globalContext().getNVRTC().cuGetErrorString(__err, &err_str); \
|
||||
if (get_error_str_err != CUDA_SUCCESS) { \
|
||||
AT_ERROR("CUDA driver error: unknown error"); \
|
||||
} else { \
|
||||
|
||||
@ -34,7 +34,7 @@ void init_p2p_access_cache(int64_t num_devices) {
|
||||
} // namespace detail
|
||||
|
||||
bool get_p2p_access(int dev, int dev_to_access) {
|
||||
at::globalContext().lazyInitCUDA();
|
||||
at::globalContext().lazyInitDevice(c10::DeviceType::CUDA);
|
||||
|
||||
TORCH_CHECK(dev >= 0 || dev < num_devices_,
|
||||
dev, " is not a device");
|
||||
|
||||
@ -84,7 +84,7 @@ struct _Initializer {
|
||||
// NB: deleter is dynamic, because we need it to live in a separate
|
||||
// compilation unit (alt is to have another method in hooks, but
|
||||
// let's not if we don't need to!)
|
||||
void CUDAHooks::initCUDA() const {
|
||||
void CUDAHooks::init() const {
|
||||
C10_LOG_API_USAGE_ONCE("aten.init.cuda");
|
||||
// Force the update to enable unit testing. This code get executed before unit tests
|
||||
// have a chance to enable vitals.
|
||||
|
||||
@ -19,7 +19,7 @@ TORCH_CUDA_CPP_API void set_magma_init_fn(void (*magma_init_fn)());
|
||||
// The real implementation of CUDAHooksInterface
|
||||
struct CUDAHooks : public at::CUDAHooksInterface {
|
||||
CUDAHooks(at::CUDAHooksArgs) {}
|
||||
void initCUDA() const override;
|
||||
void init() const override;
|
||||
Device getDeviceFromPtr(void* data) const override;
|
||||
bool isPinnedPtr(const void* data) const override;
|
||||
const Generator& getDefaultCUDAGenerator(DeviceIndex device_index = -1) const override;
|
||||
|
||||
@ -19,6 +19,10 @@ struct TORCH_API AcceleratorHooksInterface {
|
||||
// Whether the device at device_index is fully initialized or not.
|
||||
virtual bool hasPrimaryContext(DeviceIndex device_index) const = 0;
|
||||
|
||||
virtual void init() const {
|
||||
TORCH_CHECK(false, "Backend doesn`t support init()");
|
||||
}
|
||||
|
||||
virtual DeviceIndex deviceCount() const {
|
||||
return 0;
|
||||
}
|
||||
|
||||
@ -65,7 +65,7 @@ struct TORCH_API CUDAHooksInterface : AcceleratorHooksInterface {
|
||||
~CUDAHooksInterface() override = default;
|
||||
|
||||
// Initialize THCState and, transitively, the CUDA state
|
||||
virtual void initCUDA() const {
|
||||
void init() const override {
|
||||
TORCH_CHECK(false, "Cannot initialize CUDA without ATen_cuda library. ", CUDA_HELP);
|
||||
}
|
||||
|
||||
|
||||
@ -26,9 +26,8 @@ struct TORCH_API HIPHooksInterface : AcceleratorHooksInterface {
|
||||
// squelch -Werror=non-virtual-dtor
|
||||
~HIPHooksInterface() override = default;
|
||||
|
||||
// Initialize the HIP library state
|
||||
virtual void initHIP() const {
|
||||
AT_ERROR("Cannot initialize HIP without ATen_hip library.");
|
||||
void init() const override {
|
||||
TORCH_CHECK(false, "Cannot initialize HIP without ATen_hip library.");
|
||||
}
|
||||
|
||||
virtual std::unique_ptr<c10::GeneratorImpl> initHIPGenerator(Context*) const {
|
||||
|
||||
@ -1,14 +1,25 @@
|
||||
#pragma once
|
||||
|
||||
#include <ATen/core/Generator.h>
|
||||
#include <ATen/detail/AcceleratorHooksInterface.h>
|
||||
|
||||
#include <c10/core/Allocator.h>
|
||||
#include <c10/util/Exception.h>
|
||||
#include <c10/util/Registry.h>
|
||||
|
||||
namespace at {
|
||||
|
||||
struct TORCH_API IPUHooksInterface {
|
||||
virtual ~IPUHooksInterface() = default;
|
||||
struct TORCH_API IPUHooksInterface: AcceleratorHooksInterface {
|
||||
~IPUHooksInterface() override = default;
|
||||
|
||||
void init() const override {
|
||||
TORCH_CHECK(false, "Cannot initialize IPU without ATen_ipu library.");
|
||||
}
|
||||
|
||||
bool hasPrimaryContext(DeviceIndex device_index) const override {
|
||||
TORCH_CHECK(false, "Cannot initialize IPU without ATen_ipu library.");
|
||||
return false;
|
||||
}
|
||||
|
||||
virtual const Generator& getDefaultIPUGenerator(
|
||||
DeviceIndex device_index [[maybe_unused]] = -1) const {
|
||||
|
||||
@ -3,13 +3,24 @@
|
||||
#include <c10/util/Exception.h>
|
||||
#include <c10/util/Registry.h>
|
||||
|
||||
#include <ATen/detail/AcceleratorHooksInterface.h>
|
||||
|
||||
// NB: Class must live in `at` due to limitations of Registry.h.
|
||||
namespace at {
|
||||
|
||||
struct TORCH_API MAIAHooksInterface {
|
||||
struct TORCH_API MAIAHooksInterface : AcceleratorHooksInterface {
|
||||
// This should never actually be implemented, but it is used to
|
||||
// squelch -Werror=non-virtual-dtor
|
||||
virtual ~MAIAHooksInterface() = default;
|
||||
~MAIAHooksInterface() override = default;
|
||||
|
||||
void init() const override {
|
||||
TORCH_CHECK(false, "Cannot initialize MAIA without ATen_maia library.");
|
||||
}
|
||||
|
||||
bool hasPrimaryContext(DeviceIndex device_index) const override {
|
||||
TORCH_CHECK(false, "Cannot initialize MAIA without ATen_maia library.");
|
||||
return false;
|
||||
}
|
||||
|
||||
virtual std::string showConfig() const {
|
||||
TORCH_CHECK(false, "Cannot query detailed MAIA version information.");
|
||||
|
||||
@ -22,7 +22,7 @@ struct TORCH_API MPSHooksInterface : AcceleratorHooksInterface {
|
||||
~MPSHooksInterface() override = default;
|
||||
|
||||
// Initialize the MPS library state
|
||||
virtual void initMPS() const {
|
||||
void init() const override {
|
||||
FAIL_MPSHOOKS_FUNC(__func__);
|
||||
}
|
||||
virtual bool hasMPS() const {
|
||||
|
||||
@ -31,7 +31,7 @@ struct TORCH_API MTIAHooksInterface : AcceleratorHooksInterface {
|
||||
|
||||
~MTIAHooksInterface() override = default;
|
||||
|
||||
virtual void initMTIA() const {
|
||||
void init() const override {
|
||||
// Avoid logging here, since MTIA needs init devices first then it will know
|
||||
// how many devices are available. Make it as no-op if mtia extension is not
|
||||
// dynamically loaded.
|
||||
|
||||
@ -40,7 +40,7 @@ struct TORCH_API PrivateUse1HooksInterface : AcceleratorHooksInterface {
|
||||
"You should register `PrivateUse1HooksInterface` for PrivateUse1 before call `hasPrimaryContext`.");
|
||||
}
|
||||
|
||||
virtual void initPrivateUse1() const {}
|
||||
void init() const override {}
|
||||
virtual void resizePrivateUse1Bytes(
|
||||
const c10::Storage& storage,
|
||||
size_t newsize) const {
|
||||
|
||||
@ -14,10 +14,8 @@ namespace at {
|
||||
struct TORCH_API XPUHooksInterface : AcceleratorHooksInterface{
|
||||
~XPUHooksInterface() override = default;
|
||||
|
||||
virtual void initXPU() const {
|
||||
TORCH_CHECK(
|
||||
false,
|
||||
"Cannot initialize XPU without ATen_xpu library.");
|
||||
void init() const override {
|
||||
TORCH_CHECK(false, "Cannot initialize XPU without ATen_xpu library.");
|
||||
}
|
||||
|
||||
virtual bool hasXPU() const {
|
||||
|
||||
@ -36,7 +36,9 @@ id<MTLLibrary> MPSDevice::getMetalIndexingLibrary() {
|
||||
if (isMacOS13Plus(MacOSVersion::MACOS_VER_15_0_PLUS)) {
|
||||
options.mathMode = MTLMathModeFast;
|
||||
} else {
|
||||
C10_DIAGNOSTIC_PUSH_AND_IGNORED_IF_DEFINED("-Wdeprecated-declarations")
|
||||
[options setFastMathEnabled:YES];
|
||||
C10_DIAGNOSTIC_POP()
|
||||
}
|
||||
_mtl_indexing_library = [_mtl_device newLibraryWithSource:[NSString stringWithCString:mps::indexing_metal_shaders
|
||||
encoding:NSASCIIStringEncoding]
|
||||
|
||||
@ -12,7 +12,7 @@ namespace at::mps {
|
||||
// The real implementation of MPSHooksInterface
|
||||
struct MPSHooks : public at::MPSHooksInterface {
|
||||
MPSHooks(at::MPSHooksArgs) {}
|
||||
void initMPS() const override;
|
||||
void init() const override;
|
||||
|
||||
// MPSDevice interface
|
||||
bool hasMPS() const override;
|
||||
|
||||
@ -10,7 +10,7 @@
|
||||
|
||||
namespace at::mps {
|
||||
|
||||
void MPSHooks::initMPS() const {
|
||||
void MPSHooks::init() const {
|
||||
C10_LOG_API_USAGE_ONCE("aten.init.mps");
|
||||
// TODO: initialize MPS devices and streams here
|
||||
}
|
||||
|
||||
@ -106,7 +106,7 @@ bool is_fast_path(const Tensor& src, const std::optional<Tensor>& scale, Tensor&
|
||||
// index_add (using add_indices as the index), without creating an intermediary
|
||||
// tensor to hold the selected embeddings
|
||||
template <typename data_t, typename index_t>
|
||||
static typename std::enable_if<std::is_same<data_t, double>::value, void>::type
|
||||
static std::enable_if_t<std::is_same_v<data_t, double>, void>
|
||||
index_select_add(
|
||||
const Tensor& select_indices,
|
||||
const Tensor& add_indices,
|
||||
@ -184,10 +184,9 @@ void fbgemm_spmdm_report_error_(
|
||||
} // namespace
|
||||
|
||||
template <typename data_t, typename index_t>
|
||||
typename std::enable_if<
|
||||
std::is_same<data_t, at::Half>::value ||
|
||||
std::is_same<data_t, at::BFloat16>::value,
|
||||
void>::type
|
||||
std::enable_if_t<
|
||||
std::is_same_v<data_t, at::Half> || std::is_same_v<data_t, at::BFloat16>,
|
||||
void>
|
||||
index_select_add(
|
||||
const Tensor& select_indices,
|
||||
const Tensor& add_indices,
|
||||
@ -366,7 +365,7 @@ index_select_add(
|
||||
}
|
||||
}
|
||||
template<typename data_t, typename index_t>
|
||||
typename std::enable_if<std::is_same<data_t, float>::value, void>::type
|
||||
std::enable_if_t<std::is_same_v<data_t, float>, void>
|
||||
index_select_add(const Tensor &select_indices,
|
||||
const Tensor &add_indices,
|
||||
const Tensor &src,
|
||||
@ -493,7 +492,7 @@ index_select_add(const Tensor &select_indices,
|
||||
// mul (scaling by per_sample_weights)
|
||||
// index_add (using add_indices as the index)
|
||||
template <typename data_t, typename index_t>
|
||||
static typename std::enable_if<std::is_same<data_t, double>::value, void>::type
|
||||
static std::enable_if_t<std::is_same_v<data_t, double>, void>
|
||||
index_select_scale_add(
|
||||
const Tensor& select_indices,
|
||||
const Tensor& add_indices,
|
||||
@ -548,10 +547,9 @@ index_select_scale_add(
|
||||
}
|
||||
|
||||
template <typename data_t, typename index_t>
|
||||
typename std::enable_if<
|
||||
std::is_same<data_t, at::Half>::value ||
|
||||
std::is_same<data_t, at::BFloat16>::value,
|
||||
void>::type
|
||||
std::enable_if_t<
|
||||
std::is_same_v<data_t, at::Half> || std::is_same_v<data_t, at::BFloat16>,
|
||||
void>
|
||||
index_select_scale_add(
|
||||
const Tensor& select_indices,
|
||||
const Tensor& add_indices,
|
||||
@ -741,7 +739,7 @@ index_select_scale_add(
|
||||
}
|
||||
}
|
||||
template<typename data_t, typename index_t>
|
||||
typename std::enable_if<std::is_same<data_t, float>::value, void>::type
|
||||
std::enable_if_t<std::is_same_v<data_t, float>, void>
|
||||
index_select_scale_add(const Tensor &select_indices,
|
||||
const Tensor &add_indices,
|
||||
const Tensor &scale,
|
||||
|
||||
@ -14,7 +14,7 @@ static void invalid_mask(const Tensor & self, int64_t idx, const Tensor & mask,
|
||||
}
|
||||
|
||||
|
||||
static C10_UNUSED std::vector<Tensor> expandTensors(const Tensor & self, IOptTensorListRef indices) {
|
||||
C10_UNUSED static std::vector<Tensor> expandTensors(const Tensor & self, IOptTensorListRef indices) {
|
||||
// If indices come in as ByteTensor or BoolTensor (masks), expand them into the equivalent indexing by LongTensors
|
||||
std::vector<Tensor> result;
|
||||
for (const auto& index_opt : indices) {
|
||||
@ -48,7 +48,7 @@ static C10_UNUSED std::vector<Tensor> expandTensors(const Tensor & self, IOptTen
|
||||
return result;
|
||||
}
|
||||
|
||||
static C10_UNUSED void checkIndexTensorTypes(IOptTensorListRef indices, bool allow_int=false) {
|
||||
C10_UNUSED static void checkIndexTensorTypes(IOptTensorListRef indices, bool allow_int=false) {
|
||||
for (const auto& tensor : indices) {
|
||||
if (tensor.has_value() && tensor->defined()) {
|
||||
auto scalarType = tensor->scalar_type();
|
||||
@ -83,7 +83,7 @@ inline torch::List<std::optional<Tensor>> toListOfOptionalTensors(ArrayRef<IValu
|
||||
return result;
|
||||
}
|
||||
|
||||
static C10_UNUSED bool hasContiguousSubspace(TensorList tl) {
|
||||
C10_UNUSED static bool hasContiguousSubspace(TensorList tl) {
|
||||
// true if all the non-null tensors are adjacent
|
||||
auto isDefined = [](const Tensor & tensor){ return tensor.defined(); };
|
||||
auto isNull = [](const Tensor & tensor){ return !tensor.defined(); };
|
||||
@ -100,7 +100,7 @@ static C10_UNUSED bool hasContiguousSubspace(TensorList tl) {
|
||||
// transposeToFront(tensor, {nullptr, a, nullptr, b})
|
||||
// returns
|
||||
// tensor.permute([1, 3, 0, 2]), {a, b, nullptr, nullptr}
|
||||
static C10_UNUSED std::tuple<Tensor, std::vector<Tensor>>
|
||||
C10_UNUSED static std::tuple<Tensor, std::vector<Tensor>>
|
||||
transposeToFront(const Tensor& self, TensorList indices) {
|
||||
std::vector<int64_t> dims;
|
||||
std::vector<Tensor> transposedIndices;
|
||||
|
||||
@ -3,6 +3,7 @@
|
||||
#include <ATen/AccumulateType.h>
|
||||
#include <ATen/NumericUtils.h>
|
||||
#include <ATen/jiterator_macros.h>
|
||||
#include <c10/macros/Macros.h>
|
||||
#include <c10/util/BFloat16.h>
|
||||
#include <c10/util/Half.h>
|
||||
#include <c10/util/MathConstants.h>
|
||||
@ -3071,14 +3072,14 @@ inline C10_HOST_DEVICE T hermite_polynomial_h_forward(T x, int64_t n) {
|
||||
return r;
|
||||
} // hermite_polynomial_h_forward(T x, int64_t n)
|
||||
|
||||
template<typename T, bool is_cuda=false, std::enable_if_t<!std::is_floating_point<T>::value, int> = 0>
|
||||
template<typename T, bool is_cuda=false, std::enable_if_t<!std::is_floating_point_v<T>, int> = 0>
|
||||
inline C10_HOST_DEVICE T hermite_polynomial_h_forward(T x, T n) {
|
||||
return hermite_polynomial_h_forward(x, static_cast<int64_t>(n));
|
||||
} // hermite_polynomial_h_forward(T x, T n)
|
||||
|
||||
template<typename T, bool is_cuda=false, std::enable_if_t<std::is_floating_point<T>::value, int> = 0>
|
||||
inline C10_HOST_DEVICE T hermite_polynomial_h_forward(T x, T n) {
|
||||
return hermite_polynomial_h_forward(x, ((!std::isinf(n)) && (!std::isnan(n))) ? static_cast<int64_t>(n) : static_cast<int64_t>(-1));
|
||||
template<typename T, bool is_cuda=false, std::enable_if_t<std::is_floating_point_v<T>, int> = 0>
|
||||
__ubsan_ignore_float_cast_overflow__ inline C10_HOST_DEVICE T hermite_polynomial_h_forward(T x, T n) {
|
||||
return hermite_polynomial_h_forward(x, (!std::isinf(n) && !std::isnan(n)) ? static_cast<int64_t>(n) : static_cast<int64_t>(-1));
|
||||
} // hermite_polynomial_h_forward(T x, T n)
|
||||
|
||||
template<typename T>
|
||||
|
||||
@ -23,7 +23,7 @@ namespace native {
|
||||
// e.g. since 2**-1==0.5, the truncated integral result is zero. 1**negative_exponent is the
|
||||
// only non-zero result.
|
||||
template <class T,
|
||||
typename std::enable_if<std::is_integral<T>::value, T>::type* = nullptr>
|
||||
std::enable_if_t<std::is_integral_v<T>, T>* = nullptr>
|
||||
inline HOST_DEVICE __ubsan_ignore_signed_int_overflow__ T powi_impl(T a, T b) {
|
||||
T result = 1;
|
||||
while (b) {
|
||||
@ -37,13 +37,13 @@ inline HOST_DEVICE __ubsan_ignore_signed_int_overflow__ T powi_impl(T a, T b) {
|
||||
}
|
||||
|
||||
template <class T,
|
||||
typename std::enable_if<std::is_integral<T>::value && !std::is_signed<T>::value, T>::type* = nullptr>
|
||||
std::enable_if_t<std::is_integral_v<T> && !std::is_signed_v<T>, T>* = nullptr>
|
||||
inline HOST_DEVICE T powi(T a, T b) {
|
||||
return powi_impl(a, b);
|
||||
}
|
||||
|
||||
template <class T,
|
||||
typename std::enable_if<std::is_integral<T>::value && std::is_signed<T>::value, T>::type* = nullptr>
|
||||
std::enable_if_t<std::is_integral_v<T> && std::is_signed_v<T>, T>* = nullptr>
|
||||
inline HOST_DEVICE T powi(T a, T b) {
|
||||
if ( b < 0 ) {
|
||||
if ( a == 1 ) {
|
||||
|
||||
@ -179,7 +179,7 @@ struct CellParams : public CellParamsBase {
|
||||
const Tensor& _b_ih,
|
||||
const Tensor& _b_hh,
|
||||
const Tensor& _w_hr)
|
||||
: w_ih(_w_ih), w_hh(_w_hh), b_ih_(_b_ih), b_hh_(_b_hh), w_hr(_w_hr) {};
|
||||
: w_ih(_w_ih), w_hh(_w_hh), b_ih_(_b_ih), b_hh_(_b_hh), w_hr(_w_hr) {}
|
||||
|
||||
const Tensor& w_ih;
|
||||
const Tensor& w_hh;
|
||||
@ -825,7 +825,7 @@ struct FullLayer : Layer<Tensor, hidden_type, cell_params> {
|
||||
using unstacked_output_type = LayerOutput<std::vector<Tensor>, hidden_type>;
|
||||
|
||||
FullLayer(Cell<hidden_type, cell_params>& cell)
|
||||
: cell_(cell) {};
|
||||
: cell_(cell) {}
|
||||
|
||||
unstacked_output_type operator()(
|
||||
const std::vector<Tensor>& step_inputs,
|
||||
@ -870,7 +870,7 @@ struct FullBidirectionalLayer
|
||||
using output_type = typename Layer<Tensor, hidden_type, param_type>::output_type;
|
||||
|
||||
FullBidirectionalLayer(Cell<dir_hidden_type, cell_params>& cell)
|
||||
: layer_(cell) {};
|
||||
: layer_(cell) {}
|
||||
|
||||
output_type operator()(
|
||||
const Tensor& input,
|
||||
@ -922,7 +922,7 @@ struct PackedLayer : Layer<PackedSequence, hidden_type, cell_params> {
|
||||
typename Layer<PackedSequence, hidden_type, cell_params>::output_type;
|
||||
|
||||
PackedLayer(Cell<hidden_type, cell_params>& cell)
|
||||
: cell_(cell) {};
|
||||
: cell_(cell) {}
|
||||
|
||||
output_type operator()(
|
||||
const PackedSequence& input,
|
||||
@ -983,7 +983,7 @@ struct ReversedPackedLayer : Layer<PackedSequence, hidden_type, cell_params> {
|
||||
typename Layer<PackedSequence, hidden_type, cell_params>::output_type;
|
||||
|
||||
ReversedPackedLayer(Cell<hidden_type, cell_params>& cell)
|
||||
: cell_(cell) {};
|
||||
: cell_(cell) {}
|
||||
|
||||
output_type operator()(
|
||||
const PackedSequence& input,
|
||||
@ -1040,7 +1040,7 @@ struct PackedBidirectionalLayer
|
||||
typename Layer<PackedSequence, hidden_type, param_type>::output_type;
|
||||
|
||||
PackedBidirectionalLayer(Cell<dir_hidden_type, cell_params>& cell)
|
||||
: layer_(cell), rev_layer_(cell) {};
|
||||
: layer_(cell), rev_layer_(cell) {}
|
||||
|
||||
output_type operator()(
|
||||
const PackedSequence& input,
|
||||
@ -1889,7 +1889,7 @@ static DEFINE_QUANTIZED_RNN_CELL_DYNAMIC(quantized_rnn_tanh_cell_dynamic, simple
|
||||
|
||||
namespace {
|
||||
|
||||
static C10_UNUSED auto ensure_linear_params_registered = register_linear_params();
|
||||
C10_UNUSED static auto ensure_linear_params_registered = register_linear_params();
|
||||
|
||||
static auto cell_params_base_registry =
|
||||
torch::selective_class_<CellParamsBase>("rnn", TORCH_SELECTIVE_CLASS("CellParamsBase"))
|
||||
|
||||
@ -753,11 +753,11 @@ Tensor cumprod_backward(const Tensor& grad, const Tensor& input, int64_t dim, co
|
||||
namespace {
|
||||
#ifdef _MSC_VER
|
||||
template<typename T>
|
||||
inline typename std::enable_if<std::is_integral<T>::value, bool>::type isnan_(T x) {
|
||||
inline std::enable_if_t<std::is_integral_v<T>, bool> isnan_(T x) {
|
||||
return false;
|
||||
}
|
||||
template<typename T>
|
||||
inline typename std::enable_if<!std::is_integral<T>::value, bool>::type isnan_(T x) {
|
||||
inline std::enable_if_t<!std::is_integral_v<T>, bool> isnan_(T x) {
|
||||
return std::isnan(x);
|
||||
}
|
||||
#else
|
||||
|
||||
@ -207,7 +207,7 @@ inline TensorIterator make_reduction(
|
||||
return TensorIterator::reduce_op(viewed_result, self.to(in_dtype));
|
||||
}
|
||||
|
||||
inline C10_UNUSED TensorIterator make_reduction(
|
||||
C10_UNUSED inline TensorIterator make_reduction(
|
||||
const char* name, Tensor& result, const Tensor& self,
|
||||
at::OptionalIntArrayRef dim, bool keepdim, ScalarType out_dtype) {
|
||||
// special case for type promotion in mixed precision, improves computational
|
||||
@ -259,7 +259,7 @@ inline TensorIterator make_reduction(
|
||||
return TensorIterator::reduce_op(viewed_result1, viewed_result2, self.to(dtype1));
|
||||
}
|
||||
|
||||
inline C10_UNUSED TensorIterator make_reduction(
|
||||
C10_UNUSED inline TensorIterator make_reduction(
|
||||
const char* name, Tensor& result1, Tensor& result2, const Tensor& self,
|
||||
at::OptionalIntArrayRef dim, bool keepdim, ScalarType dtype) {
|
||||
return make_reduction(name, result1, result2, self, dim, keepdim, dtype, dtype);
|
||||
@ -313,7 +313,7 @@ inline std::vector<int64_t> get_zero_numel_tensor_size(
|
||||
// This function should be called when you are reducing a zero-numel tensor and want to
|
||||
// resize the output and return it. This function exists for resizing zero-numel
|
||||
// tensors when the size of the reduction dimension is non-zero.
|
||||
inline C10_UNUSED void zero_numel_tensor_resize(Tensor& result, Tensor& result_indices,
|
||||
C10_UNUSED inline void zero_numel_tensor_resize(Tensor& result, Tensor& result_indices,
|
||||
const Tensor& self, const int64_t dim,
|
||||
const bool keepdim, const char *fn_name) {
|
||||
auto sizes = get_zero_numel_tensor_size(self, dim, keepdim, fn_name);
|
||||
@ -349,7 +349,7 @@ inline ScalarType get_dtype_from_result(Tensor& result, std::optional<ScalarType
|
||||
|
||||
namespace at::meta {
|
||||
|
||||
inline C10_UNUSED DimVector get_reduction_shape(
|
||||
C10_UNUSED inline DimVector get_reduction_shape(
|
||||
const Tensor& self,
|
||||
IntArrayRef dims,
|
||||
bool keepdim,
|
||||
@ -434,7 +434,7 @@ inline TensorIterator make_reduction(
|
||||
return TensorIterator::reduce_op(viewed_result1, viewed_result2, self.to(dtype1));
|
||||
}
|
||||
|
||||
inline C10_UNUSED TensorIterator make_reduction_from_out_ty(
|
||||
C10_UNUSED inline TensorIterator make_reduction_from_out_ty(
|
||||
const Tensor& self,
|
||||
const Tensor& result,
|
||||
OptionalIntArrayRef opt_dims,
|
||||
|
||||
@ -52,7 +52,7 @@ inline void gather_shape_check(const Tensor& self, int64_t dim,
|
||||
ensure_nonempty_size(index, i) <= ensure_nonempty_size(self, i),
|
||||
"Size does not match at dimension ", i,
|
||||
" expected index ", index.sizes(),
|
||||
" to be smaller than self ", self.sizes(),
|
||||
" to be no larger than self ", self.sizes(),
|
||||
" apart from dimension ", dim
|
||||
);
|
||||
}
|
||||
@ -109,15 +109,15 @@ inline void scatter_shape_check(
|
||||
|
||||
TORCH_CHECK(!is_wrong_shape,
|
||||
"Expected index ", index.sizes(),
|
||||
" to be smaller than self ", self.sizes(),
|
||||
" to be no larger than self ", self.sizes(),
|
||||
" apart from dimension ", dim,
|
||||
" and to be smaller size than src ", src.sizes()
|
||||
" and to be no larger size than src ", src.sizes()
|
||||
);
|
||||
}
|
||||
else {
|
||||
TORCH_CHECK(!is_wrong_shape,
|
||||
"Expected index ", index.sizes(),
|
||||
" to be smaller than self ", self.sizes(),
|
||||
" to be no larger than self ", self.sizes(),
|
||||
" apart from dimension ", dim
|
||||
);
|
||||
}
|
||||
|
||||
@ -216,15 +216,6 @@
|
||||
#include <vector>
|
||||
|
||||
namespace at::meta {
|
||||
inline void cat_check_no_zero_dim(const MaterializedITensorListRef& tensors) {
|
||||
size_t i = 0;
|
||||
for (const Tensor& t : tensors) {
|
||||
TORCH_CHECK(
|
||||
t.dim() > 0,
|
||||
"zero-dimensional tensor (at position ", i, ") cannot be concatenated");
|
||||
i++;
|
||||
}
|
||||
}
|
||||
|
||||
inline c10::MemoryFormat cat_compute_output_memory_format(const MaterializedITensorListRef& inputs) {
|
||||
std::optional<c10::MemoryFormat> format = std::nullopt;
|
||||
@ -248,7 +239,7 @@ TORCH_PRECOMPUTE_META_FUNC(cat)(const ITensorListRef& tensors, int64_t dim) {
|
||||
// size (i.e. other empty sizes are not skipped).
|
||||
auto materialized = tensors.materialize();
|
||||
|
||||
cat_check_no_zero_dim(materialized);
|
||||
native::check_cat_no_zero_dim(materialized);
|
||||
dim = at::legacy_cat_wrap_dim(dim, materialized);
|
||||
|
||||
// Checking names before the actual dimensions.
|
||||
|
||||
@ -30,7 +30,7 @@ inline void check_cat_shape_except_dim(const Tensor & first, const Tensor & seco
|
||||
}
|
||||
|
||||
inline void check_cat_no_zero_dim(const MaterializedITensorListRef& tensors) {
|
||||
int64_t i = 0;
|
||||
[[maybe_unused]] int64_t i = 0;
|
||||
for(const Tensor& t : tensors) {
|
||||
TORCH_CHECK(t.dim() > 0,
|
||||
"zero-dimensional tensor (at position ", i, ") cannot be concatenated");
|
||||
|
||||
@ -29,7 +29,7 @@ namespace {
|
||||
// grad_in does not mean that it is a gradient wrt to input,
|
||||
// grad_in/grad_out is just an input/output of unfold_backward kernel.
|
||||
|
||||
static C10_UNUSED TensorIterator _make_unfold_backward_iter_over_grad_out(
|
||||
C10_UNUSED static TensorIterator _make_unfold_backward_iter_over_grad_out(
|
||||
Tensor& grad_out,
|
||||
const Tensor& grad_in,
|
||||
int64_t dim,
|
||||
|
||||
@ -103,7 +103,7 @@ DECLARE_DISPATCH(upsampling_bicubic2d, upsample_bicubic2d_kernel);
|
||||
DECLARE_DISPATCH(_upsampling_bicubic2d_aa, _upsample_bicubic2d_aa_kernel);
|
||||
DECLARE_DISPATCH(_upsampling_bicubic2d_aa, _upsample_bicubic2d_aa_backward_kernel);
|
||||
|
||||
inline C10_UNUSED std::array<int64_t, 3> upsample_1d_common_check(IntArrayRef input_size, IntArrayRef output_size) {
|
||||
C10_UNUSED inline std::array<int64_t, 3> upsample_1d_common_check(IntArrayRef input_size, IntArrayRef output_size) {
|
||||
TORCH_CHECK(
|
||||
output_size.size() == 1,
|
||||
"It is expected output_size equals to 1, but got size ",
|
||||
@ -131,7 +131,7 @@ inline C10_UNUSED std::array<int64_t, 3> upsample_1d_common_check(IntArrayRef in
|
||||
return {nbatch, channels, output_width};
|
||||
}
|
||||
|
||||
inline C10_UNUSED std::array<int64_t, 4> upsample_2d_common_check(IntArrayRef input_size, IntArrayRef output_size) {
|
||||
C10_UNUSED inline std::array<int64_t, 4> upsample_2d_common_check(IntArrayRef input_size, IntArrayRef output_size) {
|
||||
TORCH_CHECK(
|
||||
output_size.size() == 2,
|
||||
"It is expected output_size equals to 2, but got size ",
|
||||
@ -167,7 +167,7 @@ inline C10_UNUSED std::array<int64_t, 4> upsample_2d_common_check(IntArrayRef in
|
||||
return {nbatch, channels, output_height, output_width};
|
||||
}
|
||||
|
||||
inline C10_UNUSED
|
||||
C10_UNUSED inline
|
||||
std::array<int64_t, 5> upsample_3d_common_check(IntArrayRef input_size, IntArrayRef output_size) {
|
||||
TORCH_CHECK(
|
||||
output_size.size() == 3,
|
||||
|
||||
@ -40,7 +40,7 @@ int register_linear_params() {
|
||||
}
|
||||
|
||||
namespace {
|
||||
static C10_UNUSED auto linear_params = register_linear_params();
|
||||
C10_UNUSED static auto linear_params = register_linear_params();
|
||||
} // namespace
|
||||
|
||||
}} // namespace ao::sparse
|
||||
|
||||
@ -96,7 +96,7 @@ auto sum(int64_t N, Func f) {
|
||||
}
|
||||
|
||||
template <typename scalar_t, typename opmath_t>
|
||||
typename std::enable_if<std::is_same<scalar_t, opmath_t>::value, void>::type
|
||||
std::enable_if_t<std::is_same_v<scalar_t, opmath_t>, void>
|
||||
gemm_notrans_(
|
||||
int64_t m,
|
||||
int64_t n,
|
||||
@ -132,7 +132,7 @@ gemm_notrans_(
|
||||
|
||||
// std::is_same<scalar_t, at::BFloat16> || std::is_same<scalar_t, at::Half>
|
||||
template <typename scalar_t, typename opmath_t>
|
||||
typename std::enable_if<!std::is_same<scalar_t, opmath_t>::value, void>::type
|
||||
std::enable_if_t<!std::is_same_v<scalar_t, opmath_t>, void>
|
||||
gemm_notrans_(
|
||||
int64_t m,
|
||||
int64_t n,
|
||||
@ -222,7 +222,7 @@ void gemm_transb_impl(
|
||||
}
|
||||
|
||||
template <typename scalar_t, typename opmath_t>
|
||||
typename std::enable_if<std::is_same<scalar_t, opmath_t>::value, void>::type
|
||||
std::enable_if_t<std::is_same_v<scalar_t, opmath_t>, void>
|
||||
gemm_transb_(
|
||||
TransposeType transb,
|
||||
int64_t m,
|
||||
@ -244,7 +244,7 @@ gemm_transb_(
|
||||
|
||||
// std::is_same<scalar_t, at::BFloat16> || std::is_same<scalar_t, at::Half>
|
||||
template <typename scalar_t, typename opmath_t>
|
||||
typename std::enable_if<!std::is_same<scalar_t, opmath_t>::value, void>::type
|
||||
std::enable_if_t<!std::is_same_v<scalar_t, opmath_t>, void>
|
||||
gemm_transb_(
|
||||
TransposeType transb,
|
||||
int64_t m,
|
||||
|
||||
@ -82,7 +82,7 @@ static void reduced_float_copy_kernel(TensorIteratorBase &iter, bool requires_ne
|
||||
std::copy_n(base, 2, data.data());
|
||||
const int64_t *outer_strides = &strides[2];
|
||||
|
||||
for (const auto it C10_UNUSED : c10::irange(size1)) {
|
||||
for (C10_UNUSED const auto it : c10::irange(size1)) {
|
||||
Vecd dst_s;
|
||||
if (strides_in[0] == 0) {
|
||||
dst_s = Vecd(dest_t(*((scalar_t*)data[1])));
|
||||
@ -151,7 +151,7 @@ static void reduced_float_copy_kernel(TensorIteratorBase &iter, bool requires_ne
|
||||
std::copy_n(base, 2, data.data());
|
||||
const int64_t *outer_strides = &strides[2];
|
||||
|
||||
for (const auto it C10_UNUSED : c10::irange(size1)) {
|
||||
for (C10_UNUSED const auto it : c10::irange(size1)) {
|
||||
Vecd dst_s;
|
||||
if (strides_in[0] == 0) {
|
||||
dst_s = Vecd(dest_t(*((source_t*)data[1])));
|
||||
|
||||
@ -12,10 +12,10 @@ namespace at::native {
|
||||
namespace{
|
||||
|
||||
template <typename scalar_t, typename opmath_t>
|
||||
typename std::enable_if<
|
||||
std::is_same<scalar_t, Half>::value || std::is_same<scalar_t, BFloat16>::value,
|
||||
void>::
|
||||
type inline adagrad_math(
|
||||
std::enable_if_t<
|
||||
std::is_same_v<scalar_t, Half> || std::is_same_v<scalar_t, BFloat16>,
|
||||
void>
|
||||
inline adagrad_math(
|
||||
scalar_t* param_ptr,
|
||||
scalar_t* grad_ptr,
|
||||
scalar_t* state_sum_ptr,
|
||||
@ -81,10 +81,10 @@ typename std::enable_if<
|
||||
|
||||
|
||||
template <typename scalar_t, typename opmath_t>
|
||||
typename std::enable_if<
|
||||
std::is_same<scalar_t, float>::value || std::is_same<scalar_t, double>::value,
|
||||
void>::
|
||||
type inline adagrad_math(
|
||||
std::enable_if_t<
|
||||
std::is_same_v<scalar_t, float> || std::is_same_v<scalar_t, double>,
|
||||
void>
|
||||
inline adagrad_math(
|
||||
scalar_t* param_ptr,
|
||||
scalar_t* grad_ptr,
|
||||
scalar_t* state_sum_ptr,
|
||||
|
||||
@ -12,10 +12,10 @@ namespace at::native {
|
||||
namespace{
|
||||
|
||||
template <typename scalar_t, typename opmath_t, ADAM_MODE adam_mode>
|
||||
typename std::enable_if<
|
||||
std::is_same<scalar_t, Half>::value || std::is_same<scalar_t, BFloat16>::value,
|
||||
void>::
|
||||
type inline adam_math(
|
||||
std::enable_if_t<
|
||||
std::is_same_v<scalar_t, Half> || std::is_same_v<scalar_t, BFloat16>,
|
||||
void>
|
||||
inline adam_math(
|
||||
scalar_t* param_ptr,
|
||||
scalar_t* exp_avg_ptr,
|
||||
scalar_t* exp_avg_sq_ptr,
|
||||
@ -155,10 +155,10 @@ typename std::enable_if<
|
||||
|
||||
|
||||
template <typename scalar_t, typename opmath_t, ADAM_MODE adam_mode>
|
||||
typename std::enable_if<
|
||||
std::is_same<scalar_t, float>::value || std::is_same<scalar_t, double>::value,
|
||||
void>::
|
||||
type inline adam_math(
|
||||
std::enable_if_t<
|
||||
std::is_same_v<scalar_t, float> || std::is_same_v<scalar_t, double>,
|
||||
void>
|
||||
inline adam_math(
|
||||
scalar_t* param_ptr,
|
||||
scalar_t* exp_avg_ptr,
|
||||
scalar_t* exp_avg_sq_ptr,
|
||||
|
||||
@ -12,10 +12,10 @@ namespace at::native {
|
||||
namespace{
|
||||
|
||||
template <typename scalar_t, typename opmath_t>
|
||||
typename std::enable_if<
|
||||
std::is_same<scalar_t, Half>::value || std::is_same<scalar_t, BFloat16>::value,
|
||||
void>::
|
||||
type inline sgd_math(
|
||||
std::enable_if_t<
|
||||
std::is_same_v<scalar_t, Half> || std::is_same_v<scalar_t, BFloat16>,
|
||||
void>
|
||||
inline sgd_math(
|
||||
scalar_t* param_ptr,
|
||||
scalar_t* grad_ptr,
|
||||
scalar_t* momentum_buf_ptr,
|
||||
@ -104,10 +104,10 @@ typename std::enable_if<
|
||||
|
||||
|
||||
template <typename scalar_t, typename opmath_t>
|
||||
typename std::enable_if<
|
||||
std::is_same<scalar_t, float>::value || std::is_same<scalar_t, double>::value,
|
||||
void>::
|
||||
type inline sgd_math(
|
||||
std::enable_if_t<
|
||||
std::is_same_v<scalar_t, float> || std::is_same_v<scalar_t, double>,
|
||||
void>
|
||||
inline sgd_math(
|
||||
scalar_t* param_ptr,
|
||||
scalar_t* grad_ptr,
|
||||
scalar_t* momentum_buf_ptr,
|
||||
|
||||
@ -31,14 +31,16 @@ struct IsContiguous<0, -1, traits, s> {
|
||||
};
|
||||
|
||||
// output and all inputs are contiguous
|
||||
template <typename traits,
|
||||
typename std::enable_if<std::is_void<typename traits::result_type>::value>::type* = nullptr>
|
||||
template <
|
||||
typename traits,
|
||||
std::enable_if_t<std::is_void_v<typename traits::result_type>>* =
|
||||
nullptr>
|
||||
static inline bool is_contiguous(const int64_t* strides) {
|
||||
return IsContiguous<traits::arity, traits::arity - 1, traits>::eval(strides);
|
||||
}
|
||||
|
||||
template <typename traits,
|
||||
typename std::enable_if<!std::is_void<typename traits::result_type>::value>::type* = nullptr>
|
||||
std::enable_if_t<!std::is_void_v<typename traits::result_type>>* = nullptr>
|
||||
static inline bool is_contiguous(const int64_t* strides) {
|
||||
return IsContiguous<traits::arity, traits::arity, traits>::eval(strides);
|
||||
}
|
||||
@ -46,14 +48,14 @@ static inline bool is_contiguous(const int64_t* strides) {
|
||||
// input at `s` is scalar (stride 0); output and other inputs are contiguous
|
||||
// NB: output is typically at strides[0] so first input corresponds to s=1
|
||||
template <typename traits, int s,
|
||||
typename std::enable_if<std::is_void<typename traits::result_type>::value>::type* = nullptr>
|
||||
std::enable_if_t<std::is_void_v<typename traits::result_type>>* = nullptr>
|
||||
static inline bool is_contiguous_scalar(const int64_t* strides) {
|
||||
static_assert(s > 0 && s <= traits::arity, "scalar argument index out of bounds");
|
||||
return IsContiguous<traits::arity, traits::arity - 1, traits, s>::eval(strides);
|
||||
}
|
||||
|
||||
template <typename traits, int s,
|
||||
typename std::enable_if<!std::is_void<typename traits::result_type>::value>::type* = nullptr>
|
||||
std::enable_if_t<!std::is_void_v<typename traits::result_type>>* = nullptr>
|
||||
static inline bool is_contiguous_scalar(const int64_t* strides) {
|
||||
static_assert(s > 0 && s <= traits::arity, "scalar argument index out of bounds");
|
||||
return IsContiguous<traits::arity, traits::arity, traits, s>::eval(strides);
|
||||
|
||||
@ -271,7 +271,7 @@ struct VectorizedLoop2d {
|
||||
const int64_t *outer_strides = &strides[ntensors];
|
||||
|
||||
if (is_contiguous<traits>(strides)) {
|
||||
for (const auto i C10_UNUSED : c10::irange(size1)) {
|
||||
for (C10_UNUSED const auto i : c10::irange(size1)) {
|
||||
vectorized_loop(data.data(), size0, 0, op, vop);
|
||||
advance(data, outer_strides);
|
||||
}
|
||||
@ -279,12 +279,12 @@ struct VectorizedLoop2d {
|
||||
using Indices = std::make_index_sequence<traits::arity>;
|
||||
unroll_contiguous_scalar_checks<traits>(strides, Indices{}, [&](size_t idx) {
|
||||
if (idx) {
|
||||
for (const auto i C10_UNUSED : c10::irange(size1)) {
|
||||
for (C10_UNUSED const auto i : c10::irange(size1)) {
|
||||
vectorized_loop(data.data(), size0, idx, op, vop);
|
||||
advance(data, outer_strides);
|
||||
}
|
||||
} else {
|
||||
for (const auto i C10_UNUSED : c10::irange(size1)) {
|
||||
for (C10_UNUSED const auto i : c10::irange(size1)) {
|
||||
basic_loop(data.data(), strides, 0, size0, op);
|
||||
advance(data, outer_strides);
|
||||
}
|
||||
|
||||
@ -64,7 +64,7 @@ vec::Vectorized<int64_t> is_nan_vec<int64_t>(vec::Vectorized<int64_t> vec) {
|
||||
|
||||
template <typename scalar_t, typename opmath_t>
|
||||
inline
|
||||
typename std::enable_if<std::is_same<scalar_t, opmath_t>::value, void>::type
|
||||
std::enable_if_t<std::is_same_v<scalar_t, opmath_t>, void>
|
||||
compute_internal(
|
||||
const scalar_t* input_data,
|
||||
scalar_t* out_data,
|
||||
@ -139,7 +139,7 @@ compute_internal(
|
||||
// std::is_same<scalar_t, at::BFloat16> || std::is_same<scalar_t, at::Half>
|
||||
template <typename scalar_t, typename opmath_t>
|
||||
inline
|
||||
typename std::enable_if<!std::is_same<scalar_t, opmath_t>::value, void>::type
|
||||
std::enable_if_t<!std::is_same_v<scalar_t, opmath_t>, void>
|
||||
compute_internal(
|
||||
const scalar_t* input_data,
|
||||
scalar_t* out_data,
|
||||
|
||||
@ -129,13 +129,13 @@ static void set_results(const res_t result, const TensorIteratorBase &iter, cons
|
||||
}
|
||||
|
||||
template<typename traits, std::size_t i = 0, typename... tuple_t>
|
||||
inline typename std::enable_if<i == sizeof...(tuple_t), std::size_t>::type
|
||||
inline std::enable_if_t<i == sizeof...(tuple_t), std::size_t>
|
||||
for_each_in_tuple(const std::tuple<tuple_t...>& /*t*/, const TensorIteratorBase& /*iter*/, const int /*num_outputs*/) {
|
||||
return i;
|
||||
}
|
||||
|
||||
template<typename traits, std::size_t i = 0, typename... tuple_t>
|
||||
inline typename std::enable_if<i < sizeof...(tuple_t), std::size_t>::type
|
||||
inline std::enable_if_t<i < sizeof...(tuple_t), std::size_t>
|
||||
for_each_in_tuple(const std::tuple<tuple_t...>& t, const TensorIteratorBase &iter, const int num_outputs) {
|
||||
if (i < (size_t)num_outputs) {
|
||||
set_result<traits>(i, std::get<i>(t), iter, num_outputs);
|
||||
|
||||
@ -106,7 +106,7 @@ inline void _init(scalar_t* self_ptr, at::opmath_type<scalar_t>* buffer_ptr, int
|
||||
}
|
||||
|
||||
template <typename scalar_t>
|
||||
inline typename std::enable_if<!std::is_same<scalar_t, Vec2>::value, scalar_t>::type
|
||||
inline std::enable_if_t<!std::is_same_v<scalar_t, Vec2>, scalar_t>
|
||||
_max(const scalar_t& x, const scalar_t& y) {
|
||||
return at::_isnan(y) ? y : std::max(x, y);
|
||||
}
|
||||
@ -118,14 +118,14 @@ inline Vectorized<scalar_t> _max(const Vectorized<scalar_t>& x, const Vectorized
|
||||
}
|
||||
|
||||
template <typename vec_t>
|
||||
inline typename std::enable_if<std::is_same<vec_t, Vec2>::value, Vec2>::type
|
||||
inline std::enable_if_t<std::is_same_v<vec_t, Vec2>, Vec2>
|
||||
_max(const vec_t& x, const vec_t& y) {
|
||||
// vec::maximum propagates NaN
|
||||
return maximum(x, y);
|
||||
}
|
||||
|
||||
template <typename scalar_t>
|
||||
inline typename std::enable_if<!std::is_same<scalar_t, Vec2>::value, scalar_t>::type
|
||||
inline std::enable_if_t<!std::is_same_v<scalar_t, Vec2>, scalar_t>
|
||||
_min(const scalar_t& x, const scalar_t& y) {
|
||||
return at::_isnan(y) ? y : std::min(x, y);
|
||||
}
|
||||
@ -137,7 +137,7 @@ inline Vectorized<scalar_t> _min(const Vectorized<scalar_t>& x, const Vectorized
|
||||
}
|
||||
|
||||
template <typename vec_t>
|
||||
inline typename std::enable_if<std::is_same<vec_t, Vec2>::value, Vec2>::type
|
||||
inline std::enable_if_t<std::is_same_v<vec_t, Vec2>, Vec2>
|
||||
_min(const vec_t& x, const vec_t& y) {
|
||||
// vec::minimum propagates NaN
|
||||
return minimum(x, y);
|
||||
|
||||
@ -76,7 +76,7 @@ void _unfold_backward_internal_kernel(
|
||||
auto* RESTRICT grad_in_ptr = data[1];
|
||||
auto* RESTRICT idx_dim_ptr = data[2];
|
||||
|
||||
for (const auto elem C10_UNUSED : c10::irange(nelems)) {
|
||||
for (C10_UNUSED const auto elem : c10::irange(nelems)) {
|
||||
auto* RESTRICT grad_out_data = reinterpret_cast<scalar_t*>(grad_out_ptr);
|
||||
auto* RESTRICT grad_in_data = reinterpret_cast<scalar_t*>(grad_in_ptr);
|
||||
|
||||
|
||||
@ -102,7 +102,7 @@ void pack_rgb(
|
||||
|
||||
TORCH_INTERNAL_ASSERT(unpacked_increment == 3 || unpacked_increment == 4);
|
||||
|
||||
for (const auto i C10_UNUSED : c10::irange(num_pixels)) {
|
||||
for (C10_UNUSED const auto i : c10::irange(num_pixels)) {
|
||||
for (const auto j : c10::irange(num_channels)) {
|
||||
packed[j * packed_stride] = unpacked[j];
|
||||
}
|
||||
|
||||
@ -85,8 +85,8 @@ void GroupNormKernelImplInternal(
|
||||
}
|
||||
|
||||
template <typename T>
|
||||
typename std::enable_if<std::is_same<T, at::opmath_type<T>>::value,
|
||||
std::tuple<T, T>>::type
|
||||
std::enable_if_t<std::is_same_v<T, at::opmath_type<T>>,
|
||||
std::tuple<T, T>>
|
||||
ColumnwiseMoments(
|
||||
const T* X_data,
|
||||
int64_t HxW,
|
||||
@ -118,8 +118,8 @@ ColumnwiseMoments(
|
||||
|
||||
// std::is_same<T, at::BFloat16> || std::is_same<T, at::Half>
|
||||
template <typename T>
|
||||
typename std::enable_if<!std::is_same<T, at::opmath_type<T>>::value,
|
||||
std::tuple<at::opmath_type<T>, at::opmath_type<T>>>::type
|
||||
std::enable_if_t<!std::is_same_v<T, at::opmath_type<T>>,
|
||||
std::tuple<at::opmath_type<T>, at::opmath_type<T>>>
|
||||
ColumnwiseMoments(
|
||||
const T* X_data,
|
||||
int64_t HxW,
|
||||
@ -160,7 +160,7 @@ ColumnwiseMoments(
|
||||
}
|
||||
|
||||
template <typename T, typename opmath_t>
|
||||
inline typename std::enable_if<std::is_same<T, opmath_t>::value, void>::type
|
||||
inline std::enable_if_t<std::is_same_v<T, opmath_t>, void>
|
||||
CalcMeanVar(
|
||||
const T* X_ptr,
|
||||
opmath_t* mean_ptr,
|
||||
@ -183,7 +183,7 @@ CalcMeanVar(
|
||||
|
||||
// std::is_same<T, at::BFloat16> || std::is_same<T, at::Half>
|
||||
template <typename T, typename opmath_t>
|
||||
inline typename std::enable_if<!std::is_same<T, opmath_t>::value, void>::type
|
||||
inline std::enable_if_t<!std::is_same_v<T, opmath_t>, void>
|
||||
CalcMeanVar(
|
||||
const T* X_ptr,
|
||||
opmath_t* mean_ptr,
|
||||
@ -227,7 +227,7 @@ CalcMeanVar(
|
||||
}
|
||||
|
||||
template <typename T, typename opmath_t>
|
||||
inline typename std::enable_if<std::is_same<T, opmath_t>::value, void>::type
|
||||
inline std::enable_if_t<std::is_same_v<T, opmath_t>, void>
|
||||
ApplyScaleBias(
|
||||
T* Y_ptr,
|
||||
const T* X_ptr,
|
||||
@ -246,7 +246,7 @@ ApplyScaleBias(
|
||||
|
||||
// std::is_same<T, at::BFloat16> || std::is_same<T, at::Half>
|
||||
template <typename T, typename opmath_t>
|
||||
inline typename std::enable_if<!std::is_same<T, opmath_t>::value, void>::type
|
||||
inline std::enable_if_t<!std::is_same_v<T, opmath_t>, void>
|
||||
ApplyScaleBias(
|
||||
T* Y_ptr,
|
||||
const T* X_ptr,
|
||||
@ -529,7 +529,7 @@ void GroupNormKernelImpl(
|
||||
|
||||
|
||||
template <typename T, typename opmath_t>
|
||||
typename std::enable_if<std::is_same<T, opmath_t>::value, void>::type
|
||||
std::enable_if_t<std::is_same_v<T, opmath_t>, void>
|
||||
ComputeInternalGradients(
|
||||
int64_t N,
|
||||
int64_t C,
|
||||
@ -556,7 +556,7 @@ ComputeInternalGradients(
|
||||
}
|
||||
|
||||
template <typename T, typename opmath_t>
|
||||
typename std::enable_if<!std::is_same<T, opmath_t>::value, void>::type
|
||||
std::enable_if_t<!std::is_same_v<T, opmath_t>, void>
|
||||
ComputeInternalGradients(
|
||||
int64_t N,
|
||||
int64_t C,
|
||||
@ -603,7 +603,7 @@ ComputeInternalGradients(
|
||||
}
|
||||
|
||||
template <typename PT, typename opmath_t>
|
||||
inline typename std::enable_if<std::is_same<PT, opmath_t>::value, void>::type
|
||||
inline std::enable_if_t<std::is_same_v<PT, opmath_t>, void>
|
||||
CalcDsDb(
|
||||
const opmath_t* ds_ptr,
|
||||
const opmath_t* db_ptr,
|
||||
@ -626,7 +626,7 @@ CalcDsDb(
|
||||
}
|
||||
|
||||
template <typename PT, typename opmath_t>
|
||||
inline typename std::enable_if<!std::is_same<PT, opmath_t>::value, void>::type
|
||||
inline std::enable_if_t<!std::is_same_v<PT, opmath_t>, void>
|
||||
CalcDsDb(
|
||||
const opmath_t* ds_ptr,
|
||||
const opmath_t* db_ptr,
|
||||
@ -708,7 +708,7 @@ void GroupNormInputBackward(
|
||||
}
|
||||
|
||||
template <typename PT, typename opmath_t>
|
||||
typename std::enable_if<std::is_same<PT, opmath_t>::value, void>::type
|
||||
std::enable_if_t<std::is_same_v<PT, opmath_t>, void>
|
||||
GammaBackward(
|
||||
int64_t N,
|
||||
int64_t C,
|
||||
@ -755,7 +755,7 @@ GammaBackward(
|
||||
}
|
||||
|
||||
template <typename PT, typename opmath_t>
|
||||
typename std::enable_if<!std::is_same<PT, opmath_t>::value, void>::type
|
||||
std::enable_if_t<!std::is_same_v<PT, opmath_t>, void>
|
||||
GammaBackward(
|
||||
int64_t N,
|
||||
int64_t C,
|
||||
@ -817,7 +817,7 @@ GammaBackward(
|
||||
}
|
||||
|
||||
template <typename PT, typename opmath_t>
|
||||
typename std::enable_if<std::is_same<PT, opmath_t>::value, void>::type
|
||||
std::enable_if_t<std::is_same_v<PT, opmath_t>, void>
|
||||
BetaBackward(int64_t N, int64_t C, const opmath_t* db, PT* dbeta) {
|
||||
using Vec = at::vec::Vectorized<PT>;
|
||||
constexpr int64_t K = Vec::size();
|
||||
@ -841,7 +841,7 @@ BetaBackward(int64_t N, int64_t C, const opmath_t* db, PT* dbeta) {
|
||||
}
|
||||
|
||||
template <typename PT, typename opmath_t>
|
||||
typename std::enable_if<!std::is_same<PT, opmath_t>::value, void>::type
|
||||
std::enable_if_t<!std::is_same_v<PT, opmath_t>, void>
|
||||
BetaBackward(int64_t N, int64_t C, const opmath_t* db, PT* dbeta) {
|
||||
using Vec = at::vec::Vectorized<PT>;
|
||||
using fVec = at::vec::Vectorized<opmath_t>;
|
||||
@ -937,7 +937,7 @@ void GroupNormBackwardKernelImplInternal(
|
||||
}
|
||||
|
||||
template <typename T, typename opmath_t>
|
||||
inline typename std::enable_if<std::is_same<T, opmath_t>::value, void>::type
|
||||
inline std::enable_if_t<std::is_same_v<T, opmath_t>, void>
|
||||
DsDbRowwiseMomentsChannelsLast(
|
||||
const T* dY_ptr,
|
||||
const T* X_ptr,
|
||||
@ -972,7 +972,7 @@ DsDbRowwiseMomentsChannelsLast(
|
||||
}
|
||||
|
||||
template <typename T, typename opmath_t>
|
||||
inline typename std::enable_if<!std::is_same<T, opmath_t>::value, void>::type
|
||||
inline std::enable_if_t<!std::is_same_v<T, opmath_t>, void>
|
||||
DsDbRowwiseMomentsChannelsLast(
|
||||
const T* dY_ptr,
|
||||
const T* X_ptr,
|
||||
@ -1024,10 +1024,10 @@ DsDbRowwiseMomentsChannelsLast(
|
||||
}
|
||||
|
||||
template <typename T>
|
||||
inline typename std::enable_if<std::is_same<T, at::opmath_type<T>>::value,
|
||||
inline std::enable_if_t<std::is_same_v<T, at::opmath_type<T>>,
|
||||
std::tuple<
|
||||
vec::Vectorized<T>,
|
||||
vec::Vectorized<T>>>::type
|
||||
vec::Vectorized<T>>>
|
||||
load_util(const T* data_ptr, int64_t n) {
|
||||
using Vec = vec::Vectorized<T>;
|
||||
auto vec0 = Vec::loadu(data_ptr, n > Vec::size() ? Vec::size() : n);
|
||||
@ -1037,11 +1037,11 @@ load_util(const T* data_ptr, int64_t n) {
|
||||
}
|
||||
|
||||
template <typename T>
|
||||
inline typename std::enable_if<!std::is_same<T, at::opmath_type<T>>::value,
|
||||
inline std::enable_if_t<!std::is_same_v<T, at::opmath_type<T>>,
|
||||
std::tuple<
|
||||
vec::Vectorized<at::opmath_type<T>>,
|
||||
vec::Vectorized<at::opmath_type<T>>>
|
||||
>::type
|
||||
>
|
||||
load_util(const T* data_ptr, int64_t n) {
|
||||
using Vec = vec::Vectorized<T>;
|
||||
auto vec = Vec::loadu(data_ptr, n);
|
||||
@ -1049,7 +1049,7 @@ load_util(const T* data_ptr, int64_t n) {
|
||||
}
|
||||
|
||||
template <typename T, typename PT, typename opmath_t>
|
||||
inline typename std::enable_if<std::is_same<T, opmath_t>::value, void>::type
|
||||
inline std::enable_if_t<std::is_same_v<T, opmath_t>, void>
|
||||
ApplyInputGradientsChannelsLastColMov(
|
||||
const T* dY_data,
|
||||
const T* X_data,
|
||||
@ -1097,7 +1097,7 @@ ApplyInputGradientsChannelsLastColMov(
|
||||
}
|
||||
|
||||
template <typename T, typename PT, typename opmath_t>
|
||||
inline typename std::enable_if<!std::is_same<T, opmath_t>::value, void>::type
|
||||
inline std::enable_if_t<!std::is_same_v<T, opmath_t>, void>
|
||||
ApplyInputGradientsChannelsLastColMov(
|
||||
const T* dY_data,
|
||||
const T* X_data,
|
||||
@ -1154,7 +1154,7 @@ ApplyInputGradientsChannelsLastColMov(
|
||||
}
|
||||
|
||||
template <typename T, typename PT, typename opmath_t>
|
||||
inline typename std::enable_if<std::is_same<T, opmath_t>::value, void>::type
|
||||
inline std::enable_if_t<std::is_same_v<T, opmath_t>, void>
|
||||
ApplyInputGradientsChannelsLastRowMov(
|
||||
const T* dY_data,
|
||||
const T* X_data,
|
||||
@ -1190,7 +1190,7 @@ ApplyInputGradientsChannelsLastRowMov(
|
||||
}
|
||||
|
||||
template <typename T, typename PT, typename opmath_t>
|
||||
inline typename std::enable_if<!std::is_same<T, opmath_t>::value, void>::type
|
||||
inline std::enable_if_t<!std::is_same_v<T, opmath_t>, void>
|
||||
ApplyInputGradientsChannelsLastRowMov(
|
||||
const T* dY_data,
|
||||
const T* X_data,
|
||||
|
||||
@ -30,7 +30,7 @@ void resize_bytes_cuda(StorageImpl* storage, size_t size_bytes) {
|
||||
c10::cuda::CUDAGuard guard(device.index());
|
||||
at::DataPtr data = allocator->allocate(size_bytes);
|
||||
if (storage->data_ptr()) {
|
||||
at::globalContext().lazyInitCUDA();
|
||||
at::globalContext().lazyInitDevice(c10::DeviceType::CUDA);
|
||||
|
||||
C10_CUDA_CHECK(
|
||||
cudaMemcpyAsync(
|
||||
|
||||
@ -1374,7 +1374,7 @@ std::tuple<Tensor, Tensor, Tensor> layer_norm_cuda(
|
||||
for (const auto idx: c10::irange(axis)) {
|
||||
stat_shape.push_back(input_shape[idx]);
|
||||
}
|
||||
for (const auto C10_UNUSED idx: c10::irange(axis, input.dim())) {
|
||||
for (C10_UNUSED const auto idx: c10::irange(axis, input.dim())) {
|
||||
stat_shape.push_back(1);
|
||||
}
|
||||
|
||||
|
||||
@ -74,7 +74,7 @@ cudnn_frontend::Tensor getTensorDescriptorWithTypeVirtual(
|
||||
// Ubuntu-22+ if `libnvrtc.so` is not found on the system, which strictly
|
||||
// speaking is not necessary for usecases below See
|
||||
// https://github.com/pytorch/pytorch/issues/97041
|
||||
static C10_UNUSED auto cudnn_cnn_infer_handler = [] {
|
||||
C10_UNUSED static auto cudnn_cnn_infer_handler = [] {
|
||||
void* handle = dlopen("libcudnn_cnn_infer.so.8", RTLD_LAZY);
|
||||
char* err = dlerror();
|
||||
if (!handle) {
|
||||
|
||||
@ -339,7 +339,7 @@ Tensor mkldnn_linear_pointwise_binary(
|
||||
#if AT_MKL_ENABLED()
|
||||
#include <mkl.h>
|
||||
|
||||
static Tensor mkl_linear(
|
||||
Tensor mkl_linear(
|
||||
const Tensor& self,
|
||||
const Tensor& mkl_weight_t,
|
||||
const Tensor& origin_weight_t,
|
||||
|
||||
@ -22,6 +22,17 @@ C10_API Tensor mkldnn_linear_pointwise_binary(
|
||||
const std::optional<Tensor>& bias_opt,
|
||||
c10::string_view attr);
|
||||
|
||||
#if AT_MKL_ENABLED()
|
||||
|
||||
C10_API Tensor mkl_linear(
|
||||
const Tensor& self,
|
||||
const Tensor& mkl_weight_t,
|
||||
const Tensor& origin_weight_t,
|
||||
const std::optional<Tensor>& bias_opt,
|
||||
const int64_t prepack_batch_size);
|
||||
|
||||
#endif// AT_MKL_ENABLED
|
||||
|
||||
} // namespace native
|
||||
} // namespace at
|
||||
|
||||
|
||||
@ -31,8 +31,15 @@ typedef NS_ENUM(NSInteger, MTLMathMode)
|
||||
MTLMathModeFast = 2,
|
||||
};
|
||||
|
||||
typedef NS_ENUM(NSInteger, MTLMathFloatingPointFunctions)
|
||||
{
|
||||
MTLMathFloatingPointFunctionsFast = 0,
|
||||
MTLMathFloatingPointFunctionsPrecise = 1,
|
||||
};
|
||||
|
||||
@interface MTLCompileOptions()
|
||||
@property (readwrite, nonatomic) MTLMathMode mathMode;
|
||||
@property (readwrite, nonatomic) MTLMathFloatingPointFunctions mathFloatingPointFunctions;
|
||||
@end
|
||||
|
||||
#endif
|
||||
|
||||
@ -541,18 +541,9 @@ Placeholder::Placeholder(MPSGraphTensor* mpsGraphTensor,
|
||||
MPSShape* mpsShape = getMPSShape(_tensor);
|
||||
MPSShape* mpsStrides = getMPSShape(_tensor.strides());
|
||||
|
||||
IntArrayRef baseShape;
|
||||
if (src.is_view()) {
|
||||
baseShape = src._base().sizes();
|
||||
} else {
|
||||
baseShape = getIMPSAllocator()->getBufferShape(src.storage().data());
|
||||
}
|
||||
int flattenedShaped = 1;
|
||||
for (const auto i : c10::irange(baseShape.size())) {
|
||||
flattenedShaped *= baseShape[i];
|
||||
}
|
||||
MPSShape* mpsBaseShape = @[ @(flattenedShaped) ];
|
||||
MPSNDArrayDescriptor* srcTensorDesc = [MPSNDArrayDescriptor descriptorWithDataType:dataType shape:mpsBaseShape];
|
||||
auto storage_numel = src.storage().nbytes() / src.element_size();
|
||||
MPSNDArrayDescriptor* srcTensorDesc = [MPSNDArrayDescriptor descriptorWithDataType:dataType
|
||||
shape:@[ @(storage_numel) ]];
|
||||
srcTensorDesc.preferPackedRows = YES;
|
||||
MPSNDArray* srcNDArray = [[[MPSNDArray alloc] initWithBuffer:srcBuf
|
||||
offset:src.storage_offset() * src.element_size()
|
||||
@ -848,7 +839,10 @@ id<MTLLibrary> MetalShaderLibrary::getLibrary(const std::initializer_list<std::s
|
||||
}
|
||||
|
||||
id<MTLLibrary> MetalShaderLibrary::compileLibrary(const std::string& src) {
|
||||
static const char* fast_math = std::getenv("PYTORCH_MPS_FAST_MATH");
|
||||
static auto fast_math = []() {
|
||||
auto val = std::getenv("PYTORCH_MPS_FAST_MATH");
|
||||
return val && std::stoi(val) != 0;
|
||||
}();
|
||||
NSError* error = nil;
|
||||
MTLCompileOptions* options = compile_options;
|
||||
if (!options) {
|
||||
@ -856,7 +850,15 @@ id<MTLLibrary> MetalShaderLibrary::compileLibrary(const std::string& src) {
|
||||
// Need 3.0 for atomic oprations, 3.1 introduces bfloat support
|
||||
[options setLanguageVersion:is_macos_13_or_newer(MacOSVersion::MACOS_VER_14_0_PLUS) ? MTLLanguageVersion3_1
|
||||
: MTLLanguageVersion3_0];
|
||||
[options setFastMathEnabled:(!fast_math || std::stoi(fast_math) == 0) ? NO : YES];
|
||||
if (is_macos_13_or_newer(MacOSVersion::MACOS_VER_15_0_PLUS)) {
|
||||
options.mathMode = fast_math ? MTLMathModeFast : MTLMathModeSafe;
|
||||
options.mathFloatingPointFunctions =
|
||||
fast_math ? MTLMathFloatingPointFunctionsFast : MTLMathFloatingPointFunctionsPrecise;
|
||||
} else {
|
||||
C10_DIAGNOSTIC_PUSH_AND_IGNORED_IF_DEFINED("-Wdeprecated-declarations")
|
||||
[options setFastMathEnabled:fast_math ? YES : NO];
|
||||
C10_DIAGNOSTIC_POP()
|
||||
}
|
||||
}
|
||||
|
||||
const auto str = [NSString stringWithCString:src.c_str() encoding:NSASCIIStringEncoding];
|
||||
|
||||
@ -167,12 +167,7 @@ static Tensor _mps_convolution_impl(const Tensor& input_t_,
|
||||
|
||||
// TODO: MPS convolution kernel currently does not support output channels > 2^16
|
||||
for (auto elem : output_t.sizes()) {
|
||||
TORCH_CHECK_NOT_IMPLEMENTED(
|
||||
elem <= (1 << 16),
|
||||
"Output channels > 65536 not supported at the MPS device. ",
|
||||
"As a temporary fix, you can set the environment variable `PYTORCH_ENABLE_MPS_FALLBACK=1` ",
|
||||
"to use the CPU as a fallback for this op. WARNING: this will be slower than running natively ",
|
||||
"on MPS.");
|
||||
TORCH_CHECK_NOT_IMPLEMENTED(elem <= (1 << 16), "Output channels > 65536 not supported at the MPS device. ");
|
||||
}
|
||||
|
||||
convolution_shape_check(c, input, weight, output, padding, stride, dilation, groups);
|
||||
@ -378,12 +373,7 @@ static Tensor mps_convolution_backward_input(IntArrayRef input_size,
|
||||
|
||||
// TODO: MPS convolution kernel currently does not support output channels > 2^16
|
||||
for (auto elem : grad_output_t.sizes()) {
|
||||
TORCH_CHECK_NOT_IMPLEMENTED(
|
||||
elem <= (1 << 16),
|
||||
"Output channels > 65536 not supported at the MPS device. ",
|
||||
"As a temporary fix, you can set the environment variable `PYTORCH_ENABLE_MPS_FALLBACK=1` ",
|
||||
"to use the CPU as a fallback for this op. WARNING: this will be slower than running natively ",
|
||||
"on MPS.");
|
||||
TORCH_CHECK_NOT_IMPLEMENTED(elem <= (1 << 16), "Output channels > 65536 not supported at the MPS device. ");
|
||||
}
|
||||
|
||||
TORCH_CHECK(isFloatingType(grad_output_t.scalar_type()), "Convolution is supported only for Floating types");
|
||||
|
||||
@ -718,10 +718,15 @@ std::tuple<Tensor, Tensor, Tensor> batch_norm_backward_mps(const Tensor& grad_ou
|
||||
secondaryTensor:epsilonTensor
|
||||
name:nil];
|
||||
#ifdef __MAC_15_0
|
||||
rsqrtTensor = [mpsGraph reciprocalSquareRootWithTensor:varianceEpsTensor name:nil];
|
||||
#else
|
||||
rsqrtTensor = [mpsGraph reverseSquareRootWithTensor:varianceEpsTensor name:nil];
|
||||
#endif
|
||||
if (is_macos_13_or_newer(MacOSVersion::MACOS_VER_15_0_PLUS)) {
|
||||
rsqrtTensor = [mpsGraph reciprocalSquareRootWithTensor:varianceEpsTensor name:nil];
|
||||
} else
|
||||
#endif // __MAC_15_0
|
||||
{
|
||||
C10_DIAGNOSTIC_PUSH_AND_IGNORED_IF_DEFINED("-Wdeprecated-declarations")
|
||||
rsqrtTensor = [mpsGraph reverseSquareRootWithTensor:varianceEpsTensor name:nil];
|
||||
C10_DIAGNOSTIC_POP()
|
||||
}
|
||||
MPSGraphTensor* bnForwardTensor = [mpsGraph multiplicationWithPrimaryTensor:xMinusMean
|
||||
secondaryTensor:rsqrtTensor
|
||||
name:nil];
|
||||
@ -747,10 +752,15 @@ std::tuple<Tensor, Tensor, Tensor> batch_norm_backward_mps(const Tensor& grad_ou
|
||||
secondaryTensor:epsilonTensor
|
||||
name:nil];
|
||||
#ifdef __MAC_15_0
|
||||
rsqrtTensor = [mpsGraph reciprocalSquareRootWithTensor:varianceEpsTensor name:nil];
|
||||
#else
|
||||
rsqrtTensor = [mpsGraph reverseSquareRootWithTensor:varianceEpsTensor name:nil];
|
||||
#endif
|
||||
if (is_macos_13_or_newer(MacOSVersion::MACOS_VER_15_0_PLUS)) {
|
||||
rsqrtTensor = [mpsGraph reciprocalSquareRootWithTensor:varianceEpsTensor name:nil];
|
||||
} else
|
||||
#endif // __MAC_15_0
|
||||
{
|
||||
C10_DIAGNOSTIC_PUSH_AND_IGNORED_IF_DEFINED("-Wdeprecated-declarations")
|
||||
rsqrtTensor = [mpsGraph reverseSquareRootWithTensor:varianceEpsTensor name:nil];
|
||||
C10_DIAGNOSTIC_POP()
|
||||
}
|
||||
}
|
||||
|
||||
gradInputTensor = [mpsGraph multiplicationWithPrimaryTensor:unitTensor secondaryTensor:rsqrtTensor name:nil];
|
||||
|
||||
Some files were not shown because too many files have changed in this diff Show More
Reference in New Issue
Block a user