Compare commits


1 Commit

Author SHA1 Message Date
6c647f94de fx node in c++ 2024-06-07 19:21:15 -07:00
1723 changed files with 19377 additions and 15278 deletions

View File

@@ -1,5 +0,0 @@
0.6b
manylinux_2_17
rocm6
04b5df8c8123f90cba3ede7e971e6fbc6040d506
3db6ecbc915893ff967abd6e1b43bd5f54949868873be60dc802086c3863e648

View File

@@ -91,9 +91,9 @@ _UCC_COMMIT=20eae37090a4ce1b32bcce6144ccad0b49943e0b
# configuration, so we hardcode everything here rather than do it
# from scratch
case "$image" in
pytorch-linux-focal-cuda12.4-cudnn9-py3-gcc9)
pytorch-linux-focal-cuda12.4-cudnn8-py3-gcc9)
CUDA_VERSION=12.4.0
CUDNN_VERSION=9
CUDNN_VERSION=8
ANACONDA_PYTHON_VERSION=3.10
GCC_VERSION=9
PROTOBUF=yes
@@ -105,9 +105,9 @@ case "$image" in
CONDA_CMAKE=yes
TRITON=yes
;;
pytorch-linux-focal-cuda12.1-cudnn9-py3-gcc9)
pytorch-linux-focal-cuda12.1-cudnn8-py3-gcc9)
CUDA_VERSION=12.1.1
CUDNN_VERSION=9
CUDNN_VERSION=8
ANACONDA_PYTHON_VERSION=3.10
GCC_VERSION=9
PROTOBUF=yes
@@ -119,9 +119,9 @@ case "$image" in
CONDA_CMAKE=yes
TRITON=yes
;;
pytorch-linux-focal-cuda12.4-cudnn9-py3-gcc9-inductor-benchmarks)
pytorch-linux-focal-cuda12.4-cudnn8-py3-gcc9-inductor-benchmarks)
CUDA_VERSION=12.4.0
CUDNN_VERSION=9
CUDNN_VERSION=8
ANACONDA_PYTHON_VERSION=3.10
GCC_VERSION=9
PROTOBUF=yes
@@ -134,9 +134,9 @@ case "$image" in
TRITON=yes
INDUCTOR_BENCHMARKS=yes
;;
pytorch-linux-focal-cuda12.1-cudnn9-py3-gcc9-inductor-benchmarks)
pytorch-linux-focal-cuda12.1-cudnn8-py3-gcc9-inductor-benchmarks)
CUDA_VERSION=12.1.1
CUDNN_VERSION=9
CUDNN_VERSION=8
ANACONDA_PYTHON_VERSION=3.10
GCC_VERSION=9
PROTOBUF=yes
@@ -149,9 +149,9 @@ case "$image" in
TRITON=yes
INDUCTOR_BENCHMARKS=yes
;;
pytorch-linux-focal-cuda12.1-cudnn9-py3.12-gcc9-inductor-benchmarks)
pytorch-linux-focal-cuda12.1-cudnn8-py3.12-gcc9-inductor-benchmarks)
CUDA_VERSION=12.1.1
CUDNN_VERSION=9
CUDNN_VERSION=8
ANACONDA_PYTHON_VERSION=3.12
GCC_VERSION=9
PROTOBUF=yes
@@ -164,9 +164,9 @@ case "$image" in
TRITON=yes
INDUCTOR_BENCHMARKS=yes
;;
pytorch-linux-focal-cuda12.4-cudnn9-py3.12-gcc9-inductor-benchmarks)
pytorch-linux-focal-cuda12.4-cudnn8-py3.12-gcc9-inductor-benchmarks)
CUDA_VERSION=12.4.0
CUDNN_VERSION=9
CUDNN_VERSION=8
ANACONDA_PYTHON_VERSION=3.12
GCC_VERSION=9
PROTOBUF=yes
@@ -179,9 +179,9 @@ case "$image" in
TRITON=yes
INDUCTOR_BENCHMARKS=yes
;;
pytorch-linux-focal-cuda11.8-cudnn9-py3-gcc9)
pytorch-linux-focal-cuda11.8-cudnn8-py3-gcc9)
CUDA_VERSION=11.8.0
CUDNN_VERSION=9
CUDNN_VERSION=8
ANACONDA_PYTHON_VERSION=3.10
GCC_VERSION=9
PROTOBUF=yes
@@ -193,9 +193,9 @@ case "$image" in
CONDA_CMAKE=yes
TRITON=yes
;;
pytorch-linux-focal-cuda12.4-cudnn9-py3-gcc9)
pytorch-linux-focal-cuda12.4-cudnn8-py3-gcc9)
CUDA_VERSION=12.4.0
CUDNN_VERSION=9
CUDNN_VERSION=8
ANACONDA_PYTHON_VERSION=3.10
GCC_VERSION=9
PROTOBUF=yes
@@ -207,9 +207,9 @@ case "$image" in
CONDA_CMAKE=yes
TRITON=yes
;;
pytorch-linux-focal-cuda12.1-cudnn9-py3-gcc9)
pytorch-linux-focal-cuda12.1-cudnn8-py3-gcc9)
CUDA_VERSION=12.1.1
CUDNN_VERSION=9
CUDNN_VERSION=8
ANACONDA_PYTHON_VERSION=3.10
GCC_VERSION=9
PROTOBUF=yes
@@ -221,9 +221,9 @@ case "$image" in
CONDA_CMAKE=yes
TRITON=yes
;;
pytorch-linux-focal-cuda12.4-cudnn9-py3-gcc9)
pytorch-linux-focal-cuda12.4-cudnn8-py3-gcc9)
CUDA_VERSION=12.4.0
CUDNN_VERSION=9
CUDNN_VERSION=8
ANACONDA_PYTHON_VERSION=3.10
GCC_VERSION=9
PROTOBUF=yes
@@ -330,10 +330,10 @@ case "$image" in
DOCS=yes
INDUCTOR_BENCHMARKS=yes
;;
pytorch-linux-jammy-cuda11.8-cudnn9-py3.8-clang12)
pytorch-linux-jammy-cuda11.8-cudnn8-py3.8-clang12)
ANACONDA_PYTHON_VERSION=3.8
CUDA_VERSION=11.8
CUDNN_VERSION=9
CUDNN_VERSION=8
CLANG_VERSION=12
PROTOBUF=yes
DB=yes
@@ -380,7 +380,7 @@ case "$image" in
ANACONDA_PYTHON_VERSION=3.9
CONDA_CMAKE=yes
;;
pytorch-linux-jammy-cuda11.8-cudnn9-py3.9-linter)
pytorch-linux-jammy-cuda11.8-cudnn8-py3.9-linter)
ANACONDA_PYTHON_VERSION=3.9
CUDA_VERSION=11.8
CONDA_CMAKE=yes
@@ -447,7 +447,7 @@ tmp_tag=$(basename "$(mktemp -u)" | tr '[:upper:]' '[:lower:]')
# when using cudnn version 8, install it separately from cuda
if [[ "$image" == *cuda* && ${OS} == "ubuntu" ]]; then
IMAGE_NAME="nvidia/cuda:${CUDA_VERSION}-cudnn${CUDNN_VERSION}-devel-ubuntu${UBUNTU_VERSION}"
if [[ ${CUDNN_VERSION} == 9 ]]; then
if [[ ${CUDNN_VERSION} == 8 ]]; then
IMAGE_NAME="nvidia/cuda:${CUDA_VERSION}-devel-ubuntu${UBUNTU_VERSION}"
fi
fi
@@ -499,7 +499,7 @@ docker build \
"$@" \
.
# NVIDIA dockers for RC releases use tag names like `11.0-cudnn9-devel-ubuntu18.04-rc`,
# NVIDIA dockers for RC releases use tag names like `11.0-cudnn8-devel-ubuntu18.04-rc`,
# for this case we will set UBUNTU_VERSION to `18.04-rc` so that the Dockerfile could
# find the correct image. As a result, here we have to replace the
# "$UBUNTU_VERSION" == "18.04-rc"

View File

@@ -113,18 +113,18 @@ COPY triton_version.txt triton_version.txt
RUN if [ -n "${TRITON}" ]; then bash ./install_triton.sh; fi
RUN rm install_triton.sh common_utils.sh triton-rocm.txt triton_version.txt
# Install AOTriton (Early fail)
COPY ./aotriton_version.txt aotriton_version.txt
COPY ./common/common_utils.sh common_utils.sh
COPY ./common/install_aotriton.sh install_aotriton.sh
RUN ["/bin/bash", "-c", "./install_aotriton.sh /opt/rocm && rm -rf install_aotriton.sh aotriton_version.txt common_utils.sh"]
ENV AOTRITON_INSTALLED_PREFIX /opt/rocm/aotriton
# Install ccache/sccache (do this last, so we get priority in PATH)
COPY ./common/install_cache.sh install_cache.sh
ENV PATH /opt/cache/bin:$PATH
RUN bash ./install_cache.sh && rm install_cache.sh
# Install AOTriton
COPY ci_commit_pins/aotriton.txt aotriton.txt
COPY ./common/common_utils.sh common_utils.sh
COPY ./common/install_aotriton.sh install_aotriton.sh
RUN bash ./install_aotriton.sh /opt/rocm/aotriton && rm -rf install_aotriton.sh aotriton aotriton.txt common_utils.sh
ENV AOTRITON_INSTALLED_PREFIX /opt/rocm/aotriton
# Include BUILD_ENVIRONMENT environment variable in image
ARG BUILD_ENVIRONMENT
ENV BUILD_ENVIRONMENT ${BUILD_ENVIRONMENT}

View File

@@ -0,0 +1 @@
24a3fe9cb57e5cda3c923df29743f9767194cc27

View File

@@ -1 +1 @@
01cbe5045a6898c9a925f01435c8277b2fe6afcc
bbe6246e37d8aa791c67daaf9d9d61b26c9ccfdc

.ci/docker/common/install_aotriton.sh Executable file → Normal file
View File

@@ -4,20 +4,21 @@ set -ex
source "$(dirname "${BASH_SOURCE[0]}")/common_utils.sh"
TARBALL='aotriton.tar.bz2'
# This read command always returns with exit code 1
read -d "\n" VER MANYLINUX ROCMBASE PINNED_COMMIT SHA256 < aotriton_version.txt || true
ARCH=$(uname -m)
AOTRITON_DIR="aotriton"
AOTRITON_PINNED_NAME="aotriton" # No .txt extension
AOTRITON_PINNED_COMMIT=$(get_pinned_commit ${AOTRITON_PINNED_NAME})
AOTRITON_INSTALL_PREFIX="$1"
AOTRITON_URL="https://github.com/ROCm/aotriton/releases/download/${VER}/aotriton-${VER}-${MANYLINUX}_${ARCH}-${ROCMBASE}.tar.bz2"
cd "${AOTRITON_INSTALL_PREFIX}"
# Must use -L to follow redirects
curl -L --retry 3 -o "${TARBALL}" "${AOTRITON_URL}"
ACTUAL_SHA256=$(sha256sum "${TARBALL}" | cut -d " " -f 1)
if [ "${SHA256}" != "${ACTUAL_SHA256}" ]; then
echo -n "Error: The SHA256 of downloaded tarball is ${ACTUAL_SHA256},"
echo " which does not match the expected value ${SHA256}."
exit
fi
tar xf "${TARBALL}" && rm -rf "${TARBALL}"
git clone https://github.com/ROCm/aotriton.git "${AOTRITON_DIR}"
cd "${AOTRITON_DIR}"
git checkout "${AOTRITON_PINNED_COMMIT}"
git submodule sync --recursive
git submodule update --init --recursive --force --depth 1
mkdir build
cd build
cmake .. -G Ninja -DCMAKE_INSTALL_PREFIX=./install_dir -DCMAKE_BUILD_TYPE=Release -DAOTRITON_COMPRESS_KERNEL=OFF -DAOTRITON_NO_PYTHON=ON -DAOTRITON_NO_SHARED=ON
ninja install
mkdir -p "${AOTRITON_INSTALL_PREFIX}"
cp -r install_dir/* "${AOTRITON_INSTALL_PREFIX}"
find /tmp/ -mindepth 1 -delete
rm -rf ~/.triton
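
For contrast with the pinned-commit build introduced here, the removed tarball path parsed the five whitespace-separated fields of aotriton_version.txt and rejected any download whose SHA256 did not match. A minimal Python sketch of that parse-and-verify step (file and field names are taken from the diff; everything else is illustrative):

import hashlib

def parse_version_file(path: str = "aotriton_version.txt"):
    # Fields, in order: version, manylinux tag, ROCm base, pinned commit,
    # expected tarball SHA256 -- matching the removed `read` command.
    with open(path) as f:
        ver, manylinux, rocmbase, pinned_commit, sha256 = f.read().split()
    return ver, manylinux, rocmbase, pinned_commit, sha256

def verify_sha256(tarball_path: str, expected: str) -> None:
    h = hashlib.sha256()
    with open(tarball_path, "rb") as f:
        for chunk in iter(lambda: f.read(1 << 20), b""):
            h.update(chunk)
    if h.hexdigest() != expected:
        raise RuntimeError(
            f"SHA256 of downloaded tarball is {h.hexdigest()}, "
            f"which does not match the expected value {expected}"
        )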

View File

@@ -3,7 +3,7 @@
set -ex
install_ubuntu() {
# NVIDIA dockers for RC releases use tag names like `11.0-cudnn9-devel-ubuntu18.04-rc`,
# NVIDIA dockers for RC releases use tag names like `11.0-cudnn8-devel-ubuntu18.04-rc`,
# for this case we will set UBUNTU_VERSION to `18.04-rc` so that the Dockerfile could
# find the correct image. As a result, here we have to check for
# "$UBUNTU_VERSION" == "18.04"*

View File

@@ -1,18 +1,23 @@
#!/bin/bash
if [[ -n "${CUDNN_VERSION}" ]]; then
if [[ ${CUDNN_VERSION} == 8 ]]; then
# cuDNN license: https://developer.nvidia.com/cudnn/license_agreement
mkdir tmp_cudnn
pushd tmp_cudnn
if [[ ${CUDA_VERSION:0:2} == "12" ]]; then
CUDNN_NAME="cudnn-linux-x86_64-9.1.0.70_cuda12-archive"
elif [[ ${CUDA_VERSION:0:2} == "11" ]]; then
CUDNN_NAME="cudnn-linux-x86_64-9.1.0.70_cuda11-archive"
if [[ ${CUDA_VERSION:0:4} == "12.4" ]]; then
CUDNN_NAME="cudnn-linux-x86_64-8.9.7.29_cuda12-archive"
curl --retry 3 -OLs https://developer.download.nvidia.com/compute/cudnn/redist/cudnn/linux-x86_64/${CUDNN_NAME}.tar.xz
elif [[ ${CUDA_VERSION:0:4} == "12.1" ]]; then
CUDNN_NAME="cudnn-linux-x86_64-8.9.2.26_cuda12-archive"
curl --retry 3 -OLs https://developer.download.nvidia.com/compute/cudnn/redist/cudnn/linux-x86_64/${CUDNN_NAME}.tar.xz
elif [[ ${CUDA_VERSION:0:4} == "11.8" ]]; then
CUDNN_NAME="cudnn-linux-x86_64-8.7.0.84_cuda11-archive"
curl --retry 3 -OLs https://developer.download.nvidia.com/compute/redist/cudnn/v8.7.0/local_installers/11.8/${CUDNN_NAME}.tar.xz
else
print "Unsupported CUDA version ${CUDA_VERSION}"
exit 1
fi
curl --retry 3 -OLs https://developer.download.nvidia.com/compute/cudnn/redist/cudnn/linux-x86_64/${CUDNN_NAME}.tar.xz
tar xf ${CUDNN_NAME}.tar.xz
cp -a ${CUDNN_NAME}/include/* /usr/local/cuda/include/
cp -a ${CUDNN_NAME}/lib/* /usr/local/cuda/lib64/
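
The rewritten script pins one cuDNN 8 archive per CUDA release rather than a single cuDNN 9 archive per major version. The mapping, expressed as a Python dict for quick reference (archive names copied from the diff; the helper is hypothetical):

CUDNN_ARCHIVE_BY_CUDA = {
    "12.4": "cudnn-linux-x86_64-8.9.7.29_cuda12-archive",
    "12.1": "cudnn-linux-x86_64-8.9.2.26_cuda12-archive",
    "11.8": "cudnn-linux-x86_64-8.7.0.84_cuda11-archive",
}

def cudnn_archive(cuda_version: str) -> str:
    key = cuda_version[:4]  # mirrors ${CUDA_VERSION:0:4}
    try:
        return CUDNN_ARCHIVE_BY_CUDA[key]
    except KeyError:
        raise SystemExit(f"Unsupported CUDA version {cuda_version}")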

View File

@@ -139,7 +139,7 @@ COPY --from=pytorch/llvm:9.0.1 /opt/llvm /opt/llvm
ARG CUDNN_VERSION
ARG CUDA_VERSION
COPY ./common/install_cudnn.sh install_cudnn.sh
RUN if [ -n "${CUDNN_VERSION}" ]; then bash install_cudnn.sh; fi
RUN if [ "${CUDNN_VERSION}" -eq 8 ]; then bash install_cudnn.sh; fi
RUN rm install_cudnn.sh
# Install CUSPARSELT

View File

@@ -105,18 +105,18 @@ COPY triton_version.txt triton_version.txt
RUN if [ -n "${TRITON}" ]; then bash ./install_triton.sh; fi
RUN rm install_triton.sh common_utils.sh triton-rocm.txt triton_version.txt
# Install AOTriton
COPY ./aotriton_version.txt aotriton_version.txt
COPY ./common/common_utils.sh common_utils.sh
COPY ./common/install_aotriton.sh install_aotriton.sh
RUN ["/bin/bash", "-c", "./install_aotriton.sh /opt/rocm && rm -rf install_aotriton.sh aotriton_version.txt common_utils.sh"]
ENV AOTRITON_INSTALLED_PREFIX /opt/rocm/aotriton
# Install ccache/sccache (do this last, so we get priority in PATH)
COPY ./common/install_cache.sh install_cache.sh
ENV PATH /opt/cache/bin:$PATH
RUN bash ./install_cache.sh && rm install_cache.sh
# Install AOTriton
COPY ci_commit_pins/aotriton.txt aotriton.txt
COPY ./common/common_utils.sh common_utils.sh
COPY ./common/install_aotriton.sh install_aotriton.sh
RUN bash ./install_aotriton.sh /opt/rocm/aotriton && rm -rf install_aotriton.sh aotriton aotriton.txt common_utils.sh
ENV AOTRITON_INSTALLED_PREFIX /opt/rocm/aotriton
# Include BUILD_ENVIRONMENT environment variable in image
ARG BUILD_ENVIRONMENT
ENV BUILD_ENVIRONMENT ${BUILD_ENVIRONMENT}

View File

@@ -368,7 +368,7 @@ test_inductor_cpp_wrapper_abi_compatible() {
echo "Testing Inductor cpp wrapper mode with TORCHINDUCTOR_ABI_COMPATIBLE=1"
# cpu stack allocation causes segfault and needs more investigation
PYTORCH_TESTING_DEVICE_ONLY_FOR="" python test/run_test.py --include inductor/test_cpu_cpp_wrapper
python test/run_test.py --include inductor/test_cpu_cpp_wrapper
python test/run_test.py --include inductor/test_cuda_cpp_wrapper
TORCHINDUCTOR_CPP_WRAPPER=1 python benchmarks/dynamo/timm_models.py --device cuda --accuracy --amp \

View File

@@ -62,6 +62,4 @@ readability-string-compare,
'
HeaderFilterRegex: '^(aten/|c10/|torch/).*$'
WarningsAsErrors: '*'
CheckOptions:
misc-header-include-cycle.IgnoredFilesList: 'format.h;ivalue.h;custom_class.h;Dict.h;List.h'
...

View File

@@ -16,17 +16,6 @@ self-hosted-runner:
- linux.8xlarge.nvidia.gpu
- linux.16xlarge.nvidia.gpu
- linux.g5.4xlarge.nvidia.gpu
# Organization-wide AWS Linux Runners on Linux Foundation account
- lf.linux.large
- lf.linux.2xlarge
- lf.linux.4xlarge
- lf.linux.12xlarge
- lf.linux.24xlarge
- lf.linux.arm64.2xlarge
- lf.linux.4xlarge.nvidia.gpu
- lf.linux.8xlarge.nvidia.gpu
- lf.linux.16xlarge.nvidia.gpu
- lf.linux.g5.4xlarge.nvidia.gpu
# Repo-specific IBM hosted S390x runner
- linux.s390x
# Organization wide AWS Windows runners

View File

@@ -1 +1 @@
b829e936f7cc61b48149f5f957a451a38bf2a178
1980f8af5bcd0bb2ce51965cf79d8d4c25dad8a0

View File

@@ -8,7 +8,6 @@ ciflow_push_tags:
- ciflow/inductor
- ciflow/inductor-perf-compare
- ciflow/inductor-micro-benchmark
- ciflow/inductor-cu124
- ciflow/linux-aarch64
- ciflow/mps
- ciflow/nightly

View File

@@ -19,7 +19,7 @@ CUDA_ARCHES = ["11.8", "12.1", "12.4"]
CUDA_ARCHES_FULL_VERSION = {"11.8": "11.8.0", "12.1": "12.1.1", "12.4": "12.4.0"}
CUDA_ARCHES_CUDNN_VERSION = {"11.8": "9", "12.1": "9", "12.4": "9"}
CUDA_ARCHES_CUDNN_VERSION = {"11.8": "8", "12.1": "8", "12.4": "8"}
ROCM_ARCHES = ["6.0", "6.1"]
@@ -42,7 +42,7 @@ PYTORCH_EXTRA_INSTALL_REQUIREMENTS = {
"nvidia-cuda-nvrtc-cu11==11.8.89; platform_system == 'Linux' and platform_machine == 'x86_64' | " # noqa: B950
"nvidia-cuda-runtime-cu11==11.8.89; platform_system == 'Linux' and platform_machine == 'x86_64' | "
"nvidia-cuda-cupti-cu11==11.8.87; platform_system == 'Linux' and platform_machine == 'x86_64' | "
"nvidia-cudnn-cu11==9.1.0.70; platform_system == 'Linux' and platform_machine == 'x86_64' | "
"nvidia-cudnn-cu11==8.7.0.84; platform_system == 'Linux' and platform_machine == 'x86_64' | "
"nvidia-cublas-cu11==11.11.3.6; platform_system == 'Linux' and platform_machine == 'x86_64' | "
"nvidia-cufft-cu11==10.9.0.58; platform_system == 'Linux' and platform_machine == 'x86_64' | "
"nvidia-curand-cu11==10.3.0.86; platform_system == 'Linux' and platform_machine == 'x86_64' | "
@@ -55,7 +55,7 @@ PYTORCH_EXTRA_INSTALL_REQUIREMENTS = {
"nvidia-cuda-nvrtc-cu12==12.1.105; platform_system == 'Linux' and platform_machine == 'x86_64' | " # noqa: B950
"nvidia-cuda-runtime-cu12==12.1.105; platform_system == 'Linux' and platform_machine == 'x86_64' | "
"nvidia-cuda-cupti-cu12==12.1.105; platform_system == 'Linux' and platform_machine == 'x86_64' | "
"nvidia-cudnn-cu12==9.1.0.70; platform_system == 'Linux' and platform_machine == 'x86_64' | "
"nvidia-cudnn-cu12==8.9.2.26; platform_system == 'Linux' and platform_machine == 'x86_64' | "
"nvidia-cublas-cu12==12.1.3.1; platform_system == 'Linux' and platform_machine == 'x86_64' | "
"nvidia-cufft-cu12==11.0.2.54; platform_system == 'Linux' and platform_machine == 'x86_64' | "
"nvidia-curand-cu12==10.3.2.106; platform_system == 'Linux' and platform_machine == 'x86_64' | "
@@ -68,7 +68,7 @@ PYTORCH_EXTRA_INSTALL_REQUIREMENTS = {
"nvidia-cuda-nvrtc-cu12==12.4.99; platform_system == 'Linux' and platform_machine == 'x86_64' | "
"nvidia-cuda-runtime-cu12==12.4.99; platform_system == 'Linux' and platform_machine == 'x86_64' | "
"nvidia-cuda-cupti-cu12==12.4.99; platform_system == 'Linux' and platform_machine == 'x86_64' | "
"nvidia-cudnn-cu12==9.1.0.70; platform_system == 'Linux' and platform_machine == 'x86_64' | "
"nvidia-cudnn-cu12==8.9.7.29; platform_system == 'Linux' and platform_machine == 'x86_64' | "
"nvidia-cublas-cu12==12.4.2.65; platform_system == 'Linux' and platform_machine == 'x86_64' | "
"nvidia-cufft-cu12==11.2.0.44; platform_system == 'Linux' and platform_machine == 'x86_64' | "
"nvidia-curand-cu12==10.3.5.119; platform_system == 'Linux' and platform_machine == 'x86_64' | "
@@ -347,10 +347,6 @@ def generate_wheels_matrix(
for python_version in python_versions:
for arch_version in arches:
gpu_arch_type = arch_type(arch_version)
# Disable py3.12 builds for ROCm because of triton dependency
# on llnl-hatchet, which doesn't have py3.12 wheels available
if gpu_arch_type == "rocm" and python_version == "3.12":
continue
gpu_arch_version = (
""
if arch_version == "cpu"
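
The hunk above sits inside generate_wheels_matrix, which crosses every Python version with every architecture and derives the GPU fields per entry; this change drops the ROCm/py3.12 skip shown in the removed lines. A simplified sketch of the loop, assuming only the names visible in the diff (arch_type here is a condensed stand-in for the real helper):

CUDA_ARCHES = ["11.8", "12.1", "12.4"]
ROCM_ARCHES = ["6.0", "6.1"]

def arch_type(arch_version: str) -> str:
    if arch_version in CUDA_ARCHES:
        return "cuda"
    if arch_version in ROCM_ARCHES:
        return "rocm"
    return "cpu"

def wheel_matrix(python_versions, arches):
    for python_version in python_versions:
        for arch_version in arches:
            gpu_arch_type = arch_type(arch_version)
            # gpu_arch_version is empty for CPU-only wheels.
            gpu_arch_version = "" if arch_version == "cpu" else arch_version
            yield {
                "python_version": python_version,
                "gpu_arch_type": gpu_arch_type,
                "gpu_arch_version": gpu_arch_version,
            }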

View File

@@ -773,13 +773,13 @@ class TestBypassFailures(TestCase):
# than the one on the base commit. This should still count as broken trunk
"pr_num": 104214,
"related_failure_count": 0,
"flaky_or_broken_trunk": 1,
"unrelated_failure_count": 1,
},
{
# This PR had one broken trunk failure and it used ghstack
"pr_num": 105145,
"related_failure_count": 0,
"flaky_or_broken_trunk": 1,
"unrelated_failure_count": 1,
},
{
# The failure on the merge base was retried successfully and
@@ -788,20 +788,20 @@
# be used to detect broken trunk
"pr_num": 107160,
"related_failure_count": 0,
"flaky_or_broken_trunk": 1,
"unrelated_failure_count": 4,
},
{
# This PR used Dr.CI broken trunk classification
"pr_num": 111253,
"related_failure_count": 1,
"flaky_or_broken_trunk": 1,
"unrelated_failure_count": 2,
},
]
for case in test_cases:
pr_num = case["pr_num"]
related_failure_count = case["related_failure_count"]
flaky_or_broken_trunk = case["flaky_or_broken_trunk"]
unrelated_failure_count = case["unrelated_failure_count"]
pr = GitHubPR("pytorch", "pytorch", pr_num)
checks = pr.get_checkrun_conclusions()
@@ -823,7 +823,7 @@
)
self.assertTrue(len(pending) == 0)
self.assertTrue(
len(failed) == flaky_or_broken_trunk + related_failure_count
len(failed) == unrelated_failure_count + related_failure_count
)
def test_ignore_current(self, *args: Any) -> None:

View File

@@ -2027,8 +2027,10 @@ def categorize_checks(
pending_checks: List[Tuple[str, Optional[str], Optional[int]]] = []
failed_checks: List[Tuple[str, Optional[str], Optional[int]]] = []
# failed_checks_categorization is used to keep track of all ignorable failures when saving the merge record on Rockset
failed_checks_categorization: Dict[str, List[Any]] = defaultdict(list)
# ok_failed_checks is used with ok_failed_checks_threshold while ignorable_failed_checks
# is used to keep track of all ignorable failures when saving the merge record on Rockset
ok_failed_checks: List[Tuple[str, Optional[str], Optional[int]]] = []
ignorable_failed_checks: Dict[str, List[Any]] = defaultdict(list)
# If required_checks is not set or empty, consider all names are relevant
relevant_checknames = [
@@ -2056,38 +2058,36 @@
continue
elif not is_passing_status(check_runs[checkname].status):
target = (
failed_checks_categorization[classification]
ignorable_failed_checks[classification]
if classification
in ("IGNORE_CURRENT_CHECK", "BROKEN_TRUNK", "FLAKY", "UNSTABLE")
else failed_checks
)
target.append((checkname, url, job_id))
flaky_or_broken_trunk = (
failed_checks_categorization["BROKEN_TRUNK"]
+ failed_checks_categorization["FLAKY"]
)
if classification in ("BROKEN_TRUNK", "FLAKY", "UNSTABLE"):
ok_failed_checks.append((checkname, url, job_id))
if flaky_or_broken_trunk:
if ok_failed_checks:
warn(
f"The following {len(flaky_or_broken_trunk)} checks failed but were likely due flakiness or broken trunk: "
+ ", ".join([x[0] for x in flaky_or_broken_trunk])
f"The following {len(ok_failed_checks)} checks failed but were likely due flakiness or broken trunk: "
+ ", ".join([x[0] for x in ok_failed_checks])
+ (
f" but this is greater than the threshold of {ok_failed_checks_threshold} so merge will fail"
if ok_failed_checks_threshold is not None
and len(flaky_or_broken_trunk) > ok_failed_checks_threshold
and len(ok_failed_checks) > ok_failed_checks_threshold
else ""
)
)
if (
ok_failed_checks_threshold is not None
and len(flaky_or_broken_trunk) > ok_failed_checks_threshold
and len(ok_failed_checks) > ok_failed_checks_threshold
):
failed_checks = failed_checks + flaky_or_broken_trunk
failed_checks = failed_checks + ok_failed_checks
# The list of failed_checks_categorization is returned so that it can be saved into the Rockset merge record
return (pending_checks, failed_checks, failed_checks_categorization)
# The list of ignorable_failed_checks is returned so that it can be saved into the Rockset merge record
return (pending_checks, failed_checks, ignorable_failed_checks)
def merge(
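
The net effect of this hunk is a rename from failed_checks_categorization/flaky_or_broken_trunk back to ignorable_failed_checks/ok_failed_checks: BROKEN_TRUNK, FLAKY, and UNSTABLE failures are collected as ok_failed_checks and tolerated only up to ok_failed_checks_threshold, beyond which they count as real failures. A condensed sketch of that final step (apply_threshold is a hypothetical extraction; types are simplified relative to the real function):

from typing import List, Optional, Tuple

Check = Tuple[str, Optional[str], Optional[int]]

def apply_threshold(
    failed_checks: List[Check],
    ok_failed_checks: List[Check],
    ok_failed_checks_threshold: Optional[int],
) -> List[Check]:
    # Too many "ok" failures: fold them back into the hard failures so
    # the merge is rejected.
    if (
        ok_failed_checks_threshold is not None
        and len(ok_failed_checks) > ok_failed_checks_threshold
    ):
        return failed_checks + ok_failed_checks
    return failed_checks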

View File

@@ -38,19 +38,19 @@ jobs:
matrix:
runner: [linux.12xlarge]
docker-image-name: [
pytorch-linux-focal-cuda12.4-cudnn9-py3-gcc9,
pytorch-linux-focal-cuda12.4-cudnn9-py3-gcc9-inductor-benchmarks,
pytorch-linux-focal-cuda12.4-cudnn9-py3.12-gcc9-inductor-benchmarks,
pytorch-linux-focal-cuda12.1-cudnn9-py3-gcc9,
pytorch-linux-focal-cuda12.1-cudnn9-py3-gcc9-inductor-benchmarks,
pytorch-linux-focal-cuda12.1-cudnn9-py3.12-gcc9-inductor-benchmarks,
pytorch-linux-focal-cuda11.8-cudnn9-py3-gcc9,
pytorch-linux-focal-cuda12.4-cudnn8-py3-gcc9,
pytorch-linux-focal-cuda12.4-cudnn8-py3-gcc9-inductor-benchmarks,
pytorch-linux-focal-cuda12.4-cudnn8-py3.12-gcc9-inductor-benchmarks,
pytorch-linux-focal-cuda12.1-cudnn8-py3-gcc9,
pytorch-linux-focal-cuda12.1-cudnn8-py3-gcc9-inductor-benchmarks,
pytorch-linux-focal-cuda12.1-cudnn8-py3.12-gcc9-inductor-benchmarks,
pytorch-linux-focal-cuda11.8-cudnn8-py3-gcc9,
pytorch-linux-focal-py3.8-clang10,
pytorch-linux-focal-py3.11-clang10,
pytorch-linux-focal-py3.12-clang10,
pytorch-linux-focal-rocm-n-1-py3,
pytorch-linux-focal-rocm-n-py3,
pytorch-linux-jammy-cuda11.8-cudnn9-py3.8-clang12,
pytorch-linux-jammy-cuda11.8-cudnn8-py3.8-clang12,
pytorch-linux-focal-py3-clang9-android-ndk-r21e,
pytorch-linux-jammy-py3.8-gcc11,
pytorch-linux-jammy-py3.8-gcc11-inductor-benchmarks,
@@ -58,7 +58,7 @@ jobs:
pytorch-linux-jammy-py3-clang15-asan,
pytorch-linux-focal-py3-clang10-onnx,
pytorch-linux-focal-linter,
pytorch-linux-jammy-cuda11.8-cudnn9-py3.9-linter,
pytorch-linux-jammy-cuda11.8-cudnn8-py3.9-linter,
pytorch-linux-jammy-py3-clang12-executorch
]
include:

View File

@@ -149,10 +149,3 @@ jobs:
- name: Teardown Linux
uses: pytorch/test-infra/.github/actions/teardown-linux@main
if: always()
validate:
needs: build
uses: pytorch/builder/.github/workflows/validate-docker-images.yml@main
with:
channel: nightly
ref: main

View File

@@ -54,7 +54,7 @@ jobs:
ALPINE_IMAGE: "arm64v8/alpine"
build_name: manywheel-py3_8-cpu-aarch64
build_environment: linux-aarch64-binary-manywheel
PYTORCH_EXTRA_INSTALL_REQUIREMENTS: nvidia-cuda-nvrtc-cu12==12.1.105; platform_system == 'Linux' and platform_machine == 'x86_64' | nvidia-cuda-runtime-cu12==12.1.105; platform_system == 'Linux' and platform_machine == 'x86_64' | nvidia-cuda-cupti-cu12==12.1.105; platform_system == 'Linux' and platform_machine == 'x86_64' | nvidia-cudnn-cu12==9.1.0.70; platform_system == 'Linux' and platform_machine == 'x86_64' | nvidia-cublas-cu12==12.1.3.1; platform_system == 'Linux' and platform_machine == 'x86_64' | nvidia-cufft-cu12==11.0.2.54; platform_system == 'Linux' and platform_machine == 'x86_64' | nvidia-curand-cu12==10.3.2.106; platform_system == 'Linux' and platform_machine == 'x86_64' | nvidia-cusolver-cu12==11.4.5.107; platform_system == 'Linux' and platform_machine == 'x86_64' | nvidia-cusparse-cu12==12.1.0.106; platform_system == 'Linux' and platform_machine == 'x86_64' | nvidia-nccl-cu12==2.20.5; platform_system == 'Linux' and platform_machine == 'x86_64' | nvidia-nvtx-cu12==12.1.105; platform_system == 'Linux' and platform_machine == 'x86_64'
PYTORCH_EXTRA_INSTALL_REQUIREMENTS: nvidia-cuda-nvrtc-cu12==12.1.105; platform_system == 'Linux' and platform_machine == 'x86_64' | nvidia-cuda-runtime-cu12==12.1.105; platform_system == 'Linux' and platform_machine == 'x86_64' | nvidia-cuda-cupti-cu12==12.1.105; platform_system == 'Linux' and platform_machine == 'x86_64' | nvidia-cudnn-cu12==8.9.2.26; platform_system == 'Linux' and platform_machine == 'x86_64' | nvidia-cublas-cu12==12.1.3.1; platform_system == 'Linux' and platform_machine == 'x86_64' | nvidia-cufft-cu12==11.0.2.54; platform_system == 'Linux' and platform_machine == 'x86_64' | nvidia-curand-cu12==10.3.2.106; platform_system == 'Linux' and platform_machine == 'x86_64' | nvidia-cusolver-cu12==11.4.5.107; platform_system == 'Linux' and platform_machine == 'x86_64' | nvidia-cusparse-cu12==12.1.0.106; platform_system == 'Linux' and platform_machine == 'x86_64' | nvidia-nccl-cu12==2.20.5; platform_system == 'Linux' and platform_machine == 'x86_64' | nvidia-nvtx-cu12==12.1.105; platform_system == 'Linux' and platform_machine == 'x86_64'
secrets:
github-token: ${{ secrets.GITHUB_TOKEN }}
manywheel-py3_8-cpu-aarch64-test: # Testing
@@ -162,7 +162,7 @@ jobs:
ALPINE_IMAGE: "arm64v8/alpine"
build_name: manywheel-py3_9-cpu-aarch64
build_environment: linux-aarch64-binary-manywheel
PYTORCH_EXTRA_INSTALL_REQUIREMENTS: nvidia-cuda-nvrtc-cu12==12.1.105; platform_system == 'Linux' and platform_machine == 'x86_64' | nvidia-cuda-runtime-cu12==12.1.105; platform_system == 'Linux' and platform_machine == 'x86_64' | nvidia-cuda-cupti-cu12==12.1.105; platform_system == 'Linux' and platform_machine == 'x86_64' | nvidia-cudnn-cu12==9.1.0.70; platform_system == 'Linux' and platform_machine == 'x86_64' | nvidia-cublas-cu12==12.1.3.1; platform_system == 'Linux' and platform_machine == 'x86_64' | nvidia-cufft-cu12==11.0.2.54; platform_system == 'Linux' and platform_machine == 'x86_64' | nvidia-curand-cu12==10.3.2.106; platform_system == 'Linux' and platform_machine == 'x86_64' | nvidia-cusolver-cu12==11.4.5.107; platform_system == 'Linux' and platform_machine == 'x86_64' | nvidia-cusparse-cu12==12.1.0.106; platform_system == 'Linux' and platform_machine == 'x86_64' | nvidia-nccl-cu12==2.20.5; platform_system == 'Linux' and platform_machine == 'x86_64' | nvidia-nvtx-cu12==12.1.105; platform_system == 'Linux' and platform_machine == 'x86_64'
PYTORCH_EXTRA_INSTALL_REQUIREMENTS: nvidia-cuda-nvrtc-cu12==12.1.105; platform_system == 'Linux' and platform_machine == 'x86_64' | nvidia-cuda-runtime-cu12==12.1.105; platform_system == 'Linux' and platform_machine == 'x86_64' | nvidia-cuda-cupti-cu12==12.1.105; platform_system == 'Linux' and platform_machine == 'x86_64' | nvidia-cudnn-cu12==8.9.2.26; platform_system == 'Linux' and platform_machine == 'x86_64' | nvidia-cublas-cu12==12.1.3.1; platform_system == 'Linux' and platform_machine == 'x86_64' | nvidia-cufft-cu12==11.0.2.54; platform_system == 'Linux' and platform_machine == 'x86_64' | nvidia-curand-cu12==10.3.2.106; platform_system == 'Linux' and platform_machine == 'x86_64' | nvidia-cusolver-cu12==11.4.5.107; platform_system == 'Linux' and platform_machine == 'x86_64' | nvidia-cusparse-cu12==12.1.0.106; platform_system == 'Linux' and platform_machine == 'x86_64' | nvidia-nccl-cu12==2.20.5; platform_system == 'Linux' and platform_machine == 'x86_64' | nvidia-nvtx-cu12==12.1.105; platform_system == 'Linux' and platform_machine == 'x86_64'
secrets:
github-token: ${{ secrets.GITHUB_TOKEN }}
manywheel-py3_9-cpu-aarch64-test: # Testing
@@ -270,7 +270,7 @@ jobs:
ALPINE_IMAGE: "arm64v8/alpine"
build_name: manywheel-py3_10-cpu-aarch64
build_environment: linux-aarch64-binary-manywheel
PYTORCH_EXTRA_INSTALL_REQUIREMENTS: nvidia-cuda-nvrtc-cu12==12.1.105; platform_system == 'Linux' and platform_machine == 'x86_64' | nvidia-cuda-runtime-cu12==12.1.105; platform_system == 'Linux' and platform_machine == 'x86_64' | nvidia-cuda-cupti-cu12==12.1.105; platform_system == 'Linux' and platform_machine == 'x86_64' | nvidia-cudnn-cu12==9.1.0.70; platform_system == 'Linux' and platform_machine == 'x86_64' | nvidia-cublas-cu12==12.1.3.1; platform_system == 'Linux' and platform_machine == 'x86_64' | nvidia-cufft-cu12==11.0.2.54; platform_system == 'Linux' and platform_machine == 'x86_64' | nvidia-curand-cu12==10.3.2.106; platform_system == 'Linux' and platform_machine == 'x86_64' | nvidia-cusolver-cu12==11.4.5.107; platform_system == 'Linux' and platform_machine == 'x86_64' | nvidia-cusparse-cu12==12.1.0.106; platform_system == 'Linux' and platform_machine == 'x86_64' | nvidia-nccl-cu12==2.20.5; platform_system == 'Linux' and platform_machine == 'x86_64' | nvidia-nvtx-cu12==12.1.105; platform_system == 'Linux' and platform_machine == 'x86_64'
PYTORCH_EXTRA_INSTALL_REQUIREMENTS: nvidia-cuda-nvrtc-cu12==12.1.105; platform_system == 'Linux' and platform_machine == 'x86_64' | nvidia-cuda-runtime-cu12==12.1.105; platform_system == 'Linux' and platform_machine == 'x86_64' | nvidia-cuda-cupti-cu12==12.1.105; platform_system == 'Linux' and platform_machine == 'x86_64' | nvidia-cudnn-cu12==8.9.2.26; platform_system == 'Linux' and platform_machine == 'x86_64' | nvidia-cublas-cu12==12.1.3.1; platform_system == 'Linux' and platform_machine == 'x86_64' | nvidia-cufft-cu12==11.0.2.54; platform_system == 'Linux' and platform_machine == 'x86_64' | nvidia-curand-cu12==10.3.2.106; platform_system == 'Linux' and platform_machine == 'x86_64' | nvidia-cusolver-cu12==11.4.5.107; platform_system == 'Linux' and platform_machine == 'x86_64' | nvidia-cusparse-cu12==12.1.0.106; platform_system == 'Linux' and platform_machine == 'x86_64' | nvidia-nccl-cu12==2.20.5; platform_system == 'Linux' and platform_machine == 'x86_64' | nvidia-nvtx-cu12==12.1.105; platform_system == 'Linux' and platform_machine == 'x86_64'
secrets:
github-token: ${{ secrets.GITHUB_TOKEN }}
manywheel-py3_10-cpu-aarch64-test: # Testing
@@ -378,7 +378,7 @@ jobs:
ALPINE_IMAGE: "arm64v8/alpine"
build_name: manywheel-py3_11-cpu-aarch64
build_environment: linux-aarch64-binary-manywheel
PYTORCH_EXTRA_INSTALL_REQUIREMENTS: nvidia-cuda-nvrtc-cu12==12.1.105; platform_system == 'Linux' and platform_machine == 'x86_64' | nvidia-cuda-runtime-cu12==12.1.105; platform_system == 'Linux' and platform_machine == 'x86_64' | nvidia-cuda-cupti-cu12==12.1.105; platform_system == 'Linux' and platform_machine == 'x86_64' | nvidia-cudnn-cu12==9.1.0.70; platform_system == 'Linux' and platform_machine == 'x86_64' | nvidia-cublas-cu12==12.1.3.1; platform_system == 'Linux' and platform_machine == 'x86_64' | nvidia-cufft-cu12==11.0.2.54; platform_system == 'Linux' and platform_machine == 'x86_64' | nvidia-curand-cu12==10.3.2.106; platform_system == 'Linux' and platform_machine == 'x86_64' | nvidia-cusolver-cu12==11.4.5.107; platform_system == 'Linux' and platform_machine == 'x86_64' | nvidia-cusparse-cu12==12.1.0.106; platform_system == 'Linux' and platform_machine == 'x86_64' | nvidia-nccl-cu12==2.20.5; platform_system == 'Linux' and platform_machine == 'x86_64' | nvidia-nvtx-cu12==12.1.105; platform_system == 'Linux' and platform_machine == 'x86_64'
PYTORCH_EXTRA_INSTALL_REQUIREMENTS: nvidia-cuda-nvrtc-cu12==12.1.105; platform_system == 'Linux' and platform_machine == 'x86_64' | nvidia-cuda-runtime-cu12==12.1.105; platform_system == 'Linux' and platform_machine == 'x86_64' | nvidia-cuda-cupti-cu12==12.1.105; platform_system == 'Linux' and platform_machine == 'x86_64' | nvidia-cudnn-cu12==8.9.2.26; platform_system == 'Linux' and platform_machine == 'x86_64' | nvidia-cublas-cu12==12.1.3.1; platform_system == 'Linux' and platform_machine == 'x86_64' | nvidia-cufft-cu12==11.0.2.54; platform_system == 'Linux' and platform_machine == 'x86_64' | nvidia-curand-cu12==10.3.2.106; platform_system == 'Linux' and platform_machine == 'x86_64' | nvidia-cusolver-cu12==11.4.5.107; platform_system == 'Linux' and platform_machine == 'x86_64' | nvidia-cusparse-cu12==12.1.0.106; platform_system == 'Linux' and platform_machine == 'x86_64' | nvidia-nccl-cu12==2.20.5; platform_system == 'Linux' and platform_machine == 'x86_64' | nvidia-nvtx-cu12==12.1.105; platform_system == 'Linux' and platform_machine == 'x86_64'
secrets:
github-token: ${{ secrets.GITHUB_TOKEN }}
manywheel-py3_11-cpu-aarch64-test: # Testing
@@ -486,7 +486,7 @@ jobs:
ALPINE_IMAGE: "arm64v8/alpine"
build_name: manywheel-py3_12-cpu-aarch64
build_environment: linux-aarch64-binary-manywheel
PYTORCH_EXTRA_INSTALL_REQUIREMENTS: nvidia-cuda-nvrtc-cu12==12.1.105; platform_system == 'Linux' and platform_machine == 'x86_64' | nvidia-cuda-runtime-cu12==12.1.105; platform_system == 'Linux' and platform_machine == 'x86_64' | nvidia-cuda-cupti-cu12==12.1.105; platform_system == 'Linux' and platform_machine == 'x86_64' | nvidia-cudnn-cu12==9.1.0.70; platform_system == 'Linux' and platform_machine == 'x86_64' | nvidia-cublas-cu12==12.1.3.1; platform_system == 'Linux' and platform_machine == 'x86_64' | nvidia-cufft-cu12==11.0.2.54; platform_system == 'Linux' and platform_machine == 'x86_64' | nvidia-curand-cu12==10.3.2.106; platform_system == 'Linux' and platform_machine == 'x86_64' | nvidia-cusolver-cu12==11.4.5.107; platform_system == 'Linux' and platform_machine == 'x86_64' | nvidia-cusparse-cu12==12.1.0.106; platform_system == 'Linux' and platform_machine == 'x86_64' | nvidia-nccl-cu12==2.20.5; platform_system == 'Linux' and platform_machine == 'x86_64' | nvidia-nvtx-cu12==12.1.105; platform_system == 'Linux' and platform_machine == 'x86_64'
PYTORCH_EXTRA_INSTALL_REQUIREMENTS: nvidia-cuda-nvrtc-cu12==12.1.105; platform_system == 'Linux' and platform_machine == 'x86_64' | nvidia-cuda-runtime-cu12==12.1.105; platform_system == 'Linux' and platform_machine == 'x86_64' | nvidia-cuda-cupti-cu12==12.1.105; platform_system == 'Linux' and platform_machine == 'x86_64' | nvidia-cudnn-cu12==8.9.2.26; platform_system == 'Linux' and platform_machine == 'x86_64' | nvidia-cublas-cu12==12.1.3.1; platform_system == 'Linux' and platform_machine == 'x86_64' | nvidia-cufft-cu12==11.0.2.54; platform_system == 'Linux' and platform_machine == 'x86_64' | nvidia-curand-cu12==10.3.2.106; platform_system == 'Linux' and platform_machine == 'x86_64' | nvidia-cusolver-cu12==11.4.5.107; platform_system == 'Linux' and platform_machine == 'x86_64' | nvidia-cusparse-cu12==12.1.0.106; platform_system == 'Linux' and platform_machine == 'x86_64' | nvidia-nccl-cu12==2.20.5; platform_system == 'Linux' and platform_machine == 'x86_64' | nvidia-nvtx-cu12==12.1.105; platform_system == 'Linux' and platform_machine == 'x86_64'
secrets:
github-token: ${{ secrets.GITHUB_TOKEN }}
manywheel-py3_12-cpu-aarch64-test: # Testing
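
Each PYTORCH_EXTRA_INSTALL_REQUIREMENTS value above packs a list of PEP 508 requirement strings into one line, separated by " | " (the separator is inferred from the lines shown here, not confirmed elsewhere). A minimal sketch of splitting such a value back into individual requirements:

def split_extra_requirements(value: str) -> list:
    # Split on the " | " delimiter and drop empty fragments.
    return [req.strip() for req in value.split(" | ") if req.strip()]

reqs = split_extra_requirements(
    "nvidia-cudnn-cu12==8.9.2.26; platform_system == 'Linux' | "
    "nvidia-nccl-cu12==2.20.5; platform_system == 'Linux'"
)
assert reqs[0].startswith("nvidia-cudnn-cu12==8.9.2.26")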

View File

@@ -48,7 +48,7 @@ jobs:
DESIRED_PYTHON: "3.8"
build_name: manywheel-py3_8-cuda11_8
build_environment: linux-binary-manywheel
PYTORCH_EXTRA_INSTALL_REQUIREMENTS: nvidia-cuda-nvrtc-cu11==11.8.89; platform_system == 'Linux' and platform_machine == 'x86_64' | nvidia-cuda-runtime-cu11==11.8.89; platform_system == 'Linux' and platform_machine == 'x86_64' | nvidia-cuda-cupti-cu11==11.8.87; platform_system == 'Linux' and platform_machine == 'x86_64' | nvidia-cudnn-cu11==9.1.0.70; platform_system == 'Linux' and platform_machine == 'x86_64' | nvidia-cublas-cu11==11.11.3.6; platform_system == 'Linux' and platform_machine == 'x86_64' | nvidia-cufft-cu11==10.9.0.58; platform_system == 'Linux' and platform_machine == 'x86_64' | nvidia-curand-cu11==10.3.0.86; platform_system == 'Linux' and platform_machine == 'x86_64' | nvidia-cusolver-cu11==11.4.1.48; platform_system == 'Linux' and platform_machine == 'x86_64' | nvidia-cusparse-cu11==11.7.5.86; platform_system == 'Linux' and platform_machine == 'x86_64' | nvidia-nccl-cu11==2.20.5; platform_system == 'Linux' and platform_machine == 'x86_64' | nvidia-nvtx-cu11==11.8.86; platform_system == 'Linux' and platform_machine == 'x86_64'
PYTORCH_EXTRA_INSTALL_REQUIREMENTS: nvidia-cuda-nvrtc-cu11==11.8.89; platform_system == 'Linux' and platform_machine == 'x86_64' | nvidia-cuda-runtime-cu11==11.8.89; platform_system == 'Linux' and platform_machine == 'x86_64' | nvidia-cuda-cupti-cu11==11.8.87; platform_system == 'Linux' and platform_machine == 'x86_64' | nvidia-cudnn-cu11==8.7.0.84; platform_system == 'Linux' and platform_machine == 'x86_64' | nvidia-cublas-cu11==11.11.3.6; platform_system == 'Linux' and platform_machine == 'x86_64' | nvidia-cufft-cu11==10.9.0.58; platform_system == 'Linux' and platform_machine == 'x86_64' | nvidia-curand-cu11==10.3.0.86; platform_system == 'Linux' and platform_machine == 'x86_64' | nvidia-cusolver-cu11==11.4.1.48; platform_system == 'Linux' and platform_machine == 'x86_64' | nvidia-cusparse-cu11==11.7.5.86; platform_system == 'Linux' and platform_machine == 'x86_64' | nvidia-nccl-cu11==2.20.5; platform_system == 'Linux' and platform_machine == 'x86_64' | nvidia-nvtx-cu11==11.8.86; platform_system == 'Linux' and platform_machine == 'x86_64'
secrets:
github-token: ${{ secrets.GITHUB_TOKEN }}
manywheel-py3_8-cuda11_8-test: # Testing
@@ -88,7 +88,7 @@ jobs:
DESIRED_PYTHON: "3.8"
build_name: manywheel-py3_8-cuda12_1
build_environment: linux-binary-manywheel
PYTORCH_EXTRA_INSTALL_REQUIREMENTS: nvidia-cuda-nvrtc-cu12==12.1.105; platform_system == 'Linux' and platform_machine == 'x86_64' | nvidia-cuda-runtime-cu12==12.1.105; platform_system == 'Linux' and platform_machine == 'x86_64' | nvidia-cuda-cupti-cu12==12.1.105; platform_system == 'Linux' and platform_machine == 'x86_64' | nvidia-cudnn-cu12==9.1.0.70; platform_system == 'Linux' and platform_machine == 'x86_64' | nvidia-cublas-cu12==12.1.3.1; platform_system == 'Linux' and platform_machine == 'x86_64' | nvidia-cufft-cu12==11.0.2.54; platform_system == 'Linux' and platform_machine == 'x86_64' | nvidia-curand-cu12==10.3.2.106; platform_system == 'Linux' and platform_machine == 'x86_64' | nvidia-cusolver-cu12==11.4.5.107; platform_system == 'Linux' and platform_machine == 'x86_64' | nvidia-cusparse-cu12==12.1.0.106; platform_system == 'Linux' and platform_machine == 'x86_64' | nvidia-nccl-cu12==2.20.5; platform_system == 'Linux' and platform_machine == 'x86_64' | nvidia-nvtx-cu12==12.1.105; platform_system == 'Linux' and platform_machine == 'x86_64'
PYTORCH_EXTRA_INSTALL_REQUIREMENTS: nvidia-cuda-nvrtc-cu12==12.1.105; platform_system == 'Linux' and platform_machine == 'x86_64' | nvidia-cuda-runtime-cu12==12.1.105; platform_system == 'Linux' and platform_machine == 'x86_64' | nvidia-cuda-cupti-cu12==12.1.105; platform_system == 'Linux' and platform_machine == 'x86_64' | nvidia-cudnn-cu12==8.9.2.26; platform_system == 'Linux' and platform_machine == 'x86_64' | nvidia-cublas-cu12==12.1.3.1; platform_system == 'Linux' and platform_machine == 'x86_64' | nvidia-cufft-cu12==11.0.2.54; platform_system == 'Linux' and platform_machine == 'x86_64' | nvidia-curand-cu12==10.3.2.106; platform_system == 'Linux' and platform_machine == 'x86_64' | nvidia-cusolver-cu12==11.4.5.107; platform_system == 'Linux' and platform_machine == 'x86_64' | nvidia-cusparse-cu12==12.1.0.106; platform_system == 'Linux' and platform_machine == 'x86_64' | nvidia-nccl-cu12==2.20.5; platform_system == 'Linux' and platform_machine == 'x86_64' | nvidia-nvtx-cu12==12.1.105; platform_system == 'Linux' and platform_machine == 'x86_64'
secrets:
github-token: ${{ secrets.GITHUB_TOKEN }}
manywheel-py3_8-cuda12_1-test: # Testing
@@ -128,7 +128,7 @@ jobs:
DESIRED_PYTHON: "3.8"
build_name: manywheel-py3_8-cuda12_4
build_environment: linux-binary-manywheel
PYTORCH_EXTRA_INSTALL_REQUIREMENTS: nvidia-cuda-nvrtc-cu12==12.4.99; platform_system == 'Linux' and platform_machine == 'x86_64' | nvidia-cuda-runtime-cu12==12.4.99; platform_system == 'Linux' and platform_machine == 'x86_64' | nvidia-cuda-cupti-cu12==12.4.99; platform_system == 'Linux' and platform_machine == 'x86_64' | nvidia-cudnn-cu12==9.1.0.70; platform_system == 'Linux' and platform_machine == 'x86_64' | nvidia-cublas-cu12==12.4.2.65; platform_system == 'Linux' and platform_machine == 'x86_64' | nvidia-cufft-cu12==11.2.0.44; platform_system == 'Linux' and platform_machine == 'x86_64' | nvidia-curand-cu12==10.3.5.119; platform_system == 'Linux' and platform_machine == 'x86_64' | nvidia-cusolver-cu12==11.6.0.99; platform_system == 'Linux' and platform_machine == 'x86_64' | nvidia-cusparse-cu12==12.3.0.142; platform_system == 'Linux' and platform_machine == 'x86_64' | nvidia-nccl-cu12==2.20.5; platform_system == 'Linux' and platform_machine == 'x86_64' | nvidia-nvtx-cu12==12.4.99; platform_system == 'Linux' and platform_machine == 'x86_64' | nvidia-nvjitlink-cu12==12.4.99; platform_system == 'Linux' and platform_machine == 'x86_64'
PYTORCH_EXTRA_INSTALL_REQUIREMENTS: nvidia-cuda-nvrtc-cu12==12.4.99; platform_system == 'Linux' and platform_machine == 'x86_64' | nvidia-cuda-runtime-cu12==12.4.99; platform_system == 'Linux' and platform_machine == 'x86_64' | nvidia-cuda-cupti-cu12==12.4.99; platform_system == 'Linux' and platform_machine == 'x86_64' | nvidia-cudnn-cu12==8.9.7.29; platform_system == 'Linux' and platform_machine == 'x86_64' | nvidia-cublas-cu12==12.4.2.65; platform_system == 'Linux' and platform_machine == 'x86_64' | nvidia-cufft-cu12==11.2.0.44; platform_system == 'Linux' and platform_machine == 'x86_64' | nvidia-curand-cu12==10.3.5.119; platform_system == 'Linux' and platform_machine == 'x86_64' | nvidia-cusolver-cu12==11.6.0.99; platform_system == 'Linux' and platform_machine == 'x86_64' | nvidia-cusparse-cu12==12.3.0.142; platform_system == 'Linux' and platform_machine == 'x86_64' | nvidia-nccl-cu12==2.20.5; platform_system == 'Linux' and platform_machine == 'x86_64' | nvidia-nvtx-cu12==12.4.99; platform_system == 'Linux' and platform_machine == 'x86_64' | nvidia-nvjitlink-cu12==12.4.99; platform_system == 'Linux' and platform_machine == 'x86_64'
secrets:
github-token: ${{ secrets.GITHUB_TOKEN }}
manywheel-py3_8-cuda12_4-test: # Testing

View File

@@ -174,7 +174,7 @@ jobs:
DESIRED_PYTHON: "3.8"
build_name: manywheel-py3_8-cuda11_8
build_environment: linux-binary-manywheel
PYTORCH_EXTRA_INSTALL_REQUIREMENTS: nvidia-cuda-nvrtc-cu11==11.8.89; platform_system == 'Linux' and platform_machine == 'x86_64' | nvidia-cuda-runtime-cu11==11.8.89; platform_system == 'Linux' and platform_machine == 'x86_64' | nvidia-cuda-cupti-cu11==11.8.87; platform_system == 'Linux' and platform_machine == 'x86_64' | nvidia-cudnn-cu11==9.1.0.70; platform_system == 'Linux' and platform_machine == 'x86_64' | nvidia-cublas-cu11==11.11.3.6; platform_system == 'Linux' and platform_machine == 'x86_64' | nvidia-cufft-cu11==10.9.0.58; platform_system == 'Linux' and platform_machine == 'x86_64' | nvidia-curand-cu11==10.3.0.86; platform_system == 'Linux' and platform_machine == 'x86_64' | nvidia-cusolver-cu11==11.4.1.48; platform_system == 'Linux' and platform_machine == 'x86_64' | nvidia-cusparse-cu11==11.7.5.86; platform_system == 'Linux' and platform_machine == 'x86_64' | nvidia-nccl-cu11==2.20.5; platform_system == 'Linux' and platform_machine == 'x86_64' | nvidia-nvtx-cu11==11.8.86; platform_system == 'Linux' and platform_machine == 'x86_64'
PYTORCH_EXTRA_INSTALL_REQUIREMENTS: nvidia-cuda-nvrtc-cu11==11.8.89; platform_system == 'Linux' and platform_machine == 'x86_64' | nvidia-cuda-runtime-cu11==11.8.89; platform_system == 'Linux' and platform_machine == 'x86_64' | nvidia-cuda-cupti-cu11==11.8.87; platform_system == 'Linux' and platform_machine == 'x86_64' | nvidia-cudnn-cu11==8.7.0.84; platform_system == 'Linux' and platform_machine == 'x86_64' | nvidia-cublas-cu11==11.11.3.6; platform_system == 'Linux' and platform_machine == 'x86_64' | nvidia-cufft-cu11==10.9.0.58; platform_system == 'Linux' and platform_machine == 'x86_64' | nvidia-curand-cu11==10.3.0.86; platform_system == 'Linux' and platform_machine == 'x86_64' | nvidia-cusolver-cu11==11.4.1.48; platform_system == 'Linux' and platform_machine == 'x86_64' | nvidia-cusparse-cu11==11.7.5.86; platform_system == 'Linux' and platform_machine == 'x86_64' | nvidia-nccl-cu11==2.20.5; platform_system == 'Linux' and platform_machine == 'x86_64' | nvidia-nvtx-cu11==11.8.86; platform_system == 'Linux' and platform_machine == 'x86_64'
secrets:
github-token: ${{ secrets.GITHUB_TOKEN }}
manywheel-py3_8-cuda11_8-test: # Testing
@@ -237,7 +237,7 @@ jobs:
DESIRED_PYTHON: "3.8"
build_name: manywheel-py3_8-cuda12_1
build_environment: linux-binary-manywheel
PYTORCH_EXTRA_INSTALL_REQUIREMENTS: nvidia-cuda-nvrtc-cu12==12.1.105; platform_system == 'Linux' and platform_machine == 'x86_64' | nvidia-cuda-runtime-cu12==12.1.105; platform_system == 'Linux' and platform_machine == 'x86_64' | nvidia-cuda-cupti-cu12==12.1.105; platform_system == 'Linux' and platform_machine == 'x86_64' | nvidia-cudnn-cu12==9.1.0.70; platform_system == 'Linux' and platform_machine == 'x86_64' | nvidia-cublas-cu12==12.1.3.1; platform_system == 'Linux' and platform_machine == 'x86_64' | nvidia-cufft-cu12==11.0.2.54; platform_system == 'Linux' and platform_machine == 'x86_64' | nvidia-curand-cu12==10.3.2.106; platform_system == 'Linux' and platform_machine == 'x86_64' | nvidia-cusolver-cu12==11.4.5.107; platform_system == 'Linux' and platform_machine == 'x86_64' | nvidia-cusparse-cu12==12.1.0.106; platform_system == 'Linux' and platform_machine == 'x86_64' | nvidia-nccl-cu12==2.20.5; platform_system == 'Linux' and platform_machine == 'x86_64' | nvidia-nvtx-cu12==12.1.105; platform_system == 'Linux' and platform_machine == 'x86_64'
PYTORCH_EXTRA_INSTALL_REQUIREMENTS: nvidia-cuda-nvrtc-cu12==12.1.105; platform_system == 'Linux' and platform_machine == 'x86_64' | nvidia-cuda-runtime-cu12==12.1.105; platform_system == 'Linux' and platform_machine == 'x86_64' | nvidia-cuda-cupti-cu12==12.1.105; platform_system == 'Linux' and platform_machine == 'x86_64' | nvidia-cudnn-cu12==8.9.2.26; platform_system == 'Linux' and platform_machine == 'x86_64' | nvidia-cublas-cu12==12.1.3.1; platform_system == 'Linux' and platform_machine == 'x86_64' | nvidia-cufft-cu12==11.0.2.54; platform_system == 'Linux' and platform_machine == 'x86_64' | nvidia-curand-cu12==10.3.2.106; platform_system == 'Linux' and platform_machine == 'x86_64' | nvidia-cusolver-cu12==11.4.5.107; platform_system == 'Linux' and platform_machine == 'x86_64' | nvidia-cusparse-cu12==12.1.0.106; platform_system == 'Linux' and platform_machine == 'x86_64' | nvidia-nccl-cu12==2.20.5; platform_system == 'Linux' and platform_machine == 'x86_64' | nvidia-nvtx-cu12==12.1.105; platform_system == 'Linux' and platform_machine == 'x86_64'
secrets:
github-token: ${{ secrets.GITHUB_TOKEN }}
manywheel-py3_8-cuda12_1-test: # Testing
@@ -300,7 +300,7 @@ jobs:
DESIRED_PYTHON: "3.8"
build_name: manywheel-py3_8-cuda12_4
build_environment: linux-binary-manywheel
PYTORCH_EXTRA_INSTALL_REQUIREMENTS: nvidia-cuda-nvrtc-cu12==12.4.99; platform_system == 'Linux' and platform_machine == 'x86_64' | nvidia-cuda-runtime-cu12==12.4.99; platform_system == 'Linux' and platform_machine == 'x86_64' | nvidia-cuda-cupti-cu12==12.4.99; platform_system == 'Linux' and platform_machine == 'x86_64' | nvidia-cudnn-cu12==9.1.0.70; platform_system == 'Linux' and platform_machine == 'x86_64' | nvidia-cublas-cu12==12.4.2.65; platform_system == 'Linux' and platform_machine == 'x86_64' | nvidia-cufft-cu12==11.2.0.44; platform_system == 'Linux' and platform_machine == 'x86_64' | nvidia-curand-cu12==10.3.5.119; platform_system == 'Linux' and platform_machine == 'x86_64' | nvidia-cusolver-cu12==11.6.0.99; platform_system == 'Linux' and platform_machine == 'x86_64' | nvidia-cusparse-cu12==12.3.0.142; platform_system == 'Linux' and platform_machine == 'x86_64' | nvidia-nccl-cu12==2.20.5; platform_system == 'Linux' and platform_machine == 'x86_64' | nvidia-nvtx-cu12==12.4.99; platform_system == 'Linux' and platform_machine == 'x86_64' | nvidia-nvjitlink-cu12==12.4.99; platform_system == 'Linux' and platform_machine == 'x86_64'
PYTORCH_EXTRA_INSTALL_REQUIREMENTS: nvidia-cuda-nvrtc-cu12==12.4.99; platform_system == 'Linux' and platform_machine == 'x86_64' | nvidia-cuda-runtime-cu12==12.4.99; platform_system == 'Linux' and platform_machine == 'x86_64' | nvidia-cuda-cupti-cu12==12.4.99; platform_system == 'Linux' and platform_machine == 'x86_64' | nvidia-cudnn-cu12==8.9.7.29; platform_system == 'Linux' and platform_machine == 'x86_64' | nvidia-cublas-cu12==12.4.2.65; platform_system == 'Linux' and platform_machine == 'x86_64' | nvidia-cufft-cu12==11.2.0.44; platform_system == 'Linux' and platform_machine == 'x86_64' | nvidia-curand-cu12==10.3.5.119; platform_system == 'Linux' and platform_machine == 'x86_64' | nvidia-cusolver-cu12==11.6.0.99; platform_system == 'Linux' and platform_machine == 'x86_64' | nvidia-cusparse-cu12==12.3.0.142; platform_system == 'Linux' and platform_machine == 'x86_64' | nvidia-nccl-cu12==2.20.5; platform_system == 'Linux' and platform_machine == 'x86_64' | nvidia-nvtx-cu12==12.4.99; platform_system == 'Linux' and platform_machine == 'x86_64' | nvidia-nvjitlink-cu12==12.4.99; platform_system == 'Linux' and platform_machine == 'x86_64'
secrets:
github-token: ${{ secrets.GITHUB_TOKEN }}
manywheel-py3_8-cuda12_4-test: # Testing
@@ -690,7 +690,7 @@ jobs:
DESIRED_PYTHON: "3.9"
build_name: manywheel-py3_9-cuda11_8
build_environment: linux-binary-manywheel
PYTORCH_EXTRA_INSTALL_REQUIREMENTS: nvidia-cuda-nvrtc-cu11==11.8.89; platform_system == 'Linux' and platform_machine == 'x86_64' | nvidia-cuda-runtime-cu11==11.8.89; platform_system == 'Linux' and platform_machine == 'x86_64' | nvidia-cuda-cupti-cu11==11.8.87; platform_system == 'Linux' and platform_machine == 'x86_64' | nvidia-cudnn-cu11==9.1.0.70; platform_system == 'Linux' and platform_machine == 'x86_64' | nvidia-cublas-cu11==11.11.3.6; platform_system == 'Linux' and platform_machine == 'x86_64' | nvidia-cufft-cu11==10.9.0.58; platform_system == 'Linux' and platform_machine == 'x86_64' | nvidia-curand-cu11==10.3.0.86; platform_system == 'Linux' and platform_machine == 'x86_64' | nvidia-cusolver-cu11==11.4.1.48; platform_system == 'Linux' and platform_machine == 'x86_64' | nvidia-cusparse-cu11==11.7.5.86; platform_system == 'Linux' and platform_machine == 'x86_64' | nvidia-nccl-cu11==2.20.5; platform_system == 'Linux' and platform_machine == 'x86_64' | nvidia-nvtx-cu11==11.8.86; platform_system == 'Linux' and platform_machine == 'x86_64'
PYTORCH_EXTRA_INSTALL_REQUIREMENTS: nvidia-cuda-nvrtc-cu11==11.8.89; platform_system == 'Linux' and platform_machine == 'x86_64' | nvidia-cuda-runtime-cu11==11.8.89; platform_system == 'Linux' and platform_machine == 'x86_64' | nvidia-cuda-cupti-cu11==11.8.87; platform_system == 'Linux' and platform_machine == 'x86_64' | nvidia-cudnn-cu11==8.7.0.84; platform_system == 'Linux' and platform_machine == 'x86_64' | nvidia-cublas-cu11==11.11.3.6; platform_system == 'Linux' and platform_machine == 'x86_64' | nvidia-cufft-cu11==10.9.0.58; platform_system == 'Linux' and platform_machine == 'x86_64' | nvidia-curand-cu11==10.3.0.86; platform_system == 'Linux' and platform_machine == 'x86_64' | nvidia-cusolver-cu11==11.4.1.48; platform_system == 'Linux' and platform_machine == 'x86_64' | nvidia-cusparse-cu11==11.7.5.86; platform_system == 'Linux' and platform_machine == 'x86_64' | nvidia-nccl-cu11==2.20.5; platform_system == 'Linux' and platform_machine == 'x86_64' | nvidia-nvtx-cu11==11.8.86; platform_system == 'Linux' and platform_machine == 'x86_64'
secrets:
github-token: ${{ secrets.GITHUB_TOKEN }}
manywheel-py3_9-cuda11_8-test: # Testing
@@ -753,7 +753,7 @@ jobs:
DESIRED_PYTHON: "3.9"
build_name: manywheel-py3_9-cuda12_1
build_environment: linux-binary-manywheel
PYTORCH_EXTRA_INSTALL_REQUIREMENTS: nvidia-cuda-nvrtc-cu12==12.1.105; platform_system == 'Linux' and platform_machine == 'x86_64' | nvidia-cuda-runtime-cu12==12.1.105; platform_system == 'Linux' and platform_machine == 'x86_64' | nvidia-cuda-cupti-cu12==12.1.105; platform_system == 'Linux' and platform_machine == 'x86_64' | nvidia-cudnn-cu12==9.1.0.70; platform_system == 'Linux' and platform_machine == 'x86_64' | nvidia-cublas-cu12==12.1.3.1; platform_system == 'Linux' and platform_machine == 'x86_64' | nvidia-cufft-cu12==11.0.2.54; platform_system == 'Linux' and platform_machine == 'x86_64' | nvidia-curand-cu12==10.3.2.106; platform_system == 'Linux' and platform_machine == 'x86_64' | nvidia-cusolver-cu12==11.4.5.107; platform_system == 'Linux' and platform_machine == 'x86_64' | nvidia-cusparse-cu12==12.1.0.106; platform_system == 'Linux' and platform_machine == 'x86_64' | nvidia-nccl-cu12==2.20.5; platform_system == 'Linux' and platform_machine == 'x86_64' | nvidia-nvtx-cu12==12.1.105; platform_system == 'Linux' and platform_machine == 'x86_64'
PYTORCH_EXTRA_INSTALL_REQUIREMENTS: nvidia-cuda-nvrtc-cu12==12.1.105; platform_system == 'Linux' and platform_machine == 'x86_64' | nvidia-cuda-runtime-cu12==12.1.105; platform_system == 'Linux' and platform_machine == 'x86_64' | nvidia-cuda-cupti-cu12==12.1.105; platform_system == 'Linux' and platform_machine == 'x86_64' | nvidia-cudnn-cu12==8.9.2.26; platform_system == 'Linux' and platform_machine == 'x86_64' | nvidia-cublas-cu12==12.1.3.1; platform_system == 'Linux' and platform_machine == 'x86_64' | nvidia-cufft-cu12==11.0.2.54; platform_system == 'Linux' and platform_machine == 'x86_64' | nvidia-curand-cu12==10.3.2.106; platform_system == 'Linux' and platform_machine == 'x86_64' | nvidia-cusolver-cu12==11.4.5.107; platform_system == 'Linux' and platform_machine == 'x86_64' | nvidia-cusparse-cu12==12.1.0.106; platform_system == 'Linux' and platform_machine == 'x86_64' | nvidia-nccl-cu12==2.20.5; platform_system == 'Linux' and platform_machine == 'x86_64' | nvidia-nvtx-cu12==12.1.105; platform_system == 'Linux' and platform_machine == 'x86_64'
secrets:
github-token: ${{ secrets.GITHUB_TOKEN }}
manywheel-py3_9-cuda12_1-test: # Testing
@@ -816,7 +816,7 @@ jobs:
DESIRED_PYTHON: "3.9"
build_name: manywheel-py3_9-cuda12_4
build_environment: linux-binary-manywheel
PYTORCH_EXTRA_INSTALL_REQUIREMENTS: nvidia-cuda-nvrtc-cu12==12.4.99; platform_system == 'Linux' and platform_machine == 'x86_64' | nvidia-cuda-runtime-cu12==12.4.99; platform_system == 'Linux' and platform_machine == 'x86_64' | nvidia-cuda-cupti-cu12==12.4.99; platform_system == 'Linux' and platform_machine == 'x86_64' | nvidia-cudnn-cu12==9.1.0.70; platform_system == 'Linux' and platform_machine == 'x86_64' | nvidia-cublas-cu12==12.4.2.65; platform_system == 'Linux' and platform_machine == 'x86_64' | nvidia-cufft-cu12==11.2.0.44; platform_system == 'Linux' and platform_machine == 'x86_64' | nvidia-curand-cu12==10.3.5.119; platform_system == 'Linux' and platform_machine == 'x86_64' | nvidia-cusolver-cu12==11.6.0.99; platform_system == 'Linux' and platform_machine == 'x86_64' | nvidia-cusparse-cu12==12.3.0.142; platform_system == 'Linux' and platform_machine == 'x86_64' | nvidia-nccl-cu12==2.20.5; platform_system == 'Linux' and platform_machine == 'x86_64' | nvidia-nvtx-cu12==12.4.99; platform_system == 'Linux' and platform_machine == 'x86_64' | nvidia-nvjitlink-cu12==12.4.99; platform_system == 'Linux' and platform_machine == 'x86_64'
PYTORCH_EXTRA_INSTALL_REQUIREMENTS: nvidia-cuda-nvrtc-cu12==12.4.99; platform_system == 'Linux' and platform_machine == 'x86_64' | nvidia-cuda-runtime-cu12==12.4.99; platform_system == 'Linux' and platform_machine == 'x86_64' | nvidia-cuda-cupti-cu12==12.4.99; platform_system == 'Linux' and platform_machine == 'x86_64' | nvidia-cudnn-cu12==8.9.7.29; platform_system == 'Linux' and platform_machine == 'x86_64' | nvidia-cublas-cu12==12.4.2.65; platform_system == 'Linux' and platform_machine == 'x86_64' | nvidia-cufft-cu12==11.2.0.44; platform_system == 'Linux' and platform_machine == 'x86_64' | nvidia-curand-cu12==10.3.5.119; platform_system == 'Linux' and platform_machine == 'x86_64' | nvidia-cusolver-cu12==11.6.0.99; platform_system == 'Linux' and platform_machine == 'x86_64' | nvidia-cusparse-cu12==12.3.0.142; platform_system == 'Linux' and platform_machine == 'x86_64' | nvidia-nccl-cu12==2.20.5; platform_system == 'Linux' and platform_machine == 'x86_64' | nvidia-nvtx-cu12==12.4.99; platform_system == 'Linux' and platform_machine == 'x86_64' | nvidia-nvjitlink-cu12==12.4.99; platform_system == 'Linux' and platform_machine == 'x86_64'
secrets:
github-token: ${{ secrets.GITHUB_TOKEN }}
manywheel-py3_9-cuda12_4-test: # Testing
@@ -1206,7 +1206,7 @@ jobs:
DESIRED_PYTHON: "3.10"
build_name: manywheel-py3_10-cuda11_8
build_environment: linux-binary-manywheel
PYTORCH_EXTRA_INSTALL_REQUIREMENTS: nvidia-cuda-nvrtc-cu11==11.8.89; platform_system == 'Linux' and platform_machine == 'x86_64' | nvidia-cuda-runtime-cu11==11.8.89; platform_system == 'Linux' and platform_machine == 'x86_64' | nvidia-cuda-cupti-cu11==11.8.87; platform_system == 'Linux' and platform_machine == 'x86_64' | nvidia-cudnn-cu11==9.1.0.70; platform_system == 'Linux' and platform_machine == 'x86_64' | nvidia-cublas-cu11==11.11.3.6; platform_system == 'Linux' and platform_machine == 'x86_64' | nvidia-cufft-cu11==10.9.0.58; platform_system == 'Linux' and platform_machine == 'x86_64' | nvidia-curand-cu11==10.3.0.86; platform_system == 'Linux' and platform_machine == 'x86_64' | nvidia-cusolver-cu11==11.4.1.48; platform_system == 'Linux' and platform_machine == 'x86_64' | nvidia-cusparse-cu11==11.7.5.86; platform_system == 'Linux' and platform_machine == 'x86_64' | nvidia-nccl-cu11==2.20.5; platform_system == 'Linux' and platform_machine == 'x86_64' | nvidia-nvtx-cu11==11.8.86; platform_system == 'Linux' and platform_machine == 'x86_64'
PYTORCH_EXTRA_INSTALL_REQUIREMENTS: nvidia-cuda-nvrtc-cu11==11.8.89; platform_system == 'Linux' and platform_machine == 'x86_64' | nvidia-cuda-runtime-cu11==11.8.89; platform_system == 'Linux' and platform_machine == 'x86_64' | nvidia-cuda-cupti-cu11==11.8.87; platform_system == 'Linux' and platform_machine == 'x86_64' | nvidia-cudnn-cu11==8.7.0.84; platform_system == 'Linux' and platform_machine == 'x86_64' | nvidia-cublas-cu11==11.11.3.6; platform_system == 'Linux' and platform_machine == 'x86_64' | nvidia-cufft-cu11==10.9.0.58; platform_system == 'Linux' and platform_machine == 'x86_64' | nvidia-curand-cu11==10.3.0.86; platform_system == 'Linux' and platform_machine == 'x86_64' | nvidia-cusolver-cu11==11.4.1.48; platform_system == 'Linux' and platform_machine == 'x86_64' | nvidia-cusparse-cu11==11.7.5.86; platform_system == 'Linux' and platform_machine == 'x86_64' | nvidia-nccl-cu11==2.20.5; platform_system == 'Linux' and platform_machine == 'x86_64' | nvidia-nvtx-cu11==11.8.86; platform_system == 'Linux' and platform_machine == 'x86_64'
secrets:
github-token: ${{ secrets.GITHUB_TOKEN }}
manywheel-py3_10-cuda11_8-test: # Testing
@@ -1269,7 +1269,7 @@ jobs:
DESIRED_PYTHON: "3.10"
build_name: manywheel-py3_10-cuda12_1
build_environment: linux-binary-manywheel
PYTORCH_EXTRA_INSTALL_REQUIREMENTS: nvidia-cuda-nvrtc-cu12==12.1.105; platform_system == 'Linux' and platform_machine == 'x86_64' | nvidia-cuda-runtime-cu12==12.1.105; platform_system == 'Linux' and platform_machine == 'x86_64' | nvidia-cuda-cupti-cu12==12.1.105; platform_system == 'Linux' and platform_machine == 'x86_64' | nvidia-cudnn-cu12==9.1.0.70; platform_system == 'Linux' and platform_machine == 'x86_64' | nvidia-cublas-cu12==12.1.3.1; platform_system == 'Linux' and platform_machine == 'x86_64' | nvidia-cufft-cu12==11.0.2.54; platform_system == 'Linux' and platform_machine == 'x86_64' | nvidia-curand-cu12==10.3.2.106; platform_system == 'Linux' and platform_machine == 'x86_64' | nvidia-cusolver-cu12==11.4.5.107; platform_system == 'Linux' and platform_machine == 'x86_64' | nvidia-cusparse-cu12==12.1.0.106; platform_system == 'Linux' and platform_machine == 'x86_64' | nvidia-nccl-cu12==2.20.5; platform_system == 'Linux' and platform_machine == 'x86_64' | nvidia-nvtx-cu12==12.1.105; platform_system == 'Linux' and platform_machine == 'x86_64'
PYTORCH_EXTRA_INSTALL_REQUIREMENTS: nvidia-cuda-nvrtc-cu12==12.1.105; platform_system == 'Linux' and platform_machine == 'x86_64' | nvidia-cuda-runtime-cu12==12.1.105; platform_system == 'Linux' and platform_machine == 'x86_64' | nvidia-cuda-cupti-cu12==12.1.105; platform_system == 'Linux' and platform_machine == 'x86_64' | nvidia-cudnn-cu12==8.9.2.26; platform_system == 'Linux' and platform_machine == 'x86_64' | nvidia-cublas-cu12==12.1.3.1; platform_system == 'Linux' and platform_machine == 'x86_64' | nvidia-cufft-cu12==11.0.2.54; platform_system == 'Linux' and platform_machine == 'x86_64' | nvidia-curand-cu12==10.3.2.106; platform_system == 'Linux' and platform_machine == 'x86_64' | nvidia-cusolver-cu12==11.4.5.107; platform_system == 'Linux' and platform_machine == 'x86_64' | nvidia-cusparse-cu12==12.1.0.106; platform_system == 'Linux' and platform_machine == 'x86_64' | nvidia-nccl-cu12==2.20.5; platform_system == 'Linux' and platform_machine == 'x86_64' | nvidia-nvtx-cu12==12.1.105; platform_system == 'Linux' and platform_machine == 'x86_64'
secrets:
github-token: ${{ secrets.GITHUB_TOKEN }}
manywheel-py3_10-cuda12_1-test: # Testing
@@ -1332,7 +1332,7 @@ jobs:
DESIRED_PYTHON: "3.10"
build_name: manywheel-py3_10-cuda12_4
build_environment: linux-binary-manywheel
PYTORCH_EXTRA_INSTALL_REQUIREMENTS: nvidia-cuda-nvrtc-cu12==12.4.99; platform_system == 'Linux' and platform_machine == 'x86_64' | nvidia-cuda-runtime-cu12==12.4.99; platform_system == 'Linux' and platform_machine == 'x86_64' | nvidia-cuda-cupti-cu12==12.4.99; platform_system == 'Linux' and platform_machine == 'x86_64' | nvidia-cudnn-cu12==9.1.0.70; platform_system == 'Linux' and platform_machine == 'x86_64' | nvidia-cublas-cu12==12.4.2.65; platform_system == 'Linux' and platform_machine == 'x86_64' | nvidia-cufft-cu12==11.2.0.44; platform_system == 'Linux' and platform_machine == 'x86_64' | nvidia-curand-cu12==10.3.5.119; platform_system == 'Linux' and platform_machine == 'x86_64' | nvidia-cusolver-cu12==11.6.0.99; platform_system == 'Linux' and platform_machine == 'x86_64' | nvidia-cusparse-cu12==12.3.0.142; platform_system == 'Linux' and platform_machine == 'x86_64' | nvidia-nccl-cu12==2.20.5; platform_system == 'Linux' and platform_machine == 'x86_64' | nvidia-nvtx-cu12==12.4.99; platform_system == 'Linux' and platform_machine == 'x86_64' | nvidia-nvjitlink-cu12==12.4.99; platform_system == 'Linux' and platform_machine == 'x86_64'
PYTORCH_EXTRA_INSTALL_REQUIREMENTS: nvidia-cuda-nvrtc-cu12==12.4.99; platform_system == 'Linux' and platform_machine == 'x86_64' | nvidia-cuda-runtime-cu12==12.4.99; platform_system == 'Linux' and platform_machine == 'x86_64' | nvidia-cuda-cupti-cu12==12.4.99; platform_system == 'Linux' and platform_machine == 'x86_64' | nvidia-cudnn-cu12==8.9.7.29; platform_system == 'Linux' and platform_machine == 'x86_64' | nvidia-cublas-cu12==12.4.2.65; platform_system == 'Linux' and platform_machine == 'x86_64' | nvidia-cufft-cu12==11.2.0.44; platform_system == 'Linux' and platform_machine == 'x86_64' | nvidia-curand-cu12==10.3.5.119; platform_system == 'Linux' and platform_machine == 'x86_64' | nvidia-cusolver-cu12==11.6.0.99; platform_system == 'Linux' and platform_machine == 'x86_64' | nvidia-cusparse-cu12==12.3.0.142; platform_system == 'Linux' and platform_machine == 'x86_64' | nvidia-nccl-cu12==2.20.5; platform_system == 'Linux' and platform_machine == 'x86_64' | nvidia-nvtx-cu12==12.4.99; platform_system == 'Linux' and platform_machine == 'x86_64' | nvidia-nvjitlink-cu12==12.4.99; platform_system == 'Linux' and platform_machine == 'x86_64'
secrets:
github-token: ${{ secrets.GITHUB_TOKEN }}
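
When a wheel built by one of these jobs misbehaves, a quick sanity check is to confirm that the cuDNN version the wheel pins is the one PyTorch actually loaded. A hedged sketch, assuming a CUDA 12 install with the `nvidia-cudnn-cu12` wheel present (cuDNN 8 encodes its runtime version as major*1000 + minor*100 + patch):

```python
# Sketch: cross-check the pinned cuDNN wheel against the runtime torch reports.
from importlib.metadata import version

import torch

wheel = version("nvidia-cudnn-cu12")        # e.g. "8.9.7.29"
loaded = torch.backends.cudnn.version()     # e.g. 8907 for cuDNN 8.9.7
major, minor, patch = (int(x) for x in wheel.split(".")[:3])
assert loaded == major * 1000 + minor * 100 + patch, (wheel, loaded)
print(f"pinned {wheel}, runtime reports {loaded}")
```
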
manywheel-py3_10-cuda12_4-test: # Testing
@@ -1722,7 +1722,7 @@ jobs:
DESIRED_PYTHON: "3.11"
build_name: manywheel-py3_11-cuda11_8
build_environment: linux-binary-manywheel
PYTORCH_EXTRA_INSTALL_REQUIREMENTS: nvidia-cuda-nvrtc-cu11==11.8.89; platform_system == 'Linux' and platform_machine == 'x86_64' | nvidia-cuda-runtime-cu11==11.8.89; platform_system == 'Linux' and platform_machine == 'x86_64' | nvidia-cuda-cupti-cu11==11.8.87; platform_system == 'Linux' and platform_machine == 'x86_64' | nvidia-cudnn-cu11==9.1.0.70; platform_system == 'Linux' and platform_machine == 'x86_64' | nvidia-cublas-cu11==11.11.3.6; platform_system == 'Linux' and platform_machine == 'x86_64' | nvidia-cufft-cu11==10.9.0.58; platform_system == 'Linux' and platform_machine == 'x86_64' | nvidia-curand-cu11==10.3.0.86; platform_system == 'Linux' and platform_machine == 'x86_64' | nvidia-cusolver-cu11==11.4.1.48; platform_system == 'Linux' and platform_machine == 'x86_64' | nvidia-cusparse-cu11==11.7.5.86; platform_system == 'Linux' and platform_machine == 'x86_64' | nvidia-nccl-cu11==2.20.5; platform_system == 'Linux' and platform_machine == 'x86_64' | nvidia-nvtx-cu11==11.8.86; platform_system == 'Linux' and platform_machine == 'x86_64'
PYTORCH_EXTRA_INSTALL_REQUIREMENTS: nvidia-cuda-nvrtc-cu11==11.8.89; platform_system == 'Linux' and platform_machine == 'x86_64' | nvidia-cuda-runtime-cu11==11.8.89; platform_system == 'Linux' and platform_machine == 'x86_64' | nvidia-cuda-cupti-cu11==11.8.87; platform_system == 'Linux' and platform_machine == 'x86_64' | nvidia-cudnn-cu11==8.7.0.84; platform_system == 'Linux' and platform_machine == 'x86_64' | nvidia-cublas-cu11==11.11.3.6; platform_system == 'Linux' and platform_machine == 'x86_64' | nvidia-cufft-cu11==10.9.0.58; platform_system == 'Linux' and platform_machine == 'x86_64' | nvidia-curand-cu11==10.3.0.86; platform_system == 'Linux' and platform_machine == 'x86_64' | nvidia-cusolver-cu11==11.4.1.48; platform_system == 'Linux' and platform_machine == 'x86_64' | nvidia-cusparse-cu11==11.7.5.86; platform_system == 'Linux' and platform_machine == 'x86_64' | nvidia-nccl-cu11==2.20.5; platform_system == 'Linux' and platform_machine == 'x86_64' | nvidia-nvtx-cu11==11.8.86; platform_system == 'Linux' and platform_machine == 'x86_64'
secrets:
github-token: ${{ secrets.GITHUB_TOKEN }}
manywheel-py3_11-cuda11_8-test: # Testing
@@ -1785,7 +1785,7 @@ jobs:
DESIRED_PYTHON: "3.11"
build_name: manywheel-py3_11-cuda12_1
build_environment: linux-binary-manywheel
PYTORCH_EXTRA_INSTALL_REQUIREMENTS: nvidia-cuda-nvrtc-cu12==12.1.105; platform_system == 'Linux' and platform_machine == 'x86_64' | nvidia-cuda-runtime-cu12==12.1.105; platform_system == 'Linux' and platform_machine == 'x86_64' | nvidia-cuda-cupti-cu12==12.1.105; platform_system == 'Linux' and platform_machine == 'x86_64' | nvidia-cudnn-cu12==9.1.0.70; platform_system == 'Linux' and platform_machine == 'x86_64' | nvidia-cublas-cu12==12.1.3.1; platform_system == 'Linux' and platform_machine == 'x86_64' | nvidia-cufft-cu12==11.0.2.54; platform_system == 'Linux' and platform_machine == 'x86_64' | nvidia-curand-cu12==10.3.2.106; platform_system == 'Linux' and platform_machine == 'x86_64' | nvidia-cusolver-cu12==11.4.5.107; platform_system == 'Linux' and platform_machine == 'x86_64' | nvidia-cusparse-cu12==12.1.0.106; platform_system == 'Linux' and platform_machine == 'x86_64' | nvidia-nccl-cu12==2.20.5; platform_system == 'Linux' and platform_machine == 'x86_64' | nvidia-nvtx-cu12==12.1.105; platform_system == 'Linux' and platform_machine == 'x86_64'
PYTORCH_EXTRA_INSTALL_REQUIREMENTS: nvidia-cuda-nvrtc-cu12==12.1.105; platform_system == 'Linux' and platform_machine == 'x86_64' | nvidia-cuda-runtime-cu12==12.1.105; platform_system == 'Linux' and platform_machine == 'x86_64' | nvidia-cuda-cupti-cu12==12.1.105; platform_system == 'Linux' and platform_machine == 'x86_64' | nvidia-cudnn-cu12==8.9.2.26; platform_system == 'Linux' and platform_machine == 'x86_64' | nvidia-cublas-cu12==12.1.3.1; platform_system == 'Linux' and platform_machine == 'x86_64' | nvidia-cufft-cu12==11.0.2.54; platform_system == 'Linux' and platform_machine == 'x86_64' | nvidia-curand-cu12==10.3.2.106; platform_system == 'Linux' and platform_machine == 'x86_64' | nvidia-cusolver-cu12==11.4.5.107; platform_system == 'Linux' and platform_machine == 'x86_64' | nvidia-cusparse-cu12==12.1.0.106; platform_system == 'Linux' and platform_machine == 'x86_64' | nvidia-nccl-cu12==2.20.5; platform_system == 'Linux' and platform_machine == 'x86_64' | nvidia-nvtx-cu12==12.1.105; platform_system == 'Linux' and platform_machine == 'x86_64'
secrets:
github-token: ${{ secrets.GITHUB_TOKEN }}
manywheel-py3_11-cuda12_1-test: # Testing
@@ -1848,7 +1848,7 @@ jobs:
DESIRED_PYTHON: "3.11"
build_name: manywheel-py3_11-cuda12_4
build_environment: linux-binary-manywheel
PYTORCH_EXTRA_INSTALL_REQUIREMENTS: nvidia-cuda-nvrtc-cu12==12.4.99; platform_system == 'Linux' and platform_machine == 'x86_64' | nvidia-cuda-runtime-cu12==12.4.99; platform_system == 'Linux' and platform_machine == 'x86_64' | nvidia-cuda-cupti-cu12==12.4.99; platform_system == 'Linux' and platform_machine == 'x86_64' | nvidia-cudnn-cu12==9.1.0.70; platform_system == 'Linux' and platform_machine == 'x86_64' | nvidia-cublas-cu12==12.4.2.65; platform_system == 'Linux' and platform_machine == 'x86_64' | nvidia-cufft-cu12==11.2.0.44; platform_system == 'Linux' and platform_machine == 'x86_64' | nvidia-curand-cu12==10.3.5.119; platform_system == 'Linux' and platform_machine == 'x86_64' | nvidia-cusolver-cu12==11.6.0.99; platform_system == 'Linux' and platform_machine == 'x86_64' | nvidia-cusparse-cu12==12.3.0.142; platform_system == 'Linux' and platform_machine == 'x86_64' | nvidia-nccl-cu12==2.20.5; platform_system == 'Linux' and platform_machine == 'x86_64' | nvidia-nvtx-cu12==12.4.99; platform_system == 'Linux' and platform_machine == 'x86_64' | nvidia-nvjitlink-cu12==12.4.99; platform_system == 'Linux' and platform_machine == 'x86_64'
PYTORCH_EXTRA_INSTALL_REQUIREMENTS: nvidia-cuda-nvrtc-cu12==12.4.99; platform_system == 'Linux' and platform_machine == 'x86_64' | nvidia-cuda-runtime-cu12==12.4.99; platform_system == 'Linux' and platform_machine == 'x86_64' | nvidia-cuda-cupti-cu12==12.4.99; platform_system == 'Linux' and platform_machine == 'x86_64' | nvidia-cudnn-cu12==8.9.7.29; platform_system == 'Linux' and platform_machine == 'x86_64' | nvidia-cublas-cu12==12.4.2.65; platform_system == 'Linux' and platform_machine == 'x86_64' | nvidia-cufft-cu12==11.2.0.44; platform_system == 'Linux' and platform_machine == 'x86_64' | nvidia-curand-cu12==10.3.5.119; platform_system == 'Linux' and platform_machine == 'x86_64' | nvidia-cusolver-cu12==11.6.0.99; platform_system == 'Linux' and platform_machine == 'x86_64' | nvidia-cusparse-cu12==12.3.0.142; platform_system == 'Linux' and platform_machine == 'x86_64' | nvidia-nccl-cu12==2.20.5; platform_system == 'Linux' and platform_machine == 'x86_64' | nvidia-nvtx-cu12==12.4.99; platform_system == 'Linux' and platform_machine == 'x86_64' | nvidia-nvjitlink-cu12==12.4.99; platform_system == 'Linux' and platform_machine == 'x86_64'
secrets:
github-token: ${{ secrets.GITHUB_TOKEN }}
manywheel-py3_11-cuda12_4-test: # Testing
@@ -2238,7 +2238,7 @@ jobs:
DESIRED_PYTHON: "3.12"
build_name: manywheel-py3_12-cuda11_8
build_environment: linux-binary-manywheel
PYTORCH_EXTRA_INSTALL_REQUIREMENTS: nvidia-cuda-nvrtc-cu11==11.8.89; platform_system == 'Linux' and platform_machine == 'x86_64' | nvidia-cuda-runtime-cu11==11.8.89; platform_system == 'Linux' and platform_machine == 'x86_64' | nvidia-cuda-cupti-cu11==11.8.87; platform_system == 'Linux' and platform_machine == 'x86_64' | nvidia-cudnn-cu11==9.1.0.70; platform_system == 'Linux' and platform_machine == 'x86_64' | nvidia-cublas-cu11==11.11.3.6; platform_system == 'Linux' and platform_machine == 'x86_64' | nvidia-cufft-cu11==10.9.0.58; platform_system == 'Linux' and platform_machine == 'x86_64' | nvidia-curand-cu11==10.3.0.86; platform_system == 'Linux' and platform_machine == 'x86_64' | nvidia-cusolver-cu11==11.4.1.48; platform_system == 'Linux' and platform_machine == 'x86_64' | nvidia-cusparse-cu11==11.7.5.86; platform_system == 'Linux' and platform_machine == 'x86_64' | nvidia-nccl-cu11==2.20.5; platform_system == 'Linux' and platform_machine == 'x86_64' | nvidia-nvtx-cu11==11.8.86; platform_system == 'Linux' and platform_machine == 'x86_64'
PYTORCH_EXTRA_INSTALL_REQUIREMENTS: nvidia-cuda-nvrtc-cu11==11.8.89; platform_system == 'Linux' and platform_machine == 'x86_64' | nvidia-cuda-runtime-cu11==11.8.89; platform_system == 'Linux' and platform_machine == 'x86_64' | nvidia-cuda-cupti-cu11==11.8.87; platform_system == 'Linux' and platform_machine == 'x86_64' | nvidia-cudnn-cu11==8.7.0.84; platform_system == 'Linux' and platform_machine == 'x86_64' | nvidia-cublas-cu11==11.11.3.6; platform_system == 'Linux' and platform_machine == 'x86_64' | nvidia-cufft-cu11==10.9.0.58; platform_system == 'Linux' and platform_machine == 'x86_64' | nvidia-curand-cu11==10.3.0.86; platform_system == 'Linux' and platform_machine == 'x86_64' | nvidia-cusolver-cu11==11.4.1.48; platform_system == 'Linux' and platform_machine == 'x86_64' | nvidia-cusparse-cu11==11.7.5.86; platform_system == 'Linux' and platform_machine == 'x86_64' | nvidia-nccl-cu11==2.20.5; platform_system == 'Linux' and platform_machine == 'x86_64' | nvidia-nvtx-cu11==11.8.86; platform_system == 'Linux' and platform_machine == 'x86_64'
secrets:
github-token: ${{ secrets.GITHUB_TOKEN }}
manywheel-py3_12-cuda11_8-test: # Testing
@@ -2301,7 +2301,7 @@ jobs:
DESIRED_PYTHON: "3.12"
build_name: manywheel-py3_12-cuda12_1
build_environment: linux-binary-manywheel
PYTORCH_EXTRA_INSTALL_REQUIREMENTS: nvidia-cuda-nvrtc-cu12==12.1.105; platform_system == 'Linux' and platform_machine == 'x86_64' | nvidia-cuda-runtime-cu12==12.1.105; platform_system == 'Linux' and platform_machine == 'x86_64' | nvidia-cuda-cupti-cu12==12.1.105; platform_system == 'Linux' and platform_machine == 'x86_64' | nvidia-cudnn-cu12==9.1.0.70; platform_system == 'Linux' and platform_machine == 'x86_64' | nvidia-cublas-cu12==12.1.3.1; platform_system == 'Linux' and platform_machine == 'x86_64' | nvidia-cufft-cu12==11.0.2.54; platform_system == 'Linux' and platform_machine == 'x86_64' | nvidia-curand-cu12==10.3.2.106; platform_system == 'Linux' and platform_machine == 'x86_64' | nvidia-cusolver-cu12==11.4.5.107; platform_system == 'Linux' and platform_machine == 'x86_64' | nvidia-cusparse-cu12==12.1.0.106; platform_system == 'Linux' and platform_machine == 'x86_64' | nvidia-nccl-cu12==2.20.5; platform_system == 'Linux' and platform_machine == 'x86_64' | nvidia-nvtx-cu12==12.1.105; platform_system == 'Linux' and platform_machine == 'x86_64'
PYTORCH_EXTRA_INSTALL_REQUIREMENTS: nvidia-cuda-nvrtc-cu12==12.1.105; platform_system == 'Linux' and platform_machine == 'x86_64' | nvidia-cuda-runtime-cu12==12.1.105; platform_system == 'Linux' and platform_machine == 'x86_64' | nvidia-cuda-cupti-cu12==12.1.105; platform_system == 'Linux' and platform_machine == 'x86_64' | nvidia-cudnn-cu12==8.9.2.26; platform_system == 'Linux' and platform_machine == 'x86_64' | nvidia-cublas-cu12==12.1.3.1; platform_system == 'Linux' and platform_machine == 'x86_64' | nvidia-cufft-cu12==11.0.2.54; platform_system == 'Linux' and platform_machine == 'x86_64' | nvidia-curand-cu12==10.3.2.106; platform_system == 'Linux' and platform_machine == 'x86_64' | nvidia-cusolver-cu12==11.4.5.107; platform_system == 'Linux' and platform_machine == 'x86_64' | nvidia-cusparse-cu12==12.1.0.106; platform_system == 'Linux' and platform_machine == 'x86_64' | nvidia-nccl-cu12==2.20.5; platform_system == 'Linux' and platform_machine == 'x86_64' | nvidia-nvtx-cu12==12.1.105; platform_system == 'Linux' and platform_machine == 'x86_64'
secrets:
github-token: ${{ secrets.GITHUB_TOKEN }}
manywheel-py3_12-cuda12_1-test: # Testing
@@ -2364,7 +2364,7 @@ jobs:
DESIRED_PYTHON: "3.12"
build_name: manywheel-py3_12-cuda12_4
build_environment: linux-binary-manywheel
PYTORCH_EXTRA_INSTALL_REQUIREMENTS: nvidia-cuda-nvrtc-cu12==12.4.99; platform_system == 'Linux' and platform_machine == 'x86_64' | nvidia-cuda-runtime-cu12==12.4.99; platform_system == 'Linux' and platform_machine == 'x86_64' | nvidia-cuda-cupti-cu12==12.4.99; platform_system == 'Linux' and platform_machine == 'x86_64' | nvidia-cudnn-cu12==9.1.0.70; platform_system == 'Linux' and platform_machine == 'x86_64' | nvidia-cublas-cu12==12.4.2.65; platform_system == 'Linux' and platform_machine == 'x86_64' | nvidia-cufft-cu12==11.2.0.44; platform_system == 'Linux' and platform_machine == 'x86_64' | nvidia-curand-cu12==10.3.5.119; platform_system == 'Linux' and platform_machine == 'x86_64' | nvidia-cusolver-cu12==11.6.0.99; platform_system == 'Linux' and platform_machine == 'x86_64' | nvidia-cusparse-cu12==12.3.0.142; platform_system == 'Linux' and platform_machine == 'x86_64' | nvidia-nccl-cu12==2.20.5; platform_system == 'Linux' and platform_machine == 'x86_64' | nvidia-nvtx-cu12==12.4.99; platform_system == 'Linux' and platform_machine == 'x86_64' | nvidia-nvjitlink-cu12==12.4.99; platform_system == 'Linux' and platform_machine == 'x86_64'
PYTORCH_EXTRA_INSTALL_REQUIREMENTS: nvidia-cuda-nvrtc-cu12==12.4.99; platform_system == 'Linux' and platform_machine == 'x86_64' | nvidia-cuda-runtime-cu12==12.4.99; platform_system == 'Linux' and platform_machine == 'x86_64' | nvidia-cuda-cupti-cu12==12.4.99; platform_system == 'Linux' and platform_machine == 'x86_64' | nvidia-cudnn-cu12==8.9.7.29; platform_system == 'Linux' and platform_machine == 'x86_64' | nvidia-cublas-cu12==12.4.2.65; platform_system == 'Linux' and platform_machine == 'x86_64' | nvidia-cufft-cu12==11.2.0.44; platform_system == 'Linux' and platform_machine == 'x86_64' | nvidia-curand-cu12==10.3.5.119; platform_system == 'Linux' and platform_machine == 'x86_64' | nvidia-cusolver-cu12==11.6.0.99; platform_system == 'Linux' and platform_machine == 'x86_64' | nvidia-cusparse-cu12==12.3.0.142; platform_system == 'Linux' and platform_machine == 'x86_64' | nvidia-nccl-cu12==2.20.5; platform_system == 'Linux' and platform_machine == 'x86_64' | nvidia-nvtx-cu12==12.4.99; platform_system == 'Linux' and platform_machine == 'x86_64' | nvidia-nvjitlink-cu12==12.4.99; platform_system == 'Linux' and platform_machine == 'x86_64'
secrets:
github-token: ${{ secrets.GITHUB_TOKEN }}
manywheel-py3_12-cuda12_4-test: # Testing
@@ -2410,3 +2410,209 @@ jobs:
conda-pytorchbot-token: ${{ secrets.CONDA_PYTORCHBOT_TOKEN }}
conda-pytorchbot-token-test: ${{ secrets.CONDA_PYTORCHBOT_TOKEN_TEST }}
uses: ./.github/workflows/_binary-upload.yml
manywheel-py3_12-rocm6_0-build:
if: ${{ github.repository_owner == 'pytorch' }}
uses: ./.github/workflows/_binary-build-linux.yml
with:
PYTORCH_ROOT: /pytorch
BUILDER_ROOT: /builder
PACKAGE_TYPE: manywheel
# TODO: This is a legacy variable that we eventually want to get rid of in
# favor of GPU_ARCH_VERSION
DESIRED_CUDA: rocm6.0
GPU_ARCH_VERSION: 6.0
GPU_ARCH_TYPE: rocm
DOCKER_IMAGE: pytorch/manylinux-builder:rocm6.0-main
DESIRED_PYTHON: "3.12"
build_name: manywheel-py3_12-rocm6_0
build_environment: linux-binary-manywheel
secrets:
github-token: ${{ secrets.GITHUB_TOKEN }}
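
As the TODO comments note, DESIRED_CUDA duplicates what GPU_ARCH_TYPE and GPU_ARCH_VERSION already encode. A sketch of the mapping the workflow generator presumably applies — the helper below is hypothetical, not the generator's actual code:

```python
# Hypothetical helper: derive the legacy DESIRED_CUDA string from the
# GPU_ARCH_TYPE / GPU_ARCH_VERSION pair set on the same job.
def desired_cuda(gpu_arch_type: str, gpu_arch_version: str) -> str:
    if gpu_arch_type == "cuda":
        return "cu" + gpu_arch_version.replace(".", "")  # "12.4" -> "cu124"
    if gpu_arch_type == "rocm":
        return "rocm" + gpu_arch_version                 # "6.0"  -> "rocm6.0"
    return "cpu"

assert desired_cuda("rocm", "6.0") == "rocm6.0"
```
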
manywheel-py3_12-rocm6_0-test: # Testing
if: ${{ github.repository_owner == 'pytorch' }}
needs: manywheel-py3_12-rocm6_0-build
runs-on: linux.rocm.gpu
timeout-minutes: 240
env:
PYTORCH_ROOT: /pytorch
BUILDER_ROOT: /builder
PACKAGE_TYPE: manywheel
# TODO: This is a legacy variable that we eventually want to get rid of in
# favor of GPU_ARCH_VERSION
DESIRED_CUDA: rocm6.0
GPU_ARCH_VERSION: 6.0
GPU_ARCH_TYPE: rocm
SKIP_ALL_TESTS: 1
DOCKER_IMAGE: pytorch/manylinux-builder:rocm6.0-main
DESIRED_PYTHON: "3.12"
steps:
- name: Setup ROCm
uses: ./.github/actions/setup-rocm
- uses: actions/download-artifact@v3
name: Download Build Artifacts
with:
name: manywheel-py3_12-rocm6_0
path: "${{ runner.temp }}/artifacts/"
- name: Checkout PyTorch
uses: malfet/checkout@silent-checkout
with:
ref: ${{ github.event_name == 'pull_request' && github.event.pull_request.head.sha || github.sha }}
submodules: recursive
path: pytorch
quiet-checkout: true
- name: Clean PyTorch checkout
run: |
# Remove any artifacts from the previous checkouts
git clean -fxd
working-directory: pytorch
- name: Checkout pytorch/builder
uses: malfet/checkout@silent-checkout
with:
ref: main
submodules: recursive
repository: pytorch/builder
path: builder
quiet-checkout: true
- name: Clean pytorch/builder checkout
run: |
# Remove any artifacts from the previous checkouts
git clean -fxd
working-directory: builder
- name: ROCm set GPU_FLAG
run: |
echo "GPU_FLAG=--device=/dev/mem --device=/dev/kfd --device=/dev/dri --group-add video --group-add daemon" >> "${GITHUB_ENV}"
- name: Pull Docker image
uses: pytorch/test-infra/.github/actions/pull-docker-image@main
with:
docker-image: pytorch/manylinux-builder:rocm6.0-main
- name: Test Pytorch binary
uses: ./pytorch/.github/actions/test-pytorch-binary
- name: Teardown ROCm
uses: ./.github/actions/teardown-rocm
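
The "ROCm set GPU_FLAG" step above writes to $GITHUB_ENV, so every later step in the job sees GPU_FLAG as a plain environment variable; the binary-test action can then splice those --device/--group-add options into its docker run command line. A rough sketch of that consumption — the command assembly here is hypothetical, not the action's actual code:

```python
# Hypothetical sketch: splice GPU_FLAG (set via $GITHUB_ENV upstream) into
# a docker run invocation against the ROCm builder image.
import os
import shlex
import subprocess

gpu_flag = os.environ.get("GPU_FLAG", "")
cmd = [
    "docker", "run", "--rm",
    *shlex.split(gpu_flag),                  # --device=... --group-add ...
    "pytorch/manylinux-builder:rocm6.0-main",
    "bash", "-c", "rocminfo",
]
print(shlex.join(cmd))
subprocess.run(cmd, check=True)
```
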
manywheel-py3_12-rocm6_0-upload: # Uploading
if: ${{ github.repository_owner == 'pytorch' }}
permissions:
id-token: write
contents: read
needs: manywheel-py3_12-rocm6_0-test
with:
PYTORCH_ROOT: /pytorch
BUILDER_ROOT: /builder
PACKAGE_TYPE: manywheel
# TODO: This is a legacy variable that we eventually want to get rid of in
# favor of GPU_ARCH_VERSION
DESIRED_CUDA: rocm6.0
GPU_ARCH_VERSION: 6.0
GPU_ARCH_TYPE: rocm
DOCKER_IMAGE: pytorch/manylinux-builder:rocm6.0-main
DESIRED_PYTHON: "3.12"
build_name: manywheel-py3_12-rocm6_0
secrets:
github-token: ${{ secrets.GITHUB_TOKEN }}
conda-pytorchbot-token: ${{ secrets.CONDA_PYTORCHBOT_TOKEN }}
conda-pytorchbot-token-test: ${{ secrets.CONDA_PYTORCHBOT_TOKEN_TEST }}
uses: ./.github/workflows/_binary-upload.yml
manywheel-py3_12-rocm6_1-build:
if: ${{ github.repository_owner == 'pytorch' }}
uses: ./.github/workflows/_binary-build-linux.yml
with:
PYTORCH_ROOT: /pytorch
BUILDER_ROOT: /builder
PACKAGE_TYPE: manywheel
# TODO: This is a legacy variable that we eventually want to get rid of in
# favor of GPU_ARCH_VERSION
DESIRED_CUDA: rocm6.1
GPU_ARCH_VERSION: 6.1
GPU_ARCH_TYPE: rocm
DOCKER_IMAGE: pytorch/manylinux-builder:rocm6.1-main
DESIRED_PYTHON: "3.12"
build_name: manywheel-py3_12-rocm6_1
build_environment: linux-binary-manywheel
secrets:
github-token: ${{ secrets.GITHUB_TOKEN }}
manywheel-py3_12-rocm6_1-test: # Testing
if: ${{ github.repository_owner == 'pytorch' }}
needs: manywheel-py3_12-rocm6_1-build
runs-on: linux.rocm.gpu
timeout-minutes: 240
env:
PYTORCH_ROOT: /pytorch
BUILDER_ROOT: /builder
PACKAGE_TYPE: manywheel
# TODO: This is a legacy variable that we eventually want to get rid of in
# favor of GPU_ARCH_VERSION
DESIRED_CUDA: rocm6.1
GPU_ARCH_VERSION: 6.1
GPU_ARCH_TYPE: rocm
SKIP_ALL_TESTS: 1
DOCKER_IMAGE: pytorch/manylinux-builder:rocm6.1-main
DESIRED_PYTHON: "3.12"
steps:
- name: Setup ROCm
uses: ./.github/actions/setup-rocm
- uses: actions/download-artifact@v3
name: Download Build Artifacts
with:
name: manywheel-py3_12-rocm6_1
path: "${{ runner.temp }}/artifacts/"
- name: Checkout PyTorch
uses: malfet/checkout@silent-checkout
with:
ref: ${{ github.event_name == 'pull_request' && github.event.pull_request.head.sha || github.sha }}
submodules: recursive
path: pytorch
quiet-checkout: true
- name: Clean PyTorch checkout
run: |
# Remove any artifacts from the previous checkouts
git clean -fxd
working-directory: pytorch
- name: Checkout pytorch/builder
uses: malfet/checkout@silent-checkout
with:
ref: main
submodules: recursive
repository: pytorch/builder
path: builder
quiet-checkout: true
- name: Clean pytorch/builder checkout
run: |
# Remove any artifacts from the previous checkouts
git clean -fxd
working-directory: builder
- name: ROCm set GPU_FLAG
run: |
echo "GPU_FLAG=--device=/dev/mem --device=/dev/kfd --device=/dev/dri --group-add video --group-add daemon" >> "${GITHUB_ENV}"
- name: Pull Docker image
uses: pytorch/test-infra/.github/actions/pull-docker-image@main
with:
docker-image: pytorch/manylinux-builder:rocm6.1-main
- name: Test Pytorch binary
uses: ./pytorch/.github/actions/test-pytorch-binary
- name: Teardown ROCm
uses: ./.github/actions/teardown-rocm
manywheel-py3_12-rocm6_1-upload: # Uploading
if: ${{ github.repository_owner == 'pytorch' }}
permissions:
id-token: write
contents: read
needs: manywheel-py3_12-rocm6_1-test
with:
PYTORCH_ROOT: /pytorch
BUILDER_ROOT: /builder
PACKAGE_TYPE: manywheel
# TODO: This is a legacy variable that we eventually want to get rid of in
# favor of GPU_ARCH_VERSION
DESIRED_CUDA: rocm6.1
GPU_ARCH_VERSION: 6.1
GPU_ARCH_TYPE: rocm
DOCKER_IMAGE: pytorch/manylinux-builder:rocm6.1-main
DESIRED_PYTHON: "3.12"
build_name: manywheel-py3_12-rocm6_1
secrets:
github-token: ${{ secrets.GITHUB_TOKEN }}
conda-pytorchbot-token: ${{ secrets.CONDA_PYTORCHBOT_TOKEN }}
conda-pytorchbot-token-test: ${{ secrets.CONDA_PYTORCHBOT_TOKEN_TEST }}
uses: ./.github/workflows/_binary-upload.yml


@@ -54,7 +54,7 @@ jobs:
ALPINE_IMAGE: "docker.io/s390x/alpine"
build_name: manywheel-py3_8-cpu-s390x
build_environment: linux-s390x-binary-manywheel
PYTORCH_EXTRA_INSTALL_REQUIREMENTS: nvidia-cuda-nvrtc-cu12==12.1.105; platform_system == 'Linux' and platform_machine == 'x86_64' | nvidia-cuda-runtime-cu12==12.1.105; platform_system == 'Linux' and platform_machine == 'x86_64' | nvidia-cuda-cupti-cu12==12.1.105; platform_system == 'Linux' and platform_machine == 'x86_64' | nvidia-cudnn-cu12==9.1.0.70; platform_system == 'Linux' and platform_machine == 'x86_64' | nvidia-cublas-cu12==12.1.3.1; platform_system == 'Linux' and platform_machine == 'x86_64' | nvidia-cufft-cu12==11.0.2.54; platform_system == 'Linux' and platform_machine == 'x86_64' | nvidia-curand-cu12==10.3.2.106; platform_system == 'Linux' and platform_machine == 'x86_64' | nvidia-cusolver-cu12==11.4.5.107; platform_system == 'Linux' and platform_machine == 'x86_64' | nvidia-cusparse-cu12==12.1.0.106; platform_system == 'Linux' and platform_machine == 'x86_64' | nvidia-nccl-cu12==2.20.5; platform_system == 'Linux' and platform_machine == 'x86_64' | nvidia-nvtx-cu12==12.1.105; platform_system == 'Linux' and platform_machine == 'x86_64'
PYTORCH_EXTRA_INSTALL_REQUIREMENTS: nvidia-cuda-nvrtc-cu12==12.1.105; platform_system == 'Linux' and platform_machine == 'x86_64' | nvidia-cuda-runtime-cu12==12.1.105; platform_system == 'Linux' and platform_machine == 'x86_64' | nvidia-cuda-cupti-cu12==12.1.105; platform_system == 'Linux' and platform_machine == 'x86_64' | nvidia-cudnn-cu12==8.9.2.26; platform_system == 'Linux' and platform_machine == 'x86_64' | nvidia-cublas-cu12==12.1.3.1; platform_system == 'Linux' and platform_machine == 'x86_64' | nvidia-cufft-cu12==11.0.2.54; platform_system == 'Linux' and platform_machine == 'x86_64' | nvidia-curand-cu12==10.3.2.106; platform_system == 'Linux' and platform_machine == 'x86_64' | nvidia-cusolver-cu12==11.4.5.107; platform_system == 'Linux' and platform_machine == 'x86_64' | nvidia-cusparse-cu12==12.1.0.106; platform_system == 'Linux' and platform_machine == 'x86_64' | nvidia-nccl-cu12==2.20.5; platform_system == 'Linux' and platform_machine == 'x86_64' | nvidia-nvtx-cu12==12.1.105; platform_system == 'Linux' and platform_machine == 'x86_64'
secrets:
github-token: ${{ secrets.GITHUB_TOKEN }}
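
The s390x CPU wheels carry the same x86_64-only CUDA requirements, which looks odd until the markers are evaluated: every entry is a no-op off x86_64, so nothing extra gets installed on s390x. The macOS jobs further down rely on the same mechanism (platform_system == 'Linux' fails on Darwin). A sketch, again assuming the `packaging` library:

```python
# Sketch: the x86_64-only marker evaluates to False under an s390x
# environment, so pip skips the CUDA wheels for these builds.
from packaging.requirements import Requirement

req = Requirement(
    "nvidia-cudnn-cu12==8.9.2.26; "
    "platform_system == 'Linux' and platform_machine == 'x86_64'"
)
s390x = {"platform_system": "Linux", "platform_machine": "s390x"}
print(req.marker.evaluate(environment=s390x))   # False
```
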
manywheel-py3_8-cpu-s390x-test: # Testing
@@ -117,7 +117,7 @@ jobs:
ALPINE_IMAGE: "docker.io/s390x/alpine"
build_name: manywheel-py3_9-cpu-s390x
build_environment: linux-s390x-binary-manywheel
PYTORCH_EXTRA_INSTALL_REQUIREMENTS: nvidia-cuda-nvrtc-cu12==12.1.105; platform_system == 'Linux' and platform_machine == 'x86_64' | nvidia-cuda-runtime-cu12==12.1.105; platform_system == 'Linux' and platform_machine == 'x86_64' | nvidia-cuda-cupti-cu12==12.1.105; platform_system == 'Linux' and platform_machine == 'x86_64' | nvidia-cudnn-cu12==9.1.0.70; platform_system == 'Linux' and platform_machine == 'x86_64' | nvidia-cublas-cu12==12.1.3.1; platform_system == 'Linux' and platform_machine == 'x86_64' | nvidia-cufft-cu12==11.0.2.54; platform_system == 'Linux' and platform_machine == 'x86_64' | nvidia-curand-cu12==10.3.2.106; platform_system == 'Linux' and platform_machine == 'x86_64' | nvidia-cusolver-cu12==11.4.5.107; platform_system == 'Linux' and platform_machine == 'x86_64' | nvidia-cusparse-cu12==12.1.0.106; platform_system == 'Linux' and platform_machine == 'x86_64' | nvidia-nccl-cu12==2.20.5; platform_system == 'Linux' and platform_machine == 'x86_64' | nvidia-nvtx-cu12==12.1.105; platform_system == 'Linux' and platform_machine == 'x86_64'
PYTORCH_EXTRA_INSTALL_REQUIREMENTS: nvidia-cuda-nvrtc-cu12==12.1.105; platform_system == 'Linux' and platform_machine == 'x86_64' | nvidia-cuda-runtime-cu12==12.1.105; platform_system == 'Linux' and platform_machine == 'x86_64' | nvidia-cuda-cupti-cu12==12.1.105; platform_system == 'Linux' and platform_machine == 'x86_64' | nvidia-cudnn-cu12==8.9.2.26; platform_system == 'Linux' and platform_machine == 'x86_64' | nvidia-cublas-cu12==12.1.3.1; platform_system == 'Linux' and platform_machine == 'x86_64' | nvidia-cufft-cu12==11.0.2.54; platform_system == 'Linux' and platform_machine == 'x86_64' | nvidia-curand-cu12==10.3.2.106; platform_system == 'Linux' and platform_machine == 'x86_64' | nvidia-cusolver-cu12==11.4.5.107; platform_system == 'Linux' and platform_machine == 'x86_64' | nvidia-cusparse-cu12==12.1.0.106; platform_system == 'Linux' and platform_machine == 'x86_64' | nvidia-nccl-cu12==2.20.5; platform_system == 'Linux' and platform_machine == 'x86_64' | nvidia-nvtx-cu12==12.1.105; platform_system == 'Linux' and platform_machine == 'x86_64'
secrets:
github-token: ${{ secrets.GITHUB_TOKEN }}
manywheel-py3_9-cpu-s390x-test: # Testing
@@ -180,7 +180,7 @@ jobs:
ALPINE_IMAGE: "docker.io/s390x/alpine"
build_name: manywheel-py3_10-cpu-s390x
build_environment: linux-s390x-binary-manywheel
PYTORCH_EXTRA_INSTALL_REQUIREMENTS: nvidia-cuda-nvrtc-cu12==12.1.105; platform_system == 'Linux' and platform_machine == 'x86_64' | nvidia-cuda-runtime-cu12==12.1.105; platform_system == 'Linux' and platform_machine == 'x86_64' | nvidia-cuda-cupti-cu12==12.1.105; platform_system == 'Linux' and platform_machine == 'x86_64' | nvidia-cudnn-cu12==9.1.0.70; platform_system == 'Linux' and platform_machine == 'x86_64' | nvidia-cublas-cu12==12.1.3.1; platform_system == 'Linux' and platform_machine == 'x86_64' | nvidia-cufft-cu12==11.0.2.54; platform_system == 'Linux' and platform_machine == 'x86_64' | nvidia-curand-cu12==10.3.2.106; platform_system == 'Linux' and platform_machine == 'x86_64' | nvidia-cusolver-cu12==11.4.5.107; platform_system == 'Linux' and platform_machine == 'x86_64' | nvidia-cusparse-cu12==12.1.0.106; platform_system == 'Linux' and platform_machine == 'x86_64' | nvidia-nccl-cu12==2.20.5; platform_system == 'Linux' and platform_machine == 'x86_64' | nvidia-nvtx-cu12==12.1.105; platform_system == 'Linux' and platform_machine == 'x86_64'
PYTORCH_EXTRA_INSTALL_REQUIREMENTS: nvidia-cuda-nvrtc-cu12==12.1.105; platform_system == 'Linux' and platform_machine == 'x86_64' | nvidia-cuda-runtime-cu12==12.1.105; platform_system == 'Linux' and platform_machine == 'x86_64' | nvidia-cuda-cupti-cu12==12.1.105; platform_system == 'Linux' and platform_machine == 'x86_64' | nvidia-cudnn-cu12==8.9.2.26; platform_system == 'Linux' and platform_machine == 'x86_64' | nvidia-cublas-cu12==12.1.3.1; platform_system == 'Linux' and platform_machine == 'x86_64' | nvidia-cufft-cu12==11.0.2.54; platform_system == 'Linux' and platform_machine == 'x86_64' | nvidia-curand-cu12==10.3.2.106; platform_system == 'Linux' and platform_machine == 'x86_64' | nvidia-cusolver-cu12==11.4.5.107; platform_system == 'Linux' and platform_machine == 'x86_64' | nvidia-cusparse-cu12==12.1.0.106; platform_system == 'Linux' and platform_machine == 'x86_64' | nvidia-nccl-cu12==2.20.5; platform_system == 'Linux' and platform_machine == 'x86_64' | nvidia-nvtx-cu12==12.1.105; platform_system == 'Linux' and platform_machine == 'x86_64'
secrets:
github-token: ${{ secrets.GITHUB_TOKEN }}
manywheel-py3_10-cpu-s390x-test: # Testing
@@ -243,7 +243,7 @@ jobs:
ALPINE_IMAGE: "docker.io/s390x/alpine"
build_name: manywheel-py3_11-cpu-s390x
build_environment: linux-s390x-binary-manywheel
PYTORCH_EXTRA_INSTALL_REQUIREMENTS: nvidia-cuda-nvrtc-cu12==12.1.105; platform_system == 'Linux' and platform_machine == 'x86_64' | nvidia-cuda-runtime-cu12==12.1.105; platform_system == 'Linux' and platform_machine == 'x86_64' | nvidia-cuda-cupti-cu12==12.1.105; platform_system == 'Linux' and platform_machine == 'x86_64' | nvidia-cudnn-cu12==9.1.0.70; platform_system == 'Linux' and platform_machine == 'x86_64' | nvidia-cublas-cu12==12.1.3.1; platform_system == 'Linux' and platform_machine == 'x86_64' | nvidia-cufft-cu12==11.0.2.54; platform_system == 'Linux' and platform_machine == 'x86_64' | nvidia-curand-cu12==10.3.2.106; platform_system == 'Linux' and platform_machine == 'x86_64' | nvidia-cusolver-cu12==11.4.5.107; platform_system == 'Linux' and platform_machine == 'x86_64' | nvidia-cusparse-cu12==12.1.0.106; platform_system == 'Linux' and platform_machine == 'x86_64' | nvidia-nccl-cu12==2.20.5; platform_system == 'Linux' and platform_machine == 'x86_64' | nvidia-nvtx-cu12==12.1.105; platform_system == 'Linux' and platform_machine == 'x86_64'
PYTORCH_EXTRA_INSTALL_REQUIREMENTS: nvidia-cuda-nvrtc-cu12==12.1.105; platform_system == 'Linux' and platform_machine == 'x86_64' | nvidia-cuda-runtime-cu12==12.1.105; platform_system == 'Linux' and platform_machine == 'x86_64' | nvidia-cuda-cupti-cu12==12.1.105; platform_system == 'Linux' and platform_machine == 'x86_64' | nvidia-cudnn-cu12==8.9.2.26; platform_system == 'Linux' and platform_machine == 'x86_64' | nvidia-cublas-cu12==12.1.3.1; platform_system == 'Linux' and platform_machine == 'x86_64' | nvidia-cufft-cu12==11.0.2.54; platform_system == 'Linux' and platform_machine == 'x86_64' | nvidia-curand-cu12==10.3.2.106; platform_system == 'Linux' and platform_machine == 'x86_64' | nvidia-cusolver-cu12==11.4.5.107; platform_system == 'Linux' and platform_machine == 'x86_64' | nvidia-cusparse-cu12==12.1.0.106; platform_system == 'Linux' and platform_machine == 'x86_64' | nvidia-nccl-cu12==2.20.5; platform_system == 'Linux' and platform_machine == 'x86_64' | nvidia-nvtx-cu12==12.1.105; platform_system == 'Linux' and platform_machine == 'x86_64'
secrets:
github-token: ${{ secrets.GITHUB_TOKEN }}
manywheel-py3_11-cpu-s390x-test: # Testing
@@ -306,7 +306,7 @@ jobs:
ALPINE_IMAGE: "docker.io/s390x/alpine"
build_name: manywheel-py3_12-cpu-s390x
build_environment: linux-s390x-binary-manywheel
PYTORCH_EXTRA_INSTALL_REQUIREMENTS: nvidia-cuda-nvrtc-cu12==12.1.105; platform_system == 'Linux' and platform_machine == 'x86_64' | nvidia-cuda-runtime-cu12==12.1.105; platform_system == 'Linux' and platform_machine == 'x86_64' | nvidia-cuda-cupti-cu12==12.1.105; platform_system == 'Linux' and platform_machine == 'x86_64' | nvidia-cudnn-cu12==9.1.0.70; platform_system == 'Linux' and platform_machine == 'x86_64' | nvidia-cublas-cu12==12.1.3.1; platform_system == 'Linux' and platform_machine == 'x86_64' | nvidia-cufft-cu12==11.0.2.54; platform_system == 'Linux' and platform_machine == 'x86_64' | nvidia-curand-cu12==10.3.2.106; platform_system == 'Linux' and platform_machine == 'x86_64' | nvidia-cusolver-cu12==11.4.5.107; platform_system == 'Linux' and platform_machine == 'x86_64' | nvidia-cusparse-cu12==12.1.0.106; platform_system == 'Linux' and platform_machine == 'x86_64' | nvidia-nccl-cu12==2.20.5; platform_system == 'Linux' and platform_machine == 'x86_64' | nvidia-nvtx-cu12==12.1.105; platform_system == 'Linux' and platform_machine == 'x86_64'
PYTORCH_EXTRA_INSTALL_REQUIREMENTS: nvidia-cuda-nvrtc-cu12==12.1.105; platform_system == 'Linux' and platform_machine == 'x86_64' | nvidia-cuda-runtime-cu12==12.1.105; platform_system == 'Linux' and platform_machine == 'x86_64' | nvidia-cuda-cupti-cu12==12.1.105; platform_system == 'Linux' and platform_machine == 'x86_64' | nvidia-cudnn-cu12==8.9.2.26; platform_system == 'Linux' and platform_machine == 'x86_64' | nvidia-cublas-cu12==12.1.3.1; platform_system == 'Linux' and platform_machine == 'x86_64' | nvidia-cufft-cu12==11.0.2.54; platform_system == 'Linux' and platform_machine == 'x86_64' | nvidia-curand-cu12==10.3.2.106; platform_system == 'Linux' and platform_machine == 'x86_64' | nvidia-cusolver-cu12==11.4.5.107; platform_system == 'Linux' and platform_machine == 'x86_64' | nvidia-cusparse-cu12==12.1.0.106; platform_system == 'Linux' and platform_machine == 'x86_64' | nvidia-nccl-cu12==2.20.5; platform_system == 'Linux' and platform_machine == 'x86_64' | nvidia-nvtx-cu12==12.1.105; platform_system == 'Linux' and platform_machine == 'x86_64'
secrets:
github-token: ${{ secrets.GITHUB_TOKEN }}
manywheel-py3_12-cpu-s390x-test: # Testing


@@ -46,7 +46,7 @@ jobs:
GPU_ARCH_TYPE: cpu
SKIP_ALL_TESTS: 1
DESIRED_PYTHON: "3.8"
PYTORCH_EXTRA_INSTALL_REQUIREMENTS: nvidia-cuda-nvrtc-cu12==12.1.105; platform_system == 'Linux' and platform_machine == 'x86_64' | nvidia-cuda-runtime-cu12==12.1.105; platform_system == 'Linux' and platform_machine == 'x86_64' | nvidia-cuda-cupti-cu12==12.1.105; platform_system == 'Linux' and platform_machine == 'x86_64' | nvidia-cudnn-cu12==9.1.0.70; platform_system == 'Linux' and platform_machine == 'x86_64' | nvidia-cublas-cu12==12.1.3.1; platform_system == 'Linux' and platform_machine == 'x86_64' | nvidia-cufft-cu12==11.0.2.54; platform_system == 'Linux' and platform_machine == 'x86_64' | nvidia-curand-cu12==10.3.2.106; platform_system == 'Linux' and platform_machine == 'x86_64' | nvidia-cusolver-cu12==11.4.5.107; platform_system == 'Linux' and platform_machine == 'x86_64' | nvidia-cusparse-cu12==12.1.0.106; platform_system == 'Linux' and platform_machine == 'x86_64' | nvidia-nccl-cu12==2.20.5; platform_system == 'Linux' and platform_machine == 'x86_64' | nvidia-nvtx-cu12==12.1.105; platform_system == 'Linux' and platform_machine == 'x86_64'
PYTORCH_EXTRA_INSTALL_REQUIREMENTS: nvidia-cuda-nvrtc-cu12==12.1.105; platform_system == 'Linux' and platform_machine == 'x86_64' | nvidia-cuda-runtime-cu12==12.1.105; platform_system == 'Linux' and platform_machine == 'x86_64' | nvidia-cuda-cupti-cu12==12.1.105; platform_system == 'Linux' and platform_machine == 'x86_64' | nvidia-cudnn-cu12==8.9.2.26; platform_system == 'Linux' and platform_machine == 'x86_64' | nvidia-cublas-cu12==12.1.3.1; platform_system == 'Linux' and platform_machine == 'x86_64' | nvidia-cufft-cu12==11.0.2.54; platform_system == 'Linux' and platform_machine == 'x86_64' | nvidia-curand-cu12==10.3.2.106; platform_system == 'Linux' and platform_machine == 'x86_64' | nvidia-cusolver-cu12==11.4.5.107; platform_system == 'Linux' and platform_machine == 'x86_64' | nvidia-cusparse-cu12==12.1.0.106; platform_system == 'Linux' and platform_machine == 'x86_64' | nvidia-nccl-cu12==2.20.5; platform_system == 'Linux' and platform_machine == 'x86_64' | nvidia-nvtx-cu12==12.1.105; platform_system == 'Linux' and platform_machine == 'x86_64'
# For sccache access (only on non-forked PRs)
AWS_ACCESS_KEY_ID: ${{ secrets.MACOS_SCCACHE_S3_ACCESS_KEY_ID }}
AWS_SECRET_ACCESS_KEY: ${{ secrets.MACOS_SCCACHE_S3_SECRET_ACCESS_KEY }}
@@ -165,7 +165,7 @@ jobs:
GPU_ARCH_TYPE: cpu
SKIP_ALL_TESTS: 1
DESIRED_PYTHON: "3.9"
PYTORCH_EXTRA_INSTALL_REQUIREMENTS: nvidia-cuda-nvrtc-cu12==12.1.105; platform_system == 'Linux' and platform_machine == 'x86_64' | nvidia-cuda-runtime-cu12==12.1.105; platform_system == 'Linux' and platform_machine == 'x86_64' | nvidia-cuda-cupti-cu12==12.1.105; platform_system == 'Linux' and platform_machine == 'x86_64' | nvidia-cudnn-cu12==9.1.0.70; platform_system == 'Linux' and platform_machine == 'x86_64' | nvidia-cublas-cu12==12.1.3.1; platform_system == 'Linux' and platform_machine == 'x86_64' | nvidia-cufft-cu12==11.0.2.54; platform_system == 'Linux' and platform_machine == 'x86_64' | nvidia-curand-cu12==10.3.2.106; platform_system == 'Linux' and platform_machine == 'x86_64' | nvidia-cusolver-cu12==11.4.5.107; platform_system == 'Linux' and platform_machine == 'x86_64' | nvidia-cusparse-cu12==12.1.0.106; platform_system == 'Linux' and platform_machine == 'x86_64' | nvidia-nccl-cu12==2.20.5; platform_system == 'Linux' and platform_machine == 'x86_64' | nvidia-nvtx-cu12==12.1.105; platform_system == 'Linux' and platform_machine == 'x86_64'
PYTORCH_EXTRA_INSTALL_REQUIREMENTS: nvidia-cuda-nvrtc-cu12==12.1.105; platform_system == 'Linux' and platform_machine == 'x86_64' | nvidia-cuda-runtime-cu12==12.1.105; platform_system == 'Linux' and platform_machine == 'x86_64' | nvidia-cuda-cupti-cu12==12.1.105; platform_system == 'Linux' and platform_machine == 'x86_64' | nvidia-cudnn-cu12==8.9.2.26; platform_system == 'Linux' and platform_machine == 'x86_64' | nvidia-cublas-cu12==12.1.3.1; platform_system == 'Linux' and platform_machine == 'x86_64' | nvidia-cufft-cu12==11.0.2.54; platform_system == 'Linux' and platform_machine == 'x86_64' | nvidia-curand-cu12==10.3.2.106; platform_system == 'Linux' and platform_machine == 'x86_64' | nvidia-cusolver-cu12==11.4.5.107; platform_system == 'Linux' and platform_machine == 'x86_64' | nvidia-cusparse-cu12==12.1.0.106; platform_system == 'Linux' and platform_machine == 'x86_64' | nvidia-nccl-cu12==2.20.5; platform_system == 'Linux' and platform_machine == 'x86_64' | nvidia-nvtx-cu12==12.1.105; platform_system == 'Linux' and platform_machine == 'x86_64'
# For sccache access (only on non-forked PRs)
AWS_ACCESS_KEY_ID: ${{ secrets.MACOS_SCCACHE_S3_ACCESS_KEY_ID }}
AWS_SECRET_ACCESS_KEY: ${{ secrets.MACOS_SCCACHE_S3_SECRET_ACCESS_KEY }}
@@ -284,7 +284,7 @@ jobs:
GPU_ARCH_TYPE: cpu
SKIP_ALL_TESTS: 1
DESIRED_PYTHON: "3.10"
PYTORCH_EXTRA_INSTALL_REQUIREMENTS: nvidia-cuda-nvrtc-cu12==12.1.105; platform_system == 'Linux' and platform_machine == 'x86_64' | nvidia-cuda-runtime-cu12==12.1.105; platform_system == 'Linux' and platform_machine == 'x86_64' | nvidia-cuda-cupti-cu12==12.1.105; platform_system == 'Linux' and platform_machine == 'x86_64' | nvidia-cudnn-cu12==9.1.0.70; platform_system == 'Linux' and platform_machine == 'x86_64' | nvidia-cublas-cu12==12.1.3.1; platform_system == 'Linux' and platform_machine == 'x86_64' | nvidia-cufft-cu12==11.0.2.54; platform_system == 'Linux' and platform_machine == 'x86_64' | nvidia-curand-cu12==10.3.2.106; platform_system == 'Linux' and platform_machine == 'x86_64' | nvidia-cusolver-cu12==11.4.5.107; platform_system == 'Linux' and platform_machine == 'x86_64' | nvidia-cusparse-cu12==12.1.0.106; platform_system == 'Linux' and platform_machine == 'x86_64' | nvidia-nccl-cu12==2.20.5; platform_system == 'Linux' and platform_machine == 'x86_64' | nvidia-nvtx-cu12==12.1.105; platform_system == 'Linux' and platform_machine == 'x86_64'
PYTORCH_EXTRA_INSTALL_REQUIREMENTS: nvidia-cuda-nvrtc-cu12==12.1.105; platform_system == 'Linux' and platform_machine == 'x86_64' | nvidia-cuda-runtime-cu12==12.1.105; platform_system == 'Linux' and platform_machine == 'x86_64' | nvidia-cuda-cupti-cu12==12.1.105; platform_system == 'Linux' and platform_machine == 'x86_64' | nvidia-cudnn-cu12==8.9.2.26; platform_system == 'Linux' and platform_machine == 'x86_64' | nvidia-cublas-cu12==12.1.3.1; platform_system == 'Linux' and platform_machine == 'x86_64' | nvidia-cufft-cu12==11.0.2.54; platform_system == 'Linux' and platform_machine == 'x86_64' | nvidia-curand-cu12==10.3.2.106; platform_system == 'Linux' and platform_machine == 'x86_64' | nvidia-cusolver-cu12==11.4.5.107; platform_system == 'Linux' and platform_machine == 'x86_64' | nvidia-cusparse-cu12==12.1.0.106; platform_system == 'Linux' and platform_machine == 'x86_64' | nvidia-nccl-cu12==2.20.5; platform_system == 'Linux' and platform_machine == 'x86_64' | nvidia-nvtx-cu12==12.1.105; platform_system == 'Linux' and platform_machine == 'x86_64'
# For sccache access (only on non-forked PRs)
AWS_ACCESS_KEY_ID: ${{ secrets.MACOS_SCCACHE_S3_ACCESS_KEY_ID }}
AWS_SECRET_ACCESS_KEY: ${{ secrets.MACOS_SCCACHE_S3_SECRET_ACCESS_KEY }}
@@ -403,7 +403,7 @@ jobs:
GPU_ARCH_TYPE: cpu
SKIP_ALL_TESTS: 1
DESIRED_PYTHON: "3.11"
PYTORCH_EXTRA_INSTALL_REQUIREMENTS: nvidia-cuda-nvrtc-cu12==12.1.105; platform_system == 'Linux' and platform_machine == 'x86_64' | nvidia-cuda-runtime-cu12==12.1.105; platform_system == 'Linux' and platform_machine == 'x86_64' | nvidia-cuda-cupti-cu12==12.1.105; platform_system == 'Linux' and platform_machine == 'x86_64' | nvidia-cudnn-cu12==9.1.0.70; platform_system == 'Linux' and platform_machine == 'x86_64' | nvidia-cublas-cu12==12.1.3.1; platform_system == 'Linux' and platform_machine == 'x86_64' | nvidia-cufft-cu12==11.0.2.54; platform_system == 'Linux' and platform_machine == 'x86_64' | nvidia-curand-cu12==10.3.2.106; platform_system == 'Linux' and platform_machine == 'x86_64' | nvidia-cusolver-cu12==11.4.5.107; platform_system == 'Linux' and platform_machine == 'x86_64' | nvidia-cusparse-cu12==12.1.0.106; platform_system == 'Linux' and platform_machine == 'x86_64' | nvidia-nccl-cu12==2.20.5; platform_system == 'Linux' and platform_machine == 'x86_64' | nvidia-nvtx-cu12==12.1.105; platform_system == 'Linux' and platform_machine == 'x86_64'
PYTORCH_EXTRA_INSTALL_REQUIREMENTS: nvidia-cuda-nvrtc-cu12==12.1.105; platform_system == 'Linux' and platform_machine == 'x86_64' | nvidia-cuda-runtime-cu12==12.1.105; platform_system == 'Linux' and platform_machine == 'x86_64' | nvidia-cuda-cupti-cu12==12.1.105; platform_system == 'Linux' and platform_machine == 'x86_64' | nvidia-cudnn-cu12==8.9.2.26; platform_system == 'Linux' and platform_machine == 'x86_64' | nvidia-cublas-cu12==12.1.3.1; platform_system == 'Linux' and platform_machine == 'x86_64' | nvidia-cufft-cu12==11.0.2.54; platform_system == 'Linux' and platform_machine == 'x86_64' | nvidia-curand-cu12==10.3.2.106; platform_system == 'Linux' and platform_machine == 'x86_64' | nvidia-cusolver-cu12==11.4.5.107; platform_system == 'Linux' and platform_machine == 'x86_64' | nvidia-cusparse-cu12==12.1.0.106; platform_system == 'Linux' and platform_machine == 'x86_64' | nvidia-nccl-cu12==2.20.5; platform_system == 'Linux' and platform_machine == 'x86_64' | nvidia-nvtx-cu12==12.1.105; platform_system == 'Linux' and platform_machine == 'x86_64'
# For sccache access (only on non-forked PRs)
AWS_ACCESS_KEY_ID: ${{ secrets.MACOS_SCCACHE_S3_ACCESS_KEY_ID }}
AWS_SECRET_ACCESS_KEY: ${{ secrets.MACOS_SCCACHE_S3_SECRET_ACCESS_KEY }}
@@ -522,7 +522,7 @@ jobs:
GPU_ARCH_TYPE: cpu
SKIP_ALL_TESTS: 1
DESIRED_PYTHON: "3.12"
PYTORCH_EXTRA_INSTALL_REQUIREMENTS: nvidia-cuda-nvrtc-cu12==12.1.105; platform_system == 'Linux' and platform_machine == 'x86_64' | nvidia-cuda-runtime-cu12==12.1.105; platform_system == 'Linux' and platform_machine == 'x86_64' | nvidia-cuda-cupti-cu12==12.1.105; platform_system == 'Linux' and platform_machine == 'x86_64' | nvidia-cudnn-cu12==9.1.0.70; platform_system == 'Linux' and platform_machine == 'x86_64' | nvidia-cublas-cu12==12.1.3.1; platform_system == 'Linux' and platform_machine == 'x86_64' | nvidia-cufft-cu12==11.0.2.54; platform_system == 'Linux' and platform_machine == 'x86_64' | nvidia-curand-cu12==10.3.2.106; platform_system == 'Linux' and platform_machine == 'x86_64' | nvidia-cusolver-cu12==11.4.5.107; platform_system == 'Linux' and platform_machine == 'x86_64' | nvidia-cusparse-cu12==12.1.0.106; platform_system == 'Linux' and platform_machine == 'x86_64' | nvidia-nccl-cu12==2.20.5; platform_system == 'Linux' and platform_machine == 'x86_64' | nvidia-nvtx-cu12==12.1.105; platform_system == 'Linux' and platform_machine == 'x86_64'
PYTORCH_EXTRA_INSTALL_REQUIREMENTS: nvidia-cuda-nvrtc-cu12==12.1.105; platform_system == 'Linux' and platform_machine == 'x86_64' | nvidia-cuda-runtime-cu12==12.1.105; platform_system == 'Linux' and platform_machine == 'x86_64' | nvidia-cuda-cupti-cu12==12.1.105; platform_system == 'Linux' and platform_machine == 'x86_64' | nvidia-cudnn-cu12==8.9.2.26; platform_system == 'Linux' and platform_machine == 'x86_64' | nvidia-cublas-cu12==12.1.3.1; platform_system == 'Linux' and platform_machine == 'x86_64' | nvidia-cufft-cu12==11.0.2.54; platform_system == 'Linux' and platform_machine == 'x86_64' | nvidia-curand-cu12==10.3.2.106; platform_system == 'Linux' and platform_machine == 'x86_64' | nvidia-cusolver-cu12==11.4.5.107; platform_system == 'Linux' and platform_machine == 'x86_64' | nvidia-cusparse-cu12==12.1.0.106; platform_system == 'Linux' and platform_machine == 'x86_64' | nvidia-nccl-cu12==2.20.5; platform_system == 'Linux' and platform_machine == 'x86_64' | nvidia-nvtx-cu12==12.1.105; platform_system == 'Linux' and platform_machine == 'x86_64'
# For sccache access (only on non-forked PRs)
AWS_ACCESS_KEY_ID: ${{ secrets.MACOS_SCCACHE_S3_ACCESS_KEY_ID }}
AWS_SECRET_ACCESS_KEY: ${{ secrets.MACOS_SCCACHE_S3_SECRET_ACCESS_KEY }}


@@ -46,7 +46,7 @@ jobs:
GPU_ARCH_TYPE: cpu
SKIP_ALL_TESTS: 1
DESIRED_PYTHON: "3.8"
PYTORCH_EXTRA_INSTALL_REQUIREMENTS: nvidia-cuda-nvrtc-cu12==12.1.105; platform_system == 'Linux' and platform_machine == 'x86_64' | nvidia-cuda-runtime-cu12==12.1.105; platform_system == 'Linux' and platform_machine == 'x86_64' | nvidia-cuda-cupti-cu12==12.1.105; platform_system == 'Linux' and platform_machine == 'x86_64' | nvidia-cudnn-cu12==9.1.0.70; platform_system == 'Linux' and platform_machine == 'x86_64' | nvidia-cublas-cu12==12.1.3.1; platform_system == 'Linux' and platform_machine == 'x86_64' | nvidia-cufft-cu12==11.0.2.54; platform_system == 'Linux' and platform_machine == 'x86_64' | nvidia-curand-cu12==10.3.2.106; platform_system == 'Linux' and platform_machine == 'x86_64' | nvidia-cusolver-cu12==11.4.5.107; platform_system == 'Linux' and platform_machine == 'x86_64' | nvidia-cusparse-cu12==12.1.0.106; platform_system == 'Linux' and platform_machine == 'x86_64' | nvidia-nccl-cu12==2.20.5; platform_system == 'Linux' and platform_machine == 'x86_64' | nvidia-nvtx-cu12==12.1.105; platform_system == 'Linux' and platform_machine == 'x86_64'
PYTORCH_EXTRA_INSTALL_REQUIREMENTS: nvidia-cuda-nvrtc-cu12==12.1.105; platform_system == 'Linux' and platform_machine == 'x86_64' | nvidia-cuda-runtime-cu12==12.1.105; platform_system == 'Linux' and platform_machine == 'x86_64' | nvidia-cuda-cupti-cu12==12.1.105; platform_system == 'Linux' and platform_machine == 'x86_64' | nvidia-cudnn-cu12==8.9.2.26; platform_system == 'Linux' and platform_machine == 'x86_64' | nvidia-cublas-cu12==12.1.3.1; platform_system == 'Linux' and platform_machine == 'x86_64' | nvidia-cufft-cu12==11.0.2.54; platform_system == 'Linux' and platform_machine == 'x86_64' | nvidia-curand-cu12==10.3.2.106; platform_system == 'Linux' and platform_machine == 'x86_64' | nvidia-cusolver-cu12==11.4.5.107; platform_system == 'Linux' and platform_machine == 'x86_64' | nvidia-cusparse-cu12==12.1.0.106; platform_system == 'Linux' and platform_machine == 'x86_64' | nvidia-nccl-cu12==2.20.5; platform_system == 'Linux' and platform_machine == 'x86_64' | nvidia-nvtx-cu12==12.1.105; platform_system == 'Linux' and platform_machine == 'x86_64'
steps:
- name: Display EC2 information
shell: bash
@@ -290,7 +290,7 @@ jobs:
GPU_ARCH_TYPE: cuda
SKIP_ALL_TESTS: 1
DESIRED_PYTHON: "3.8"
PYTORCH_EXTRA_INSTALL_REQUIREMENTS: nvidia-cuda-nvrtc-cu12==12.1.105; platform_system == 'Linux' and platform_machine == 'x86_64' | nvidia-cuda-runtime-cu12==12.1.105; platform_system == 'Linux' and platform_machine == 'x86_64' | nvidia-cuda-cupti-cu12==12.1.105; platform_system == 'Linux' and platform_machine == 'x86_64' | nvidia-cudnn-cu12==9.1.0.70; platform_system == 'Linux' and platform_machine == 'x86_64' | nvidia-cublas-cu12==12.1.3.1; platform_system == 'Linux' and platform_machine == 'x86_64' | nvidia-cufft-cu12==11.0.2.54; platform_system == 'Linux' and platform_machine == 'x86_64' | nvidia-curand-cu12==10.3.2.106; platform_system == 'Linux' and platform_machine == 'x86_64' | nvidia-cusolver-cu12==11.4.5.107; platform_system == 'Linux' and platform_machine == 'x86_64' | nvidia-cusparse-cu12==12.1.0.106; platform_system == 'Linux' and platform_machine == 'x86_64' | nvidia-nccl-cu12==2.20.5; platform_system == 'Linux' and platform_machine == 'x86_64' | nvidia-nvtx-cu12==12.1.105; platform_system == 'Linux' and platform_machine == 'x86_64'
PYTORCH_EXTRA_INSTALL_REQUIREMENTS: nvidia-cuda-nvrtc-cu12==12.1.105; platform_system == 'Linux' and platform_machine == 'x86_64' | nvidia-cuda-runtime-cu12==12.1.105; platform_system == 'Linux' and platform_machine == 'x86_64' | nvidia-cuda-cupti-cu12==12.1.105; platform_system == 'Linux' and platform_machine == 'x86_64' | nvidia-cudnn-cu12==8.9.2.26; platform_system == 'Linux' and platform_machine == 'x86_64' | nvidia-cublas-cu12==12.1.3.1; platform_system == 'Linux' and platform_machine == 'x86_64' | nvidia-cufft-cu12==11.0.2.54; platform_system == 'Linux' and platform_machine == 'x86_64' | nvidia-curand-cu12==10.3.2.106; platform_system == 'Linux' and platform_machine == 'x86_64' | nvidia-cusolver-cu12==11.4.5.107; platform_system == 'Linux' and platform_machine == 'x86_64' | nvidia-cusparse-cu12==12.1.0.106; platform_system == 'Linux' and platform_machine == 'x86_64' | nvidia-nccl-cu12==2.20.5; platform_system == 'Linux' and platform_machine == 'x86_64' | nvidia-nvtx-cu12==12.1.105; platform_system == 'Linux' and platform_machine == 'x86_64'
steps:
- name: Display EC2 information
shell: bash
@@ -536,7 +536,7 @@ jobs:
GPU_ARCH_TYPE: cuda
SKIP_ALL_TESTS: 1
DESIRED_PYTHON: "3.8"
PYTORCH_EXTRA_INSTALL_REQUIREMENTS: nvidia-cuda-nvrtc-cu12==12.1.105; platform_system == 'Linux' and platform_machine == 'x86_64' | nvidia-cuda-runtime-cu12==12.1.105; platform_system == 'Linux' and platform_machine == 'x86_64' | nvidia-cuda-cupti-cu12==12.1.105; platform_system == 'Linux' and platform_machine == 'x86_64' | nvidia-cudnn-cu12==9.1.0.70; platform_system == 'Linux' and platform_machine == 'x86_64' | nvidia-cublas-cu12==12.1.3.1; platform_system == 'Linux' and platform_machine == 'x86_64' | nvidia-cufft-cu12==11.0.2.54; platform_system == 'Linux' and platform_machine == 'x86_64' | nvidia-curand-cu12==10.3.2.106; platform_system == 'Linux' and platform_machine == 'x86_64' | nvidia-cusolver-cu12==11.4.5.107; platform_system == 'Linux' and platform_machine == 'x86_64' | nvidia-cusparse-cu12==12.1.0.106; platform_system == 'Linux' and platform_machine == 'x86_64' | nvidia-nccl-cu12==2.20.5; platform_system == 'Linux' and platform_machine == 'x86_64' | nvidia-nvtx-cu12==12.1.105; platform_system == 'Linux' and platform_machine == 'x86_64'
PYTORCH_EXTRA_INSTALL_REQUIREMENTS: nvidia-cuda-nvrtc-cu12==12.1.105; platform_system == 'Linux' and platform_machine == 'x86_64' | nvidia-cuda-runtime-cu12==12.1.105; platform_system == 'Linux' and platform_machine == 'x86_64' | nvidia-cuda-cupti-cu12==12.1.105; platform_system == 'Linux' and platform_machine == 'x86_64' | nvidia-cudnn-cu12==8.9.2.26; platform_system == 'Linux' and platform_machine == 'x86_64' | nvidia-cublas-cu12==12.1.3.1; platform_system == 'Linux' and platform_machine == 'x86_64' | nvidia-cufft-cu12==11.0.2.54; platform_system == 'Linux' and platform_machine == 'x86_64' | nvidia-curand-cu12==10.3.2.106; platform_system == 'Linux' and platform_machine == 'x86_64' | nvidia-cusolver-cu12==11.4.5.107; platform_system == 'Linux' and platform_machine == 'x86_64' | nvidia-cusparse-cu12==12.1.0.106; platform_system == 'Linux' and platform_machine == 'x86_64' | nvidia-nccl-cu12==2.20.5; platform_system == 'Linux' and platform_machine == 'x86_64' | nvidia-nvtx-cu12==12.1.105; platform_system == 'Linux' and platform_machine == 'x86_64'
steps:
- name: Display EC2 information
shell: bash
@@ -782,7 +782,7 @@ jobs:
GPU_ARCH_TYPE: cuda
SKIP_ALL_TESTS: 1
DESIRED_PYTHON: "3.8"
PYTORCH_EXTRA_INSTALL_REQUIREMENTS: nvidia-cuda-nvrtc-cu12==12.1.105; platform_system == 'Linux' and platform_machine == 'x86_64' | nvidia-cuda-runtime-cu12==12.1.105; platform_system == 'Linux' and platform_machine == 'x86_64' | nvidia-cuda-cupti-cu12==12.1.105; platform_system == 'Linux' and platform_machine == 'x86_64' | nvidia-cudnn-cu12==9.1.0.70; platform_system == 'Linux' and platform_machine == 'x86_64' | nvidia-cublas-cu12==12.1.3.1; platform_system == 'Linux' and platform_machine == 'x86_64' | nvidia-cufft-cu12==11.0.2.54; platform_system == 'Linux' and platform_machine == 'x86_64' | nvidia-curand-cu12==10.3.2.106; platform_system == 'Linux' and platform_machine == 'x86_64' | nvidia-cusolver-cu12==11.4.5.107; platform_system == 'Linux' and platform_machine == 'x86_64' | nvidia-cusparse-cu12==12.1.0.106; platform_system == 'Linux' and platform_machine == 'x86_64' | nvidia-nccl-cu12==2.20.5; platform_system == 'Linux' and platform_machine == 'x86_64' | nvidia-nvtx-cu12==12.1.105; platform_system == 'Linux' and platform_machine == 'x86_64'
PYTORCH_EXTRA_INSTALL_REQUIREMENTS: nvidia-cuda-nvrtc-cu12==12.1.105; platform_system == 'Linux' and platform_machine == 'x86_64' | nvidia-cuda-runtime-cu12==12.1.105; platform_system == 'Linux' and platform_machine == 'x86_64' | nvidia-cuda-cupti-cu12==12.1.105; platform_system == 'Linux' and platform_machine == 'x86_64' | nvidia-cudnn-cu12==8.9.2.26; platform_system == 'Linux' and platform_machine == 'x86_64' | nvidia-cublas-cu12==12.1.3.1; platform_system == 'Linux' and platform_machine == 'x86_64' | nvidia-cufft-cu12==11.0.2.54; platform_system == 'Linux' and platform_machine == 'x86_64' | nvidia-curand-cu12==10.3.2.106; platform_system == 'Linux' and platform_machine == 'x86_64' | nvidia-cusolver-cu12==11.4.5.107; platform_system == 'Linux' and platform_machine == 'x86_64' | nvidia-cusparse-cu12==12.1.0.106; platform_system == 'Linux' and platform_machine == 'x86_64' | nvidia-nccl-cu12==2.20.5; platform_system == 'Linux' and platform_machine == 'x86_64' | nvidia-nvtx-cu12==12.1.105; platform_system == 'Linux' and platform_machine == 'x86_64'
steps:
- name: Display EC2 information
shell: bash
@@ -1027,7 +1027,7 @@ jobs:
GPU_ARCH_TYPE: cpu
SKIP_ALL_TESTS: 1
DESIRED_PYTHON: "3.9"
PYTORCH_EXTRA_INSTALL_REQUIREMENTS: nvidia-cuda-nvrtc-cu12==12.1.105; platform_system == 'Linux' and platform_machine == 'x86_64' | nvidia-cuda-runtime-cu12==12.1.105; platform_system == 'Linux' and platform_machine == 'x86_64' | nvidia-cuda-cupti-cu12==12.1.105; platform_system == 'Linux' and platform_machine == 'x86_64' | nvidia-cudnn-cu12==9.1.0.70; platform_system == 'Linux' and platform_machine == 'x86_64' | nvidia-cublas-cu12==12.1.3.1; platform_system == 'Linux' and platform_machine == 'x86_64' | nvidia-cufft-cu12==11.0.2.54; platform_system == 'Linux' and platform_machine == 'x86_64' | nvidia-curand-cu12==10.3.2.106; platform_system == 'Linux' and platform_machine == 'x86_64' | nvidia-cusolver-cu12==11.4.5.107; platform_system == 'Linux' and platform_machine == 'x86_64' | nvidia-cusparse-cu12==12.1.0.106; platform_system == 'Linux' and platform_machine == 'x86_64' | nvidia-nccl-cu12==2.20.5; platform_system == 'Linux' and platform_machine == 'x86_64' | nvidia-nvtx-cu12==12.1.105; platform_system == 'Linux' and platform_machine == 'x86_64'
PYTORCH_EXTRA_INSTALL_REQUIREMENTS: nvidia-cuda-nvrtc-cu12==12.1.105; platform_system == 'Linux' and platform_machine == 'x86_64' | nvidia-cuda-runtime-cu12==12.1.105; platform_system == 'Linux' and platform_machine == 'x86_64' | nvidia-cuda-cupti-cu12==12.1.105; platform_system == 'Linux' and platform_machine == 'x86_64' | nvidia-cudnn-cu12==8.9.2.26; platform_system == 'Linux' and platform_machine == 'x86_64' | nvidia-cublas-cu12==12.1.3.1; platform_system == 'Linux' and platform_machine == 'x86_64' | nvidia-cufft-cu12==11.0.2.54; platform_system == 'Linux' and platform_machine == 'x86_64' | nvidia-curand-cu12==10.3.2.106; platform_system == 'Linux' and platform_machine == 'x86_64' | nvidia-cusolver-cu12==11.4.5.107; platform_system == 'Linux' and platform_machine == 'x86_64' | nvidia-cusparse-cu12==12.1.0.106; platform_system == 'Linux' and platform_machine == 'x86_64' | nvidia-nccl-cu12==2.20.5; platform_system == 'Linux' and platform_machine == 'x86_64' | nvidia-nvtx-cu12==12.1.105; platform_system == 'Linux' and platform_machine == 'x86_64'
steps:
- name: Display EC2 information
shell: bash
@@ -1271,7 +1271,7 @@ jobs:
GPU_ARCH_TYPE: cuda
SKIP_ALL_TESTS: 1
DESIRED_PYTHON: "3.9"
PYTORCH_EXTRA_INSTALL_REQUIREMENTS: nvidia-cuda-nvrtc-cu12==12.1.105; platform_system == 'Linux' and platform_machine == 'x86_64' | nvidia-cuda-runtime-cu12==12.1.105; platform_system == 'Linux' and platform_machine == 'x86_64' | nvidia-cuda-cupti-cu12==12.1.105; platform_system == 'Linux' and platform_machine == 'x86_64' | nvidia-cudnn-cu12==9.1.0.70; platform_system == 'Linux' and platform_machine == 'x86_64' | nvidia-cublas-cu12==12.1.3.1; platform_system == 'Linux' and platform_machine == 'x86_64' | nvidia-cufft-cu12==11.0.2.54; platform_system == 'Linux' and platform_machine == 'x86_64' | nvidia-curand-cu12==10.3.2.106; platform_system == 'Linux' and platform_machine == 'x86_64' | nvidia-cusolver-cu12==11.4.5.107; platform_system == 'Linux' and platform_machine == 'x86_64' | nvidia-cusparse-cu12==12.1.0.106; platform_system == 'Linux' and platform_machine == 'x86_64' | nvidia-nccl-cu12==2.20.5; platform_system == 'Linux' and platform_machine == 'x86_64' | nvidia-nvtx-cu12==12.1.105; platform_system == 'Linux' and platform_machine == 'x86_64'
PYTORCH_EXTRA_INSTALL_REQUIREMENTS: nvidia-cuda-nvrtc-cu12==12.1.105; platform_system == 'Linux' and platform_machine == 'x86_64' | nvidia-cuda-runtime-cu12==12.1.105; platform_system == 'Linux' and platform_machine == 'x86_64' | nvidia-cuda-cupti-cu12==12.1.105; platform_system == 'Linux' and platform_machine == 'x86_64' | nvidia-cudnn-cu12==8.9.2.26; platform_system == 'Linux' and platform_machine == 'x86_64' | nvidia-cublas-cu12==12.1.3.1; platform_system == 'Linux' and platform_machine == 'x86_64' | nvidia-cufft-cu12==11.0.2.54; platform_system == 'Linux' and platform_machine == 'x86_64' | nvidia-curand-cu12==10.3.2.106; platform_system == 'Linux' and platform_machine == 'x86_64' | nvidia-cusolver-cu12==11.4.5.107; platform_system == 'Linux' and platform_machine == 'x86_64' | nvidia-cusparse-cu12==12.1.0.106; platform_system == 'Linux' and platform_machine == 'x86_64' | nvidia-nccl-cu12==2.20.5; platform_system == 'Linux' and platform_machine == 'x86_64' | nvidia-nvtx-cu12==12.1.105; platform_system == 'Linux' and platform_machine == 'x86_64'
steps:
- name: Display EC2 information
shell: bash
@@ -1517,7 +1517,7 @@ jobs:
GPU_ARCH_TYPE: cuda
SKIP_ALL_TESTS: 1
DESIRED_PYTHON: "3.9"
PYTORCH_EXTRA_INSTALL_REQUIREMENTS: nvidia-cuda-nvrtc-cu12==12.1.105; platform_system == 'Linux' and platform_machine == 'x86_64' | nvidia-cuda-runtime-cu12==12.1.105; platform_system == 'Linux' and platform_machine == 'x86_64' | nvidia-cuda-cupti-cu12==12.1.105; platform_system == 'Linux' and platform_machine == 'x86_64' | nvidia-cudnn-cu12==9.1.0.70; platform_system == 'Linux' and platform_machine == 'x86_64' | nvidia-cublas-cu12==12.1.3.1; platform_system == 'Linux' and platform_machine == 'x86_64' | nvidia-cufft-cu12==11.0.2.54; platform_system == 'Linux' and platform_machine == 'x86_64' | nvidia-curand-cu12==10.3.2.106; platform_system == 'Linux' and platform_machine == 'x86_64' | nvidia-cusolver-cu12==11.4.5.107; platform_system == 'Linux' and platform_machine == 'x86_64' | nvidia-cusparse-cu12==12.1.0.106; platform_system == 'Linux' and platform_machine == 'x86_64' | nvidia-nccl-cu12==2.20.5; platform_system == 'Linux' and platform_machine == 'x86_64' | nvidia-nvtx-cu12==12.1.105; platform_system == 'Linux' and platform_machine == 'x86_64'
PYTORCH_EXTRA_INSTALL_REQUIREMENTS: nvidia-cuda-nvrtc-cu12==12.1.105; platform_system == 'Linux' and platform_machine == 'x86_64' | nvidia-cuda-runtime-cu12==12.1.105; platform_system == 'Linux' and platform_machine == 'x86_64' | nvidia-cuda-cupti-cu12==12.1.105; platform_system == 'Linux' and platform_machine == 'x86_64' | nvidia-cudnn-cu12==8.9.2.26; platform_system == 'Linux' and platform_machine == 'x86_64' | nvidia-cublas-cu12==12.1.3.1; platform_system == 'Linux' and platform_machine == 'x86_64' | nvidia-cufft-cu12==11.0.2.54; platform_system == 'Linux' and platform_machine == 'x86_64' | nvidia-curand-cu12==10.3.2.106; platform_system == 'Linux' and platform_machine == 'x86_64' | nvidia-cusolver-cu12==11.4.5.107; platform_system == 'Linux' and platform_machine == 'x86_64' | nvidia-cusparse-cu12==12.1.0.106; platform_system == 'Linux' and platform_machine == 'x86_64' | nvidia-nccl-cu12==2.20.5; platform_system == 'Linux' and platform_machine == 'x86_64' | nvidia-nvtx-cu12==12.1.105; platform_system == 'Linux' and platform_machine == 'x86_64'
steps:
- name: Display EC2 information
shell: bash
@@ -1763,7 +1763,7 @@ jobs:
GPU_ARCH_TYPE: cuda
SKIP_ALL_TESTS: 1
DESIRED_PYTHON: "3.9"
PYTORCH_EXTRA_INSTALL_REQUIREMENTS: nvidia-cuda-nvrtc-cu12==12.1.105; platform_system == 'Linux' and platform_machine == 'x86_64' | nvidia-cuda-runtime-cu12==12.1.105; platform_system == 'Linux' and platform_machine == 'x86_64' | nvidia-cuda-cupti-cu12==12.1.105; platform_system == 'Linux' and platform_machine == 'x86_64' | nvidia-cudnn-cu12==9.1.0.70; platform_system == 'Linux' and platform_machine == 'x86_64' | nvidia-cublas-cu12==12.1.3.1; platform_system == 'Linux' and platform_machine == 'x86_64' | nvidia-cufft-cu12==11.0.2.54; platform_system == 'Linux' and platform_machine == 'x86_64' | nvidia-curand-cu12==10.3.2.106; platform_system == 'Linux' and platform_machine == 'x86_64' | nvidia-cusolver-cu12==11.4.5.107; platform_system == 'Linux' and platform_machine == 'x86_64' | nvidia-cusparse-cu12==12.1.0.106; platform_system == 'Linux' and platform_machine == 'x86_64' | nvidia-nccl-cu12==2.20.5; platform_system == 'Linux' and platform_machine == 'x86_64' | nvidia-nvtx-cu12==12.1.105; platform_system == 'Linux' and platform_machine == 'x86_64'
PYTORCH_EXTRA_INSTALL_REQUIREMENTS: nvidia-cuda-nvrtc-cu12==12.1.105; platform_system == 'Linux' and platform_machine == 'x86_64' | nvidia-cuda-runtime-cu12==12.1.105; platform_system == 'Linux' and platform_machine == 'x86_64' | nvidia-cuda-cupti-cu12==12.1.105; platform_system == 'Linux' and platform_machine == 'x86_64' | nvidia-cudnn-cu12==8.9.2.26; platform_system == 'Linux' and platform_machine == 'x86_64' | nvidia-cublas-cu12==12.1.3.1; platform_system == 'Linux' and platform_machine == 'x86_64' | nvidia-cufft-cu12==11.0.2.54; platform_system == 'Linux' and platform_machine == 'x86_64' | nvidia-curand-cu12==10.3.2.106; platform_system == 'Linux' and platform_machine == 'x86_64' | nvidia-cusolver-cu12==11.4.5.107; platform_system == 'Linux' and platform_machine == 'x86_64' | nvidia-cusparse-cu12==12.1.0.106; platform_system == 'Linux' and platform_machine == 'x86_64' | nvidia-nccl-cu12==2.20.5; platform_system == 'Linux' and platform_machine == 'x86_64' | nvidia-nvtx-cu12==12.1.105; platform_system == 'Linux' and platform_machine == 'x86_64'
steps:
- name: Display EC2 information
shell: bash
@@ -2008,7 +2008,7 @@ jobs:
GPU_ARCH_TYPE: cpu
SKIP_ALL_TESTS: 1
DESIRED_PYTHON: "3.10"
PYTORCH_EXTRA_INSTALL_REQUIREMENTS: nvidia-cuda-nvrtc-cu12==12.1.105; platform_system == 'Linux' and platform_machine == 'x86_64' | nvidia-cuda-runtime-cu12==12.1.105; platform_system == 'Linux' and platform_machine == 'x86_64' | nvidia-cuda-cupti-cu12==12.1.105; platform_system == 'Linux' and platform_machine == 'x86_64' | nvidia-cudnn-cu12==9.1.0.70; platform_system == 'Linux' and platform_machine == 'x86_64' | nvidia-cublas-cu12==12.1.3.1; platform_system == 'Linux' and platform_machine == 'x86_64' | nvidia-cufft-cu12==11.0.2.54; platform_system == 'Linux' and platform_machine == 'x86_64' | nvidia-curand-cu12==10.3.2.106; platform_system == 'Linux' and platform_machine == 'x86_64' | nvidia-cusolver-cu12==11.4.5.107; platform_system == 'Linux' and platform_machine == 'x86_64' | nvidia-cusparse-cu12==12.1.0.106; platform_system == 'Linux' and platform_machine == 'x86_64' | nvidia-nccl-cu12==2.20.5; platform_system == 'Linux' and platform_machine == 'x86_64' | nvidia-nvtx-cu12==12.1.105; platform_system == 'Linux' and platform_machine == 'x86_64'
PYTORCH_EXTRA_INSTALL_REQUIREMENTS: nvidia-cuda-nvrtc-cu12==12.1.105; platform_system == 'Linux' and platform_machine == 'x86_64' | nvidia-cuda-runtime-cu12==12.1.105; platform_system == 'Linux' and platform_machine == 'x86_64' | nvidia-cuda-cupti-cu12==12.1.105; platform_system == 'Linux' and platform_machine == 'x86_64' | nvidia-cudnn-cu12==8.9.2.26; platform_system == 'Linux' and platform_machine == 'x86_64' | nvidia-cublas-cu12==12.1.3.1; platform_system == 'Linux' and platform_machine == 'x86_64' | nvidia-cufft-cu12==11.0.2.54; platform_system == 'Linux' and platform_machine == 'x86_64' | nvidia-curand-cu12==10.3.2.106; platform_system == 'Linux' and platform_machine == 'x86_64' | nvidia-cusolver-cu12==11.4.5.107; platform_system == 'Linux' and platform_machine == 'x86_64' | nvidia-cusparse-cu12==12.1.0.106; platform_system == 'Linux' and platform_machine == 'x86_64' | nvidia-nccl-cu12==2.20.5; platform_system == 'Linux' and platform_machine == 'x86_64' | nvidia-nvtx-cu12==12.1.105; platform_system == 'Linux' and platform_machine == 'x86_64'
steps:
- name: Display EC2 information
shell: bash
@@ -2252,7 +2252,7 @@ jobs:
GPU_ARCH_TYPE: cuda
SKIP_ALL_TESTS: 1
DESIRED_PYTHON: "3.10"
PYTORCH_EXTRA_INSTALL_REQUIREMENTS: nvidia-cuda-nvrtc-cu12==12.1.105; platform_system == 'Linux' and platform_machine == 'x86_64' | nvidia-cuda-runtime-cu12==12.1.105; platform_system == 'Linux' and platform_machine == 'x86_64' | nvidia-cuda-cupti-cu12==12.1.105; platform_system == 'Linux' and platform_machine == 'x86_64' | nvidia-cudnn-cu12==9.1.0.70; platform_system == 'Linux' and platform_machine == 'x86_64' | nvidia-cublas-cu12==12.1.3.1; platform_system == 'Linux' and platform_machine == 'x86_64' | nvidia-cufft-cu12==11.0.2.54; platform_system == 'Linux' and platform_machine == 'x86_64' | nvidia-curand-cu12==10.3.2.106; platform_system == 'Linux' and platform_machine == 'x86_64' | nvidia-cusolver-cu12==11.4.5.107; platform_system == 'Linux' and platform_machine == 'x86_64' | nvidia-cusparse-cu12==12.1.0.106; platform_system == 'Linux' and platform_machine == 'x86_64' | nvidia-nccl-cu12==2.20.5; platform_system == 'Linux' and platform_machine == 'x86_64' | nvidia-nvtx-cu12==12.1.105; platform_system == 'Linux' and platform_machine == 'x86_64'
PYTORCH_EXTRA_INSTALL_REQUIREMENTS: nvidia-cuda-nvrtc-cu12==12.1.105; platform_system == 'Linux' and platform_machine == 'x86_64' | nvidia-cuda-runtime-cu12==12.1.105; platform_system == 'Linux' and platform_machine == 'x86_64' | nvidia-cuda-cupti-cu12==12.1.105; platform_system == 'Linux' and platform_machine == 'x86_64' | nvidia-cudnn-cu12==8.9.2.26; platform_system == 'Linux' and platform_machine == 'x86_64' | nvidia-cublas-cu12==12.1.3.1; platform_system == 'Linux' and platform_machine == 'x86_64' | nvidia-cufft-cu12==11.0.2.54; platform_system == 'Linux' and platform_machine == 'x86_64' | nvidia-curand-cu12==10.3.2.106; platform_system == 'Linux' and platform_machine == 'x86_64' | nvidia-cusolver-cu12==11.4.5.107; platform_system == 'Linux' and platform_machine == 'x86_64' | nvidia-cusparse-cu12==12.1.0.106; platform_system == 'Linux' and platform_machine == 'x86_64' | nvidia-nccl-cu12==2.20.5; platform_system == 'Linux' and platform_machine == 'x86_64' | nvidia-nvtx-cu12==12.1.105; platform_system == 'Linux' and platform_machine == 'x86_64'
steps:
- name: Display EC2 information
shell: bash
@@ -2498,7 +2498,7 @@ jobs:
GPU_ARCH_TYPE: cuda
SKIP_ALL_TESTS: 1
DESIRED_PYTHON: "3.10"
PYTORCH_EXTRA_INSTALL_REQUIREMENTS: nvidia-cuda-nvrtc-cu12==12.1.105; platform_system == 'Linux' and platform_machine == 'x86_64' | nvidia-cuda-runtime-cu12==12.1.105; platform_system == 'Linux' and platform_machine == 'x86_64' | nvidia-cuda-cupti-cu12==12.1.105; platform_system == 'Linux' and platform_machine == 'x86_64' | nvidia-cudnn-cu12==9.1.0.70; platform_system == 'Linux' and platform_machine == 'x86_64' | nvidia-cublas-cu12==12.1.3.1; platform_system == 'Linux' and platform_machine == 'x86_64' | nvidia-cufft-cu12==11.0.2.54; platform_system == 'Linux' and platform_machine == 'x86_64' | nvidia-curand-cu12==10.3.2.106; platform_system == 'Linux' and platform_machine == 'x86_64' | nvidia-cusolver-cu12==11.4.5.107; platform_system == 'Linux' and platform_machine == 'x86_64' | nvidia-cusparse-cu12==12.1.0.106; platform_system == 'Linux' and platform_machine == 'x86_64' | nvidia-nccl-cu12==2.20.5; platform_system == 'Linux' and platform_machine == 'x86_64' | nvidia-nvtx-cu12==12.1.105; platform_system == 'Linux' and platform_machine == 'x86_64'
PYTORCH_EXTRA_INSTALL_REQUIREMENTS: nvidia-cuda-nvrtc-cu12==12.1.105; platform_system == 'Linux' and platform_machine == 'x86_64' | nvidia-cuda-runtime-cu12==12.1.105; platform_system == 'Linux' and platform_machine == 'x86_64' | nvidia-cuda-cupti-cu12==12.1.105; platform_system == 'Linux' and platform_machine == 'x86_64' | nvidia-cudnn-cu12==8.9.2.26; platform_system == 'Linux' and platform_machine == 'x86_64' | nvidia-cublas-cu12==12.1.3.1; platform_system == 'Linux' and platform_machine == 'x86_64' | nvidia-cufft-cu12==11.0.2.54; platform_system == 'Linux' and platform_machine == 'x86_64' | nvidia-curand-cu12==10.3.2.106; platform_system == 'Linux' and platform_machine == 'x86_64' | nvidia-cusolver-cu12==11.4.5.107; platform_system == 'Linux' and platform_machine == 'x86_64' | nvidia-cusparse-cu12==12.1.0.106; platform_system == 'Linux' and platform_machine == 'x86_64' | nvidia-nccl-cu12==2.20.5; platform_system == 'Linux' and platform_machine == 'x86_64' | nvidia-nvtx-cu12==12.1.105; platform_system == 'Linux' and platform_machine == 'x86_64'
steps:
- name: Display EC2 information
shell: bash
@@ -2744,7 +2744,7 @@ jobs:
GPU_ARCH_TYPE: cuda
SKIP_ALL_TESTS: 1
DESIRED_PYTHON: "3.10"
PYTORCH_EXTRA_INSTALL_REQUIREMENTS: nvidia-cuda-nvrtc-cu12==12.1.105; platform_system == 'Linux' and platform_machine == 'x86_64' | nvidia-cuda-runtime-cu12==12.1.105; platform_system == 'Linux' and platform_machine == 'x86_64' | nvidia-cuda-cupti-cu12==12.1.105; platform_system == 'Linux' and platform_machine == 'x86_64' | nvidia-cudnn-cu12==9.1.0.70; platform_system == 'Linux' and platform_machine == 'x86_64' | nvidia-cublas-cu12==12.1.3.1; platform_system == 'Linux' and platform_machine == 'x86_64' | nvidia-cufft-cu12==11.0.2.54; platform_system == 'Linux' and platform_machine == 'x86_64' | nvidia-curand-cu12==10.3.2.106; platform_system == 'Linux' and platform_machine == 'x86_64' | nvidia-cusolver-cu12==11.4.5.107; platform_system == 'Linux' and platform_machine == 'x86_64' | nvidia-cusparse-cu12==12.1.0.106; platform_system == 'Linux' and platform_machine == 'x86_64' | nvidia-nccl-cu12==2.20.5; platform_system == 'Linux' and platform_machine == 'x86_64' | nvidia-nvtx-cu12==12.1.105; platform_system == 'Linux' and platform_machine == 'x86_64'
PYTORCH_EXTRA_INSTALL_REQUIREMENTS: nvidia-cuda-nvrtc-cu12==12.1.105; platform_system == 'Linux' and platform_machine == 'x86_64' | nvidia-cuda-runtime-cu12==12.1.105; platform_system == 'Linux' and platform_machine == 'x86_64' | nvidia-cuda-cupti-cu12==12.1.105; platform_system == 'Linux' and platform_machine == 'x86_64' | nvidia-cudnn-cu12==8.9.2.26; platform_system == 'Linux' and platform_machine == 'x86_64' | nvidia-cublas-cu12==12.1.3.1; platform_system == 'Linux' and platform_machine == 'x86_64' | nvidia-cufft-cu12==11.0.2.54; platform_system == 'Linux' and platform_machine == 'x86_64' | nvidia-curand-cu12==10.3.2.106; platform_system == 'Linux' and platform_machine == 'x86_64' | nvidia-cusolver-cu12==11.4.5.107; platform_system == 'Linux' and platform_machine == 'x86_64' | nvidia-cusparse-cu12==12.1.0.106; platform_system == 'Linux' and platform_machine == 'x86_64' | nvidia-nccl-cu12==2.20.5; platform_system == 'Linux' and platform_machine == 'x86_64' | nvidia-nvtx-cu12==12.1.105; platform_system == 'Linux' and platform_machine == 'x86_64'
steps:
- name: Display EC2 information
shell: bash
@@ -2989,7 +2989,7 @@ jobs:
GPU_ARCH_TYPE: cpu
SKIP_ALL_TESTS: 1
DESIRED_PYTHON: "3.11"
PYTORCH_EXTRA_INSTALL_REQUIREMENTS: nvidia-cuda-nvrtc-cu12==12.1.105; platform_system == 'Linux' and platform_machine == 'x86_64' | nvidia-cuda-runtime-cu12==12.1.105; platform_system == 'Linux' and platform_machine == 'x86_64' | nvidia-cuda-cupti-cu12==12.1.105; platform_system == 'Linux' and platform_machine == 'x86_64' | nvidia-cudnn-cu12==9.1.0.70; platform_system == 'Linux' and platform_machine == 'x86_64' | nvidia-cublas-cu12==12.1.3.1; platform_system == 'Linux' and platform_machine == 'x86_64' | nvidia-cufft-cu12==11.0.2.54; platform_system == 'Linux' and platform_machine == 'x86_64' | nvidia-curand-cu12==10.3.2.106; platform_system == 'Linux' and platform_machine == 'x86_64' | nvidia-cusolver-cu12==11.4.5.107; platform_system == 'Linux' and platform_machine == 'x86_64' | nvidia-cusparse-cu12==12.1.0.106; platform_system == 'Linux' and platform_machine == 'x86_64' | nvidia-nccl-cu12==2.20.5; platform_system == 'Linux' and platform_machine == 'x86_64' | nvidia-nvtx-cu12==12.1.105; platform_system == 'Linux' and platform_machine == 'x86_64'
PYTORCH_EXTRA_INSTALL_REQUIREMENTS: nvidia-cuda-nvrtc-cu12==12.1.105; platform_system == 'Linux' and platform_machine == 'x86_64' | nvidia-cuda-runtime-cu12==12.1.105; platform_system == 'Linux' and platform_machine == 'x86_64' | nvidia-cuda-cupti-cu12==12.1.105; platform_system == 'Linux' and platform_machine == 'x86_64' | nvidia-cudnn-cu12==8.9.2.26; platform_system == 'Linux' and platform_machine == 'x86_64' | nvidia-cublas-cu12==12.1.3.1; platform_system == 'Linux' and platform_machine == 'x86_64' | nvidia-cufft-cu12==11.0.2.54; platform_system == 'Linux' and platform_machine == 'x86_64' | nvidia-curand-cu12==10.3.2.106; platform_system == 'Linux' and platform_machine == 'x86_64' | nvidia-cusolver-cu12==11.4.5.107; platform_system == 'Linux' and platform_machine == 'x86_64' | nvidia-cusparse-cu12==12.1.0.106; platform_system == 'Linux' and platform_machine == 'x86_64' | nvidia-nccl-cu12==2.20.5; platform_system == 'Linux' and platform_machine == 'x86_64' | nvidia-nvtx-cu12==12.1.105; platform_system == 'Linux' and platform_machine == 'x86_64'
steps:
- name: Display EC2 information
shell: bash
@@ -3233,7 +3233,7 @@ jobs:
GPU_ARCH_TYPE: cuda
SKIP_ALL_TESTS: 1
DESIRED_PYTHON: "3.11"
PYTORCH_EXTRA_INSTALL_REQUIREMENTS: nvidia-cuda-nvrtc-cu12==12.1.105; platform_system == 'Linux' and platform_machine == 'x86_64' | nvidia-cuda-runtime-cu12==12.1.105; platform_system == 'Linux' and platform_machine == 'x86_64' | nvidia-cuda-cupti-cu12==12.1.105; platform_system == 'Linux' and platform_machine == 'x86_64' | nvidia-cudnn-cu12==9.1.0.70; platform_system == 'Linux' and platform_machine == 'x86_64' | nvidia-cublas-cu12==12.1.3.1; platform_system == 'Linux' and platform_machine == 'x86_64' | nvidia-cufft-cu12==11.0.2.54; platform_system == 'Linux' and platform_machine == 'x86_64' | nvidia-curand-cu12==10.3.2.106; platform_system == 'Linux' and platform_machine == 'x86_64' | nvidia-cusolver-cu12==11.4.5.107; platform_system == 'Linux' and platform_machine == 'x86_64' | nvidia-cusparse-cu12==12.1.0.106; platform_system == 'Linux' and platform_machine == 'x86_64' | nvidia-nccl-cu12==2.20.5; platform_system == 'Linux' and platform_machine == 'x86_64' | nvidia-nvtx-cu12==12.1.105; platform_system == 'Linux' and platform_machine == 'x86_64'
PYTORCH_EXTRA_INSTALL_REQUIREMENTS: nvidia-cuda-nvrtc-cu12==12.1.105; platform_system == 'Linux' and platform_machine == 'x86_64' | nvidia-cuda-runtime-cu12==12.1.105; platform_system == 'Linux' and platform_machine == 'x86_64' | nvidia-cuda-cupti-cu12==12.1.105; platform_system == 'Linux' and platform_machine == 'x86_64' | nvidia-cudnn-cu12==8.9.2.26; platform_system == 'Linux' and platform_machine == 'x86_64' | nvidia-cublas-cu12==12.1.3.1; platform_system == 'Linux' and platform_machine == 'x86_64' | nvidia-cufft-cu12==11.0.2.54; platform_system == 'Linux' and platform_machine == 'x86_64' | nvidia-curand-cu12==10.3.2.106; platform_system == 'Linux' and platform_machine == 'x86_64' | nvidia-cusolver-cu12==11.4.5.107; platform_system == 'Linux' and platform_machine == 'x86_64' | nvidia-cusparse-cu12==12.1.0.106; platform_system == 'Linux' and platform_machine == 'x86_64' | nvidia-nccl-cu12==2.20.5; platform_system == 'Linux' and platform_machine == 'x86_64' | nvidia-nvtx-cu12==12.1.105; platform_system == 'Linux' and platform_machine == 'x86_64'
steps:
- name: Display EC2 information
shell: bash
@@ -3479,7 +3479,7 @@ jobs:
GPU_ARCH_TYPE: cuda
SKIP_ALL_TESTS: 1
DESIRED_PYTHON: "3.11"
PYTORCH_EXTRA_INSTALL_REQUIREMENTS: nvidia-cuda-nvrtc-cu12==12.1.105; platform_system == 'Linux' and platform_machine == 'x86_64' | nvidia-cuda-runtime-cu12==12.1.105; platform_system == 'Linux' and platform_machine == 'x86_64' | nvidia-cuda-cupti-cu12==12.1.105; platform_system == 'Linux' and platform_machine == 'x86_64' | nvidia-cudnn-cu12==9.1.0.70; platform_system == 'Linux' and platform_machine == 'x86_64' | nvidia-cublas-cu12==12.1.3.1; platform_system == 'Linux' and platform_machine == 'x86_64' | nvidia-cufft-cu12==11.0.2.54; platform_system == 'Linux' and platform_machine == 'x86_64' | nvidia-curand-cu12==10.3.2.106; platform_system == 'Linux' and platform_machine == 'x86_64' | nvidia-cusolver-cu12==11.4.5.107; platform_system == 'Linux' and platform_machine == 'x86_64' | nvidia-cusparse-cu12==12.1.0.106; platform_system == 'Linux' and platform_machine == 'x86_64' | nvidia-nccl-cu12==2.20.5; platform_system == 'Linux' and platform_machine == 'x86_64' | nvidia-nvtx-cu12==12.1.105; platform_system == 'Linux' and platform_machine == 'x86_64'
PYTORCH_EXTRA_INSTALL_REQUIREMENTS: nvidia-cuda-nvrtc-cu12==12.1.105; platform_system == 'Linux' and platform_machine == 'x86_64' | nvidia-cuda-runtime-cu12==12.1.105; platform_system == 'Linux' and platform_machine == 'x86_64' | nvidia-cuda-cupti-cu12==12.1.105; platform_system == 'Linux' and platform_machine == 'x86_64' | nvidia-cudnn-cu12==8.9.2.26; platform_system == 'Linux' and platform_machine == 'x86_64' | nvidia-cublas-cu12==12.1.3.1; platform_system == 'Linux' and platform_machine == 'x86_64' | nvidia-cufft-cu12==11.0.2.54; platform_system == 'Linux' and platform_machine == 'x86_64' | nvidia-curand-cu12==10.3.2.106; platform_system == 'Linux' and platform_machine == 'x86_64' | nvidia-cusolver-cu12==11.4.5.107; platform_system == 'Linux' and platform_machine == 'x86_64' | nvidia-cusparse-cu12==12.1.0.106; platform_system == 'Linux' and platform_machine == 'x86_64' | nvidia-nccl-cu12==2.20.5; platform_system == 'Linux' and platform_machine == 'x86_64' | nvidia-nvtx-cu12==12.1.105; platform_system == 'Linux' and platform_machine == 'x86_64'
steps:
- name: Display EC2 information
shell: bash
@@ -3725,7 +3725,7 @@ jobs:
GPU_ARCH_TYPE: cuda
SKIP_ALL_TESTS: 1
DESIRED_PYTHON: "3.11"
PYTORCH_EXTRA_INSTALL_REQUIREMENTS: nvidia-cuda-nvrtc-cu12==12.1.105; platform_system == 'Linux' and platform_machine == 'x86_64' | nvidia-cuda-runtime-cu12==12.1.105; platform_system == 'Linux' and platform_machine == 'x86_64' | nvidia-cuda-cupti-cu12==12.1.105; platform_system == 'Linux' and platform_machine == 'x86_64' | nvidia-cudnn-cu12==9.1.0.70; platform_system == 'Linux' and platform_machine == 'x86_64' | nvidia-cublas-cu12==12.1.3.1; platform_system == 'Linux' and platform_machine == 'x86_64' | nvidia-cufft-cu12==11.0.2.54; platform_system == 'Linux' and platform_machine == 'x86_64' | nvidia-curand-cu12==10.3.2.106; platform_system == 'Linux' and platform_machine == 'x86_64' | nvidia-cusolver-cu12==11.4.5.107; platform_system == 'Linux' and platform_machine == 'x86_64' | nvidia-cusparse-cu12==12.1.0.106; platform_system == 'Linux' and platform_machine == 'x86_64' | nvidia-nccl-cu12==2.20.5; platform_system == 'Linux' and platform_machine == 'x86_64' | nvidia-nvtx-cu12==12.1.105; platform_system == 'Linux' and platform_machine == 'x86_64'
PYTORCH_EXTRA_INSTALL_REQUIREMENTS: nvidia-cuda-nvrtc-cu12==12.1.105; platform_system == 'Linux' and platform_machine == 'x86_64' | nvidia-cuda-runtime-cu12==12.1.105; platform_system == 'Linux' and platform_machine == 'x86_64' | nvidia-cuda-cupti-cu12==12.1.105; platform_system == 'Linux' and platform_machine == 'x86_64' | nvidia-cudnn-cu12==8.9.2.26; platform_system == 'Linux' and platform_machine == 'x86_64' | nvidia-cublas-cu12==12.1.3.1; platform_system == 'Linux' and platform_machine == 'x86_64' | nvidia-cufft-cu12==11.0.2.54; platform_system == 'Linux' and platform_machine == 'x86_64' | nvidia-curand-cu12==10.3.2.106; platform_system == 'Linux' and platform_machine == 'x86_64' | nvidia-cusolver-cu12==11.4.5.107; platform_system == 'Linux' and platform_machine == 'x86_64' | nvidia-cusparse-cu12==12.1.0.106; platform_system == 'Linux' and platform_machine == 'x86_64' | nvidia-nccl-cu12==2.20.5; platform_system == 'Linux' and platform_machine == 'x86_64' | nvidia-nvtx-cu12==12.1.105; platform_system == 'Linux' and platform_machine == 'x86_64'
steps:
- name: Display EC2 information
shell: bash
@@ -3970,7 +3970,7 @@ jobs:
GPU_ARCH_TYPE: cpu
SKIP_ALL_TESTS: 1
DESIRED_PYTHON: "3.12"
PYTORCH_EXTRA_INSTALL_REQUIREMENTS: nvidia-cuda-nvrtc-cu12==12.1.105; platform_system == 'Linux' and platform_machine == 'x86_64' | nvidia-cuda-runtime-cu12==12.1.105; platform_system == 'Linux' and platform_machine == 'x86_64' | nvidia-cuda-cupti-cu12==12.1.105; platform_system == 'Linux' and platform_machine == 'x86_64' | nvidia-cudnn-cu12==9.1.0.70; platform_system == 'Linux' and platform_machine == 'x86_64' | nvidia-cublas-cu12==12.1.3.1; platform_system == 'Linux' and platform_machine == 'x86_64' | nvidia-cufft-cu12==11.0.2.54; platform_system == 'Linux' and platform_machine == 'x86_64' | nvidia-curand-cu12==10.3.2.106; platform_system == 'Linux' and platform_machine == 'x86_64' | nvidia-cusolver-cu12==11.4.5.107; platform_system == 'Linux' and platform_machine == 'x86_64' | nvidia-cusparse-cu12==12.1.0.106; platform_system == 'Linux' and platform_machine == 'x86_64' | nvidia-nccl-cu12==2.20.5; platform_system == 'Linux' and platform_machine == 'x86_64' | nvidia-nvtx-cu12==12.1.105; platform_system == 'Linux' and platform_machine == 'x86_64'
PYTORCH_EXTRA_INSTALL_REQUIREMENTS: nvidia-cuda-nvrtc-cu12==12.1.105; platform_system == 'Linux' and platform_machine == 'x86_64' | nvidia-cuda-runtime-cu12==12.1.105; platform_system == 'Linux' and platform_machine == 'x86_64' | nvidia-cuda-cupti-cu12==12.1.105; platform_system == 'Linux' and platform_machine == 'x86_64' | nvidia-cudnn-cu12==8.9.2.26; platform_system == 'Linux' and platform_machine == 'x86_64' | nvidia-cublas-cu12==12.1.3.1; platform_system == 'Linux' and platform_machine == 'x86_64' | nvidia-cufft-cu12==11.0.2.54; platform_system == 'Linux' and platform_machine == 'x86_64' | nvidia-curand-cu12==10.3.2.106; platform_system == 'Linux' and platform_machine == 'x86_64' | nvidia-cusolver-cu12==11.4.5.107; platform_system == 'Linux' and platform_machine == 'x86_64' | nvidia-cusparse-cu12==12.1.0.106; platform_system == 'Linux' and platform_machine == 'x86_64' | nvidia-nccl-cu12==2.20.5; platform_system == 'Linux' and platform_machine == 'x86_64' | nvidia-nvtx-cu12==12.1.105; platform_system == 'Linux' and platform_machine == 'x86_64'
steps:
- name: Display EC2 information
shell: bash
@@ -4214,7 +4214,7 @@ jobs:
GPU_ARCH_TYPE: cuda
SKIP_ALL_TESTS: 1
DESIRED_PYTHON: "3.12"
PYTORCH_EXTRA_INSTALL_REQUIREMENTS: nvidia-cuda-nvrtc-cu12==12.1.105; platform_system == 'Linux' and platform_machine == 'x86_64' | nvidia-cuda-runtime-cu12==12.1.105; platform_system == 'Linux' and platform_machine == 'x86_64' | nvidia-cuda-cupti-cu12==12.1.105; platform_system == 'Linux' and platform_machine == 'x86_64' | nvidia-cudnn-cu12==9.1.0.70; platform_system == 'Linux' and platform_machine == 'x86_64' | nvidia-cublas-cu12==12.1.3.1; platform_system == 'Linux' and platform_machine == 'x86_64' | nvidia-cufft-cu12==11.0.2.54; platform_system == 'Linux' and platform_machine == 'x86_64' | nvidia-curand-cu12==10.3.2.106; platform_system == 'Linux' and platform_machine == 'x86_64' | nvidia-cusolver-cu12==11.4.5.107; platform_system == 'Linux' and platform_machine == 'x86_64' | nvidia-cusparse-cu12==12.1.0.106; platform_system == 'Linux' and platform_machine == 'x86_64' | nvidia-nccl-cu12==2.20.5; platform_system == 'Linux' and platform_machine == 'x86_64' | nvidia-nvtx-cu12==12.1.105; platform_system == 'Linux' and platform_machine == 'x86_64'
PYTORCH_EXTRA_INSTALL_REQUIREMENTS: nvidia-cuda-nvrtc-cu12==12.1.105; platform_system == 'Linux' and platform_machine == 'x86_64' | nvidia-cuda-runtime-cu12==12.1.105; platform_system == 'Linux' and platform_machine == 'x86_64' | nvidia-cuda-cupti-cu12==12.1.105; platform_system == 'Linux' and platform_machine == 'x86_64' | nvidia-cudnn-cu12==8.9.2.26; platform_system == 'Linux' and platform_machine == 'x86_64' | nvidia-cublas-cu12==12.1.3.1; platform_system == 'Linux' and platform_machine == 'x86_64' | nvidia-cufft-cu12==11.0.2.54; platform_system == 'Linux' and platform_machine == 'x86_64' | nvidia-curand-cu12==10.3.2.106; platform_system == 'Linux' and platform_machine == 'x86_64' | nvidia-cusolver-cu12==11.4.5.107; platform_system == 'Linux' and platform_machine == 'x86_64' | nvidia-cusparse-cu12==12.1.0.106; platform_system == 'Linux' and platform_machine == 'x86_64' | nvidia-nccl-cu12==2.20.5; platform_system == 'Linux' and platform_machine == 'x86_64' | nvidia-nvtx-cu12==12.1.105; platform_system == 'Linux' and platform_machine == 'x86_64'
steps:
- name: Display EC2 information
shell: bash
@@ -4460,7 +4460,7 @@ jobs:
GPU_ARCH_TYPE: cuda
SKIP_ALL_TESTS: 1
DESIRED_PYTHON: "3.12"
PYTORCH_EXTRA_INSTALL_REQUIREMENTS: nvidia-cuda-nvrtc-cu12==12.1.105; platform_system == 'Linux' and platform_machine == 'x86_64' | nvidia-cuda-runtime-cu12==12.1.105; platform_system == 'Linux' and platform_machine == 'x86_64' | nvidia-cuda-cupti-cu12==12.1.105; platform_system == 'Linux' and platform_machine == 'x86_64' | nvidia-cudnn-cu12==9.1.0.70; platform_system == 'Linux' and platform_machine == 'x86_64' | nvidia-cublas-cu12==12.1.3.1; platform_system == 'Linux' and platform_machine == 'x86_64' | nvidia-cufft-cu12==11.0.2.54; platform_system == 'Linux' and platform_machine == 'x86_64' | nvidia-curand-cu12==10.3.2.106; platform_system == 'Linux' and platform_machine == 'x86_64' | nvidia-cusolver-cu12==11.4.5.107; platform_system == 'Linux' and platform_machine == 'x86_64' | nvidia-cusparse-cu12==12.1.0.106; platform_system == 'Linux' and platform_machine == 'x86_64' | nvidia-nccl-cu12==2.20.5; platform_system == 'Linux' and platform_machine == 'x86_64' | nvidia-nvtx-cu12==12.1.105; platform_system == 'Linux' and platform_machine == 'x86_64'
PYTORCH_EXTRA_INSTALL_REQUIREMENTS: nvidia-cuda-nvrtc-cu12==12.1.105; platform_system == 'Linux' and platform_machine == 'x86_64' | nvidia-cuda-runtime-cu12==12.1.105; platform_system == 'Linux' and platform_machine == 'x86_64' | nvidia-cuda-cupti-cu12==12.1.105; platform_system == 'Linux' and platform_machine == 'x86_64' | nvidia-cudnn-cu12==8.9.2.26; platform_system == 'Linux' and platform_machine == 'x86_64' | nvidia-cublas-cu12==12.1.3.1; platform_system == 'Linux' and platform_machine == 'x86_64' | nvidia-cufft-cu12==11.0.2.54; platform_system == 'Linux' and platform_machine == 'x86_64' | nvidia-curand-cu12==10.3.2.106; platform_system == 'Linux' and platform_machine == 'x86_64' | nvidia-cusolver-cu12==11.4.5.107; platform_system == 'Linux' and platform_machine == 'x86_64' | nvidia-cusparse-cu12==12.1.0.106; platform_system == 'Linux' and platform_machine == 'x86_64' | nvidia-nccl-cu12==2.20.5; platform_system == 'Linux' and platform_machine == 'x86_64' | nvidia-nvtx-cu12==12.1.105; platform_system == 'Linux' and platform_machine == 'x86_64'
steps:
- name: Display EC2 information
shell: bash
@@ -4706,7 +4706,7 @@ jobs:
GPU_ARCH_TYPE: cuda
SKIP_ALL_TESTS: 1
DESIRED_PYTHON: "3.12"
PYTORCH_EXTRA_INSTALL_REQUIREMENTS: nvidia-cuda-nvrtc-cu12==12.1.105; platform_system == 'Linux' and platform_machine == 'x86_64' | nvidia-cuda-runtime-cu12==12.1.105; platform_system == 'Linux' and platform_machine == 'x86_64' | nvidia-cuda-cupti-cu12==12.1.105; platform_system == 'Linux' and platform_machine == 'x86_64' | nvidia-cudnn-cu12==9.1.0.70; platform_system == 'Linux' and platform_machine == 'x86_64' | nvidia-cublas-cu12==12.1.3.1; platform_system == 'Linux' and platform_machine == 'x86_64' | nvidia-cufft-cu12==11.0.2.54; platform_system == 'Linux' and platform_machine == 'x86_64' | nvidia-curand-cu12==10.3.2.106; platform_system == 'Linux' and platform_machine == 'x86_64' | nvidia-cusolver-cu12==11.4.5.107; platform_system == 'Linux' and platform_machine == 'x86_64' | nvidia-cusparse-cu12==12.1.0.106; platform_system == 'Linux' and platform_machine == 'x86_64' | nvidia-nccl-cu12==2.20.5; platform_system == 'Linux' and platform_machine == 'x86_64' | nvidia-nvtx-cu12==12.1.105; platform_system == 'Linux' and platform_machine == 'x86_64'
PYTORCH_EXTRA_INSTALL_REQUIREMENTS: nvidia-cuda-nvrtc-cu12==12.1.105; platform_system == 'Linux' and platform_machine == 'x86_64' | nvidia-cuda-runtime-cu12==12.1.105; platform_system == 'Linux' and platform_machine == 'x86_64' | nvidia-cuda-cupti-cu12==12.1.105; platform_system == 'Linux' and platform_machine == 'x86_64' | nvidia-cudnn-cu12==8.9.2.26; platform_system == 'Linux' and platform_machine == 'x86_64' | nvidia-cublas-cu12==12.1.3.1; platform_system == 'Linux' and platform_machine == 'x86_64' | nvidia-cufft-cu12==11.0.2.54; platform_system == 'Linux' and platform_machine == 'x86_64' | nvidia-curand-cu12==10.3.2.106; platform_system == 'Linux' and platform_machine == 'x86_64' | nvidia-cusolver-cu12==11.4.5.107; platform_system == 'Linux' and platform_machine == 'x86_64' | nvidia-cusparse-cu12==12.1.0.106; platform_system == 'Linux' and platform_machine == 'x86_64' | nvidia-nccl-cu12==2.20.5; platform_system == 'Linux' and platform_machine == 'x86_64' | nvidia-nvtx-cu12==12.1.105; platform_system == 'Linux' and platform_machine == 'x86_64'
steps:
- name: Display EC2 information
shell: bash

View File

@@ -1,108 +0,0 @@
name: inductor-cu124
on:
push:
tags:
- ciflow/inductor-cu124/*
workflow_dispatch:
schedule:
# Run every 4 hours during the week and every 12 hours on the weekend
- cron: 45 0,4,8,12,16,20 * * 1-5
- cron: 45 4,12 * * 0,6
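# Reading the cron fields (minute hour day-of-month month day-of-week, in UTC):
# '45 0,4,8,12,16,20 * * 1-5' fires at :45 past every fourth hour Monday through Friday,
# and '45 4,12 * * 0,6' fires at 04:45 and 12:45 on Sunday and Saturday.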
concurrency:
group: ${{ github.workflow }}-${{ github.event.pull_request.number || github.ref_name }}-${{ github.ref_type == 'branch' && github.sha }}-${{ github.event_name == 'workflow_dispatch' }}
cancel-in-progress: true
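# The group key expands to '<workflow>-<PR number or ref name>-<sha or false>-<dispatch?>';
# for tag and scheduled triggers the sha term evaluates to false, so a newer run on the
# same ref shares the group and, with cancel-in-progress, supersedes the older one.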
permissions: read-all
jobs:
linux-focal-cuda12_4-py3_10-gcc9-inductor-build:
# Should be synced with the one in inductor.yml, but this doesn't run inductor_timm
name: cuda12.4-py3.10-gcc9-sm86
uses: ./.github/workflows/_linux-build.yml
with:
sync-tag: linux-focal-cuda12_4-py3_10-gcc9-inductor-build
build-environment: linux-focal-cuda12.4-py3.10-gcc9-sm86
docker-image-name: pytorch-linux-focal-cuda12.4-cudnn9-py3-gcc9-inductor-benchmarks
cuda-arch-list: '8.6'
test-matrix: |
{ include: [
{ config: "inductor", shard: 1, num_shards: 1, runner: "linux.g5.4xlarge.nvidia.gpu" },
{ config: "inductor_distributed", shard: 1, num_shards: 1, runner: "linux.g5.12xlarge.nvidia.gpu" },
{ config: "inductor_huggingface", shard: 1, num_shards: 1, runner: "linux.g5.4xlarge.nvidia.gpu" },
{ config: "inductor_torchbench", shard: 1, num_shards: 2, runner: "linux.g5.4xlarge.nvidia.gpu" },
{ config: "inductor_torchbench", shard: 2, num_shards: 2, runner: "linux.g5.4xlarge.nvidia.gpu" },
{ config: "dynamic_inductor_huggingface", shard: 1, num_shards: 1, runner: "linux.g5.4xlarge.nvidia.gpu" },
{ config: "dynamic_inductor_timm", shard: 1, num_shards: 2, runner: "linux.g5.4xlarge.nvidia.gpu" },
{ config: "dynamic_inductor_timm", shard: 2, num_shards: 2, runner: "linux.g5.4xlarge.nvidia.gpu" },
{ config: "dynamic_inductor_torchbench", shard: 1, num_shards: 2, runner: "linux.g5.4xlarge.nvidia.gpu" },
{ config: "dynamic_inductor_torchbench", shard: 2, num_shards: 2, runner: "linux.g5.4xlarge.nvidia.gpu" },
{ config: "aot_inductor_huggingface", shard: 1, num_shards: 1, runner: "linux.g5.4xlarge.nvidia.gpu" },
{ config: "aot_inductor_timm", shard: 1, num_shards: 2, runner: "linux.g5.4xlarge.nvidia.gpu" },
{ config: "aot_inductor_timm", shard: 2, num_shards: 2, runner: "linux.g5.4xlarge.nvidia.gpu" },
{ config: "aot_inductor_torchbench", shard: 1, num_shards: 2, runner: "linux.g5.4xlarge.nvidia.gpu" },
{ config: "aot_inductor_torchbench", shard: 2, num_shards: 2, runner: "linux.g5.4xlarge.nvidia.gpu" },
{ config: "inductor_cpp_wrapper_abi_compatible", shard: 1, num_shards: 1, runner: "linux.g5.4xlarge.nvidia.gpu" },
]}
secrets:
HUGGING_FACE_HUB_TOKEN: ${{ secrets.HUGGING_FACE_HUB_TOKEN }}
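# Sharding note: each matrix entry above is one runner invocation; a config with
# num_shards: 2 (e.g. inductor_torchbench) is split so shard 1 and shard 2 each execute
# roughly half of that suite in parallel on the named instance type.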
linux-focal-cuda12_4-py3_10-gcc9-inductor-test:
name: cuda12.4-py3.10-gcc9-sm86
uses: ./.github/workflows/_linux-test.yml
needs: linux-focal-cuda12_4-py3_10-gcc9-inductor-build
with:
sync-tag: linux-focal-cuda12_4-py3_10-gcc9-inductor-test
build-environment: linux-focal-cuda12.4-py3.10-gcc9-sm86
docker-image: ${{ needs.linux-focal-cuda12_4-py3_10-gcc9-inductor-build.outputs.docker-image }}
test-matrix: ${{ needs.linux-focal-cuda12_4-py3_10-gcc9-inductor-build.outputs.test-matrix }}
secrets:
HUGGING_FACE_HUB_TOKEN: ${{ secrets.HUGGING_FACE_HUB_TOKEN }}
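# Wiring note: the build job is a reusable workflow, and the test job consumes its
# outputs via needs.<job>.outputs, so the exact Docker image and shard plan produced by
# the build are reused here rather than recomputed.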
linux-focal-cuda12_4-py3_10-gcc9-inductor-build-gcp:
name: cuda12.4-py3.10-gcc9-sm80
uses: ./.github/workflows/_linux-build.yml
with:
build-environment: linux-focal-cuda12.4-py3.10-gcc9-sm80
docker-image-name: pytorch-linux-focal-cuda12.4-cudnn9-py3-gcc9-inductor-benchmarks
cuda-arch-list: '8.0'
test-matrix: |
{ include: [
{ config: "inductor_torchbench_smoketest_perf", shard: 1, num_shards: 1, runner: "linux.gcp.a100" },
]}
secrets:
HUGGING_FACE_HUB_TOKEN: ${{ secrets.HUGGING_FACE_HUB_TOKEN }}
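# cuda-arch-list '8.0' targets sm_80, i.e. the A100 on the linux.gcp.a100 runner above;
# the sm86 jobs elsewhere in this file target the A10G GPUs on g5 instances.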
linux-focal-cuda12_4-py3_10-gcc9-inductor-test-gcp:
name: cuda12.4-py3.10-gcc9-sm80
uses: ./.github/workflows/_linux-test.yml
needs: linux-focal-cuda12_4-py3_10-gcc9-inductor-build-gcp
with:
build-environment: linux-focal-cuda12.4-py3.10-gcc9-sm80
docker-image: ${{ needs.linux-focal-cuda12_4-py3_10-gcc9-inductor-build-gcp.outputs.docker-image }}
test-matrix: ${{ needs.linux-focal-cuda12_4-py3_10-gcc9-inductor-build-gcp.outputs.test-matrix }}
use-gha: anything-non-empty-to-use-gha
secrets:
HUGGING_FACE_HUB_TOKEN: ${{ secrets.HUGGING_FACE_HUB_TOKEN }}
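# use-gha appears to be a presence flag: any non-empty value makes _linux-test.yml fetch
# build artifacts through GitHub Actions storage instead of the default S3 path,
# presumably because the GCP A100 runners cannot reach the AWS bucket.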
linux-focal-cuda12_4-py3_12-gcc9-inductor-build:
name: cuda12.4-py3.12-gcc9-sm86
uses: ./.github/workflows/_linux-build.yml
with:
build-environment: linux-focal-cuda12.4-py3.12-gcc9-sm86
docker-image-name: pytorch-linux-focal-cuda12.4-cudnn9-py3.12-gcc9-inductor-benchmarks
cuda-arch-list: '8.6'
test-matrix: |
{ include: [
{ config: "inductor", shard: 1, num_shards: 1, runner: "linux.g5.4xlarge.nvidia.gpu" },
]}
linux-focal-cuda12_4-py3_12-gcc9-inductor-test:
name: cuda12.4-py3.12-gcc9-sm86
uses: ./.github/workflows/_linux-test.yml
needs: linux-focal-cuda12_4-py3_12-gcc9-inductor-build
with:
build-environment: linux-focal-cuda12.4-py3.12-gcc9-sm86
docker-image: ${{ needs.linux-focal-cuda12_4-py3_12-gcc9-inductor-build.outputs.docker-image }}
test-matrix: ${{ needs.linux-focal-cuda12_4-py3_12-gcc9-inductor-build.outputs.test-matrix }}

View File

@@ -21,7 +21,7 @@ jobs:
uses: ./.github/workflows/_linux-build.yml
with:
build-environment: linux-focal-cuda12.1-py3.10-gcc9-sm80
docker-image-name: pytorch-linux-focal-cuda12.1-cudnn9-py3-gcc9-inductor-benchmarks
docker-image-name: pytorch-linux-focal-cuda12.1-cudnn8-py3-gcc9-inductor-benchmarks
cuda-arch-list: '8.0'
test-matrix: |
{ include: [

View File

@@ -18,7 +18,7 @@ jobs:
uses: ./.github/workflows/_linux-build.yml
with:
build-environment: linux-focal-cuda12.1-py3.10-gcc9-sm80
docker-image-name: pytorch-linux-focal-cuda12.1-cudnn9-py3-gcc9-inductor-benchmarks
docker-image-name: pytorch-linux-focal-cuda12.1-cudnn8-py3-gcc9-inductor-benchmarks
cuda-arch-list: '8.0'
test-matrix: |
{ include: [

View File

@@ -71,7 +71,7 @@ jobs:
uses: ./.github/workflows/_linux-build.yml
with:
build-environment: linux-focal-cuda12.1-py3.10-gcc9-sm80
docker-image-name: pytorch-linux-focal-cuda12.1-cudnn9-py3-gcc9-inductor-benchmarks
docker-image-name: pytorch-linux-focal-cuda12.1-cudnn8-py3-gcc9-inductor-benchmarks
cuda-arch-list: '8.0'
test-matrix: |
{ include: [

View File

@@ -23,7 +23,7 @@ jobs:
uses: ./.github/workflows/_linux-build.yml
with:
build-environment: linux-focal-cuda12.1-py3.10-gcc9-sm86
docker-image-name: pytorch-linux-focal-cuda12.1-cudnn9-py3-gcc9-inductor-benchmarks
docker-image-name: pytorch-linux-focal-cuda12.1-cudnn8-py3-gcc9-inductor-benchmarks
cuda-arch-list: '8.6'
test-matrix: |
{ include: [

View File

@@ -44,7 +44,7 @@ jobs:
uses: ./.github/workflows/_linux-build.yml
with:
build-environment: linux-focal-cuda12.1-py3.10-gcc9-sm86
docker-image-name: pytorch-linux-focal-cuda12.1-cudnn9-py3-gcc9-inductor-benchmarks
docker-image-name: pytorch-linux-focal-cuda12.1-cudnn8-py3-gcc9-inductor-benchmarks
cuda-arch-list: '8.6'
test-matrix: |
{ include: [
@@ -86,7 +86,7 @@ jobs:
uses: ./.github/workflows/_linux-build.yml
with:
build-environment: linux-focal-cuda12.1-py3.10-gcc9-sm80
docker-image-name: pytorch-linux-focal-cuda12.1-cudnn9-py3-gcc9-inductor-benchmarks
docker-image-name: pytorch-linux-focal-cuda12.1-cudnn8-py3-gcc9-inductor-benchmarks
cuda-arch-list: '8.0'
test-matrix: |
{ include: [
@@ -112,7 +112,7 @@ jobs:
uses: ./.github/workflows/_linux-build.yml
with:
build-environment: linux-focal-cuda12.1-py3.12-gcc9-sm86
docker-image-name: pytorch-linux-focal-cuda12.1-cudnn9-py3.12-gcc9-inductor-benchmarks
docker-image-name: pytorch-linux-focal-cuda12.1-cudnn8-py3.12-gcc9-inductor-benchmarks
cuda-arch-list: '8.6'
test-matrix: |
{ include: [
@@ -129,18 +129,32 @@ jobs:
test-matrix: ${{ needs.linux-focal-cuda12_1-py3_12-gcc9-inductor-build.outputs.test-matrix }}
linux-focal-cuda12_4-py3_10-gcc9-inductor-build:
# Should be synced with the one in inductor-periodic.yml but this only runs inductor_timm
name: cuda12.4-py3.10-gcc9-sm86
uses: ./.github/workflows/_linux-build.yml
with:
sync-tag: linux-focal-cuda12_4-py3_10-gcc9-inductor-build
build-environment: linux-focal-cuda12.4-py3.10-gcc9-sm86
docker-image-name: pytorch-linux-focal-cuda12.4-cudnn9-py3-gcc9-inductor-benchmarks
docker-image-name: pytorch-linux-focal-cuda12.4-cudnn8-py3-gcc9-inductor-benchmarks
cuda-arch-list: '8.6'
test-matrix: |
{ include: [
{ config: "inductor", shard: 1, num_shards: 1, runner: "linux.g5.4xlarge.nvidia.gpu" },
{ config: "inductor_distributed", shard: 1, num_shards: 1, runner: "linux.g5.12xlarge.nvidia.gpu" },
{ config: "inductor_huggingface", shard: 1, num_shards: 1, runner: "linux.g5.4xlarge.nvidia.gpu" },
{ config: "inductor_timm", shard: 1, num_shards: 2, runner: "linux.g5.4xlarge.nvidia.gpu" },
{ config: "inductor_timm", shard: 2, num_shards: 2, runner: "linux.g5.4xlarge.nvidia.gpu" },
{ config: "inductor_torchbench", shard: 1, num_shards: 2, runner: "linux.g5.4xlarge.nvidia.gpu" },
{ config: "inductor_torchbench", shard: 2, num_shards: 2, runner: "linux.g5.4xlarge.nvidia.gpu" },
{ config: "dynamic_inductor_huggingface", shard: 1, num_shards: 1, runner: "linux.g5.4xlarge.nvidia.gpu" },
{ config: "dynamic_inductor_timm", shard: 1, num_shards: 2, runner: "linux.g5.4xlarge.nvidia.gpu" },
{ config: "dynamic_inductor_timm", shard: 2, num_shards: 2, runner: "linux.g5.4xlarge.nvidia.gpu" },
{ config: "dynamic_inductor_torchbench", shard: 1, num_shards: 2, runner: "linux.g5.4xlarge.nvidia.gpu" },
{ config: "dynamic_inductor_torchbench", shard: 2, num_shards: 2, runner: "linux.g5.4xlarge.nvidia.gpu" },
{ config: "aot_inductor_huggingface", shard: 1, num_shards: 1, runner: "linux.g5.4xlarge.nvidia.gpu" },
{ config: "aot_inductor_timm", shard: 1, num_shards: 2, runner: "linux.g5.4xlarge.nvidia.gpu" },
{ config: "aot_inductor_timm", shard: 2, num_shards: 2, runner: "linux.g5.4xlarge.nvidia.gpu" },
{ config: "aot_inductor_torchbench", shard: 1, num_shards: 2, runner: "linux.g5.4xlarge.nvidia.gpu" },
{ config: "aot_inductor_torchbench", shard: 2, num_shards: 2, runner: "linux.g5.4xlarge.nvidia.gpu" },
{ config: "inductor_cpp_wrapper_abi_compatible", shard: 1, num_shards: 1, runner: "linux.g5.4xlarge.nvidia.gpu" },
]}
secrets:
HUGGING_FACE_HUB_TOKEN: ${{ secrets.HUGGING_FACE_HUB_TOKEN }}
@@ -150,13 +164,59 @@ jobs:
uses: ./.github/workflows/_linux-test.yml
needs: linux-focal-cuda12_4-py3_10-gcc9-inductor-build
with:
sync-tag: linux-focal-cuda12_4-py3_10-gcc9-inductor-test
build-environment: linux-focal-cuda12.4-py3.10-gcc9-sm86
docker-image: ${{ needs.linux-focal-cuda12_4-py3_10-gcc9-inductor-build.outputs.docker-image }}
test-matrix: ${{ needs.linux-focal-cuda12_4-py3_10-gcc9-inductor-build.outputs.test-matrix }}
secrets:
HUGGING_FACE_HUB_TOKEN: ${{ secrets.HUGGING_FACE_HUB_TOKEN }}
linux-focal-cuda12_4-py3_10-gcc9-inductor-build-gcp:
name: cuda12.4-py3.10-gcc9-sm80
uses: ./.github/workflows/_linux-build.yml
with:
build-environment: linux-focal-cuda12.4-py3.10-gcc9-sm80
docker-image-name: pytorch-linux-focal-cuda12.4-cudnn8-py3-gcc9-inductor-benchmarks
cuda-arch-list: '8.0'
test-matrix: |
{ include: [
{ config: "inductor_torchbench_smoketest_perf", shard: 1, num_shards: 1, runner: "linux.gcp.a100" },
]}
secrets:
HUGGING_FACE_HUB_TOKEN: ${{ secrets.HUGGING_FACE_HUB_TOKEN }}
linux-focal-cuda12_4-py3_12-gcc9-inductor-build:
name: cuda12.4-py3.12-gcc9-sm86
uses: ./.github/workflows/_linux-build.yml
with:
build-environment: linux-focal-cuda12.4-py3.12-gcc9-sm86
docker-image-name: pytorch-linux-focal-cuda12.4-cudnn8-py3.12-gcc9-inductor-benchmarks
cuda-arch-list: '8.6'
test-matrix: |
{ include: [
{ config: "inductor", shard: 1, num_shards: 1, runner: "linux.g5.4xlarge.nvidia.gpu" },
]}
linux-focal-cuda12_4-py3_10-gcc9-inductor-test-gcp:
name: cuda12.4-py3.10-gcc9-sm80
uses: ./.github/workflows/_linux-test.yml
needs: linux-focal-cuda12_4-py3_10-gcc9-inductor-build-gcp
with:
build-environment: linux-focal-cuda12.4-py3.10-gcc9-sm80
docker-image: ${{ needs.linux-focal-cuda12_4-py3_10-gcc9-inductor-build-gcp.outputs.docker-image }}
test-matrix: ${{ needs.linux-focal-cuda12_4-py3_10-gcc9-inductor-build-gcp.outputs.test-matrix }}
use-gha: anything-non-empty-to-use-gha
secrets:
HUGGING_FACE_HUB_TOKEN: ${{ secrets.HUGGING_FACE_HUB_TOKEN }}
linux-focal-cuda12_4-py3_12-gcc9-inductor-test:
name: cuda12.4-py3.12-gcc9-sm86
uses: ./.github/workflows/_linux-test.yml
needs: linux-focal-cuda12_4-py3_12-gcc9-inductor-build
with:
build-environment: linux-focal-cuda12.4-py3.12-gcc9-sm86
docker-image: ${{ needs.linux-focal-cuda12_4-py3_12-gcc9-inductor-build.outputs.docker-image }}
test-matrix: ${{ needs.linux-focal-cuda12_4-py3_12-gcc9-inductor-build.outputs.test-matrix }}
linux-jammy-cpu-py3_8-gcc11-inductor-build:
name: linux-jammy-cpu-py3.8-gcc11-inductor
uses: ./.github/workflows/_linux-build.yml

View File

@@ -20,7 +20,7 @@ jobs:
with:
timeout: 120
runner: linux.2xlarge
docker-image: pytorch-linux-jammy-cuda11.8-cudnn9-py3.9-linter
docker-image: pytorch-linux-jammy-cuda11.8-cudnn8-py3.9-linter
# NB: A shallow checkout won't work here because calculate-docker-image requires a full checkout
# to run git rev-parse HEAD~:.ci/docker when a new image is needed
fetch-depth: 0
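# With fetch-depth: 0 the clone keeps full history, so HEAD~ resolves and the job can
# compare .ci/docker between the current commit and its parent to decide whether the
# linter image must be rebuilt.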
@@ -36,7 +36,7 @@ jobs:
with:
timeout: 120
runner: linux.2xlarge
docker-image: pytorch-linux-jammy-cuda11.8-cudnn9-py3.9-linter
docker-image: pytorch-linux-jammy-cuda11.8-cudnn8-py3.9-linter
# NB: A shallow checkout won't work here because calculate-docker-image requires a full checkout
# to run git rev-parse HEAD~:.ci/docker when a new image is needed
fetch-depth: 0

View File

@@ -37,59 +37,6 @@ jobs:
permissions:
id-token: write
contents: read
linux-focal-cuda12_1-py3_10-gcc9-build:
name: linux-focal-cuda12.1-py3.10-gcc9
uses: ./.github/workflows/_linux-build.yml
with:
build-environment: linux-focal-cuda12.1-py3.10-gcc9
docker-image-name: pytorch-linux-focal-cuda12.1-cudnn9-py3-gcc9
test-matrix: |
{ include: [
{ config: "nogpu_AVX512", shard: 1, num_shards: 1, runner: "linux.2xlarge" },
{ config: "nogpu_NO_AVX2", shard: 1, num_shards: 1, runner: "linux.2xlarge" },
{ config: "jit_legacy", shard: 1, num_shards: 1, runner: "linux.4xlarge.nvidia.gpu" },
]}
linux-focal-cuda12_1-py3_10-gcc9-test:
name: linux-focal-cuda12.1-py3.10-gcc9
uses: ./.github/workflows/_linux-test.yml
needs:
- linux-focal-cuda12_1-py3_10-gcc9-build
- target-determination
with:
build-environment: linux-focal-cuda12.1-py3.10-gcc9
docker-image: ${{ needs.linux-focal-cuda12_1-py3_10-gcc9-build.outputs.docker-image }}
test-matrix: ${{ needs.linux-focal-cuda12_1-py3_10-gcc9-build.outputs.test-matrix }}
linux-focal-cuda12_4-py3_10-gcc9-build:
name: linux-focal-cuda12.4-py3.10-gcc9
uses: ./.github/workflows/_linux-build-label.yml
with:
build-environment: linux-focal-cuda12.4-py3.10-gcc9
docker-image-name: pytorch-linux-focal-cuda12.4-cudnn9-py3-gcc9
test-matrix: |
{ include: [
{ config: "default", shard: 1, num_shards: 5, runner: "linux.4xlarge.nvidia.gpu" },
{ config: "default", shard: 2, num_shards: 5, runner: "linux.4xlarge.nvidia.gpu" },
{ config: "default", shard: 3, num_shards: 5, runner: "linux.4xlarge.nvidia.gpu" },
{ config: "default", shard: 4, num_shards: 5, runner: "linux.4xlarge.nvidia.gpu" },
{ config: "default", shard: 5, num_shards: 5, runner: "linux.4xlarge.nvidia.gpu" },
{ config: "deploy", shard: 1, num_shards: 1, runner: "linux.4xlarge.nvidia.gpu" },
{ config: "nogpu_AVX512", shard: 1, num_shards: 1, runner: "linux.2xlarge" },
{ config: "nogpu_NO_AVX2", shard: 1, num_shards: 1, runner: "linux.2xlarge" },
{ config: "jit_legacy", shard: 1, num_shards: 1, runner: "linux.4xlarge.nvidia.gpu" },
]}
linux-focal-cuda12_4-py3_10-gcc9-test:
name: linux-focal-cuda12.4-py3.10-gcc9
uses: ./.github/workflows/_linux-test.yml
needs:
- linux-focal-cuda12_4-py3_10-gcc9-build
- target-determination
with:
timeout-minutes: 360
build-environment: linux-focal-cuda12.4-py3.10-gcc9
docker-image: ${{ needs.linux-focal-cuda12_4-py3_10-gcc9-build.outputs.docker-image }}
test-matrix: ${{ needs.linux-focal-cuda12_4-py3_10-gcc9-build.outputs.test-matrix }}
parallelnative-linux-jammy-py3_8-gcc11-build:
name: parallelnative-linux-jammy-py3.8-gcc11
@@ -120,7 +67,7 @@ jobs:
uses: ./.github/workflows/_linux-build.yml
with:
build-environment: linux-focal-cuda11.8-py3.9-gcc9
docker-image-name: pytorch-linux-focal-cuda11.8-cudnn9-py3-gcc9
docker-image-name: pytorch-linux-focal-cuda11.8-cudnn8-py3-gcc9
cuda-arch-list: 8.6
test-matrix: |
{ include: [
@@ -142,7 +89,7 @@ jobs:
uses: ./.github/workflows/_linux-build.yml
with:
build-environment: linux-focal-cuda11.8-py3.10-gcc9-debug
docker-image-name: pytorch-linux-focal-cuda11.8-cudnn9-py3-gcc9
docker-image-name: pytorch-linux-focal-cuda11.8-cudnn8-py3-gcc9
build-with-debug: true
test-matrix: |
{ include: [

View File

@@ -237,7 +237,7 @@ jobs:
uses: ./.github/workflows/_linux-build-label.yml
with:
build-environment: linux-focal-cuda11.8-py3.10-gcc9
docker-image-name: pytorch-linux-focal-cuda11.8-cudnn9-py3-gcc9
docker-image-name: pytorch-linux-focal-cuda11.8-cudnn8-py3-gcc9
test-matrix: |
{ include: [
{ config: "distributed", shard: 1, num_shards: 3, runner: "linux.8xlarge.nvidia.gpu" },
@@ -262,7 +262,7 @@ jobs:
uses: ./.github/workflows/_linux-build-label.yml
with:
build-environment: linux-focal-cuda12.1-py3.10-gcc9
docker-image-name: pytorch-linux-focal-cuda12.1-cudnn9-py3-gcc9
docker-image-name: pytorch-linux-focal-cuda12.1-cudnn8-py3-gcc9
test-matrix: |
{ include: [
{ config: "default", shard: 1, num_shards: 5, runner: "linux.4xlarge.nvidia.gpu" },
@@ -285,6 +285,34 @@ jobs:
docker-image: ${{ needs.linux-focal-cuda12_1-py3_10-gcc9-build.outputs.docker-image }}
test-matrix: ${{ needs.linux-focal-cuda12_1-py3_10-gcc9-build.outputs.test-matrix }}
linux-focal-cuda12_4-py3_10-gcc9-build:
name: linux-focal-cuda12.4-py3.10-gcc9
uses: ./.github/workflows/_linux-build-label.yml
with:
build-environment: linux-focal-cuda12.4-py3.10-gcc9
docker-image-name: pytorch-linux-focal-cuda12.4-cudnn8-py3-gcc9
test-matrix: |
{ include: [
{ config: "default", shard: 1, num_shards: 5, runner: "linux.4xlarge.nvidia.gpu" },
{ config: "default", shard: 2, num_shards: 5, runner: "linux.4xlarge.nvidia.gpu" },
{ config: "default", shard: 3, num_shards: 5, runner: "linux.4xlarge.nvidia.gpu" },
{ config: "default", shard: 4, num_shards: 5, runner: "linux.4xlarge.nvidia.gpu" },
{ config: "default", shard: 5, num_shards: 5, runner: "linux.4xlarge.nvidia.gpu" },
{ config: "deploy", shard: 1, num_shards: 1, runner: "linux.4xlarge.nvidia.gpu" },
]}
linux-focal-cuda12_4-py3_10-gcc9-test:
name: linux-focal-cuda12.4-py3.10-gcc9
uses: ./.github/workflows/_linux-test.yml
needs:
- linux-focal-cuda12_4-py3_10-gcc9-build
- target-determination
with:
timeout-minutes: 360
build-environment: linux-focal-cuda12.4-py3.10-gcc9
docker-image: ${{ needs.linux-focal-cuda12_4-py3_10-gcc9-build.outputs.docker-image }}
test-matrix: ${{ needs.linux-focal-cuda12_4-py3_10-gcc9-build.outputs.test-matrix }}
linux-jammy-py3-clang12-mobile-build:
name: linux-jammy-py3-clang12-mobile-build
uses: ./.github/workflows/_linux-build-label.yml
@@ -297,12 +325,12 @@ jobs:
{ config: "default", shard: 1, num_shards: 1 },
]}
linux-jammy-cuda-11_8-cudnn9-py3_8-clang12-build:
name: linux-jammy-cuda11.8-cudnn9-py3.8-clang12
linux-jammy-cuda-11_8-cudnn8-py3_8-clang12-build:
name: linux-jammy-cuda11.8-cudnn8-py3.8-clang12
uses: ./.github/workflows/_linux-build-label.yml
with:
build-environment: linux-jammy-cuda11.8-cudnn9-py3.8-clang12
docker-image-name: pytorch-linux-jammy-cuda11.8-cudnn9-py3.8-clang12
build-environment: linux-jammy-cuda11.8-cudnn8-py3.8-clang12
docker-image-name: pytorch-linux-jammy-cuda11.8-cudnn8-py3.8-clang12
test-matrix: |
{ include: [
{ config: "default", shard: 1, num_shards: 1 },
@ -361,7 +389,7 @@ jobs:
uses: ./.github/workflows/_bazel-build-test.yml
with:
build-environment: linux-focal-cuda12.1-py3.10-gcc9-bazel-test
docker-image-name: pytorch-linux-focal-cuda12.1-cudnn9-py3-gcc9
docker-image-name: pytorch-linux-focal-cuda12.1-cudnn8-py3-gcc9
cuda-version: cpu
test-matrix: |
{ include: [
@ -373,7 +401,7 @@ jobs:
uses: ./.github/workflows/_bazel-build-test.yml
with:
build-environment: linux-focal-cuda12.1-py3.10-gcc9-bazel-test
docker-image-name: pytorch-linux-focal-cuda12.1-cudnn9-py3-gcc9
docker-image-name: pytorch-linux-focal-cuda12.1-cudnn8-py3-gcc9
cuda-version: "12.1"
test-matrix: |
{ include: [
@ -385,7 +413,7 @@ jobs:
uses: ./.github/workflows/_bazel-build-test.yml
with:
build-environment: linux-focal-cuda12.4-py3.10-gcc9-bazel-test
docker-image-name: pytorch-linux-focal-cuda12.4-cudnn9-py3-gcc9
docker-image-name: pytorch-linux-focal-cuda12.4-cudnn8-py3-gcc9
cuda-version: "12.4"
test-matrix: |
{ include: [
@ -447,7 +475,7 @@ jobs:
uses: ./.github/workflows/_linux-build-label.yml
with:
build-environment: linux-focal-cuda12.1-py3.10-gcc9-sm86
docker-image-name: pytorch-linux-focal-cuda12.1-cudnn9-py3-gcc9
docker-image-name: pytorch-linux-focal-cuda12.1-cudnn8-py3-gcc9
cuda-arch-list: 8.6
test-matrix: |
{ include: [
@ -469,6 +497,33 @@ jobs:
docker-image: ${{ needs.linux-focal-cuda12_1-py3_10-gcc9-sm86-build.outputs.docker-image }}
test-matrix: ${{ needs.linux-focal-cuda12_1-py3_10-gcc9-sm86-build.outputs.test-matrix }}
linux-focal-cuda12_4-py3_10-gcc9-sm86-build:
name: linux-focal-cuda12.4-py3.10-gcc9-sm86
uses: ./.github/workflows/_linux-build-label.yml
with:
build-environment: linux-focal-cuda12.4-py3.10-gcc9-sm86
docker-image-name: pytorch-linux-focal-cuda12.4-cudnn8-py3-gcc9
cuda-arch-list: 8.6
test-matrix: |
{ include: [
{ config: "default", shard: 1, num_shards: 5, runner: "linux.g5.4xlarge.nvidia.gpu" },
{ config: "default", shard: 2, num_shards: 5, runner: "linux.g5.4xlarge.nvidia.gpu" },
{ config: "default", shard: 3, num_shards: 5, runner: "linux.g5.4xlarge.nvidia.gpu" },
{ config: "default", shard: 4, num_shards: 5, runner: "linux.g5.4xlarge.nvidia.gpu" },
{ config: "default", shard: 5, num_shards: 5, runner: "linux.g5.4xlarge.nvidia.gpu" },
]}
linux-focal-cuda12_4-py3_10-gcc9-sm86-test:
name: linux-focal-cuda12.4-py3.10-gcc9-sm86
uses: ./.github/workflows/_linux-test.yml
needs:
- linux-focal-cuda12_4-py3_10-gcc9-sm86-build
- target-determination
with:
build-environment: linux-focal-cuda12.4-py3.10-gcc9-sm86
docker-image: ${{ needs.linux-focal-cuda12_4-py3_10-gcc9-sm86-build.outputs.docker-image }}
test-matrix: ${{ needs.linux-focal-cuda12_4-py3_10-gcc9-sm86-build.outputs.test-matrix }}
linux-jammy-py3-clang12-executorch-build:
name: linux-jammy-py3-clang12-executorch
uses: ./.github/workflows/_linux-build-label.yml

View File

@ -41,7 +41,7 @@ jobs:
uses: ./.github/workflows/_linux-build.yml
with:
build-environment: linux-focal-cuda12.1-py3-gcc9-slow-gradcheck
docker-image-name: pytorch-linux-focal-cuda12.1-cudnn9-py3-gcc9
docker-image-name: pytorch-linux-focal-cuda12.1-cudnn8-py3-gcc9
cuda-arch-list: 8.6
test-matrix: |
{ include: [
@ -70,7 +70,7 @@ jobs:
uses: ./.github/workflows/_linux-build.yml
with:
build-environment: linux-focal-cuda12.1-py3.10-gcc9-sm86
docker-image-name: pytorch-linux-focal-cuda12.1-cudnn9-py3-gcc9
docker-image-name: pytorch-linux-focal-cuda12.1-cudnn8-py3-gcc9
cuda-arch-list: 8.6
test-matrix: |
{ include: [

View File

@ -26,7 +26,7 @@ jobs:
id: calculate-docker-image
uses: pytorch/test-infra/.github/actions/calculate-docker-image@main
with:
docker-image-name: pytorch-linux-focal-cuda12.1-cudnn9-py3-gcc9
docker-image-name: pytorch-linux-focal-cuda12.1-cudnn8-py3-gcc9
working-directory: pytorch
- name: Use the following to pull a public copy of the image

View File

@ -16,7 +16,7 @@ jobs:
uses: ./.github/workflows/_linux-build.yml
with:
build-environment: linux-focal-cuda12.1-py3.10-gcc9-sm80
docker-image-name: pytorch-linux-focal-cuda12.1-cudnn9-py3-gcc9-inductor-benchmarks
docker-image-name: pytorch-linux-focal-cuda12.1-cudnn8-py3-gcc9-inductor-benchmarks
cuda-arch-list: '8.0'
test-matrix: |
{ include: [

View File

@ -34,39 +34,36 @@ jobs:
id-token: write
contents: read
linux-focal-cuda12_4-py3_10-gcc9-sm86-build:
name: linux-focal-cuda12.4-py3.10-gcc9-sm86
uses: ./.github/workflows/_linux-build-label.yml
linux-focal-cuda12_1-py3_10-gcc9-build:
name: linux-focal-cuda12.1-py3.10-gcc9
uses: ./.github/workflows/_linux-build.yml
with:
build-environment: linux-focal-cuda12.4-py3.10-gcc9-sm86
docker-image-name: pytorch-linux-focal-cuda12.4-cudnn9-py3-gcc9
cuda-arch-list: 8.6
build-environment: linux-focal-cuda12.1-py3.10-gcc9
docker-image-name: pytorch-linux-focal-cuda12.1-cudnn8-py3-gcc9
test-matrix: |
{ include: [
{ config: "default", shard: 1, num_shards: 5, runner: "linux.g5.4xlarge.nvidia.gpu" },
{ config: "default", shard: 2, num_shards: 5, runner: "linux.g5.4xlarge.nvidia.gpu" },
{ config: "default", shard: 3, num_shards: 5, runner: "linux.g5.4xlarge.nvidia.gpu" },
{ config: "default", shard: 4, num_shards: 5, runner: "linux.g5.4xlarge.nvidia.gpu" },
{ config: "default", shard: 5, num_shards: 5, runner: "linux.g5.4xlarge.nvidia.gpu" },
{ config: "nogpu_AVX512", shard: 1, num_shards: 1, runner: "linux.2xlarge" },
{ config: "nogpu_NO_AVX2", shard: 1, num_shards: 1, runner: "linux.2xlarge" },
{ config: "jit_legacy", shard: 1, num_shards: 1, runner: "linux.4xlarge.nvidia.gpu" },
]}
linux-focal-cuda12_4-py3_10-gcc9-sm86-test:
name: linux-focal-cuda12.4-py3.10-gcc9-sm86
linux-focal-cuda12_1-py3_10-gcc9-test:
name: linux-focal-cuda12.1-py3.10-gcc9
uses: ./.github/workflows/_linux-test.yml
needs:
- linux-focal-cuda12_4-py3_10-gcc9-sm86-build
- linux-focal-cuda12_1-py3_10-gcc9-build
- target-determination
with:
build-environment: linux-focal-cuda12.4-py3.10-gcc9-sm86
docker-image: ${{ needs.linux-focal-cuda12_4-py3_10-gcc9-sm86-build.outputs.docker-image }}
test-matrix: ${{ needs.linux-focal-cuda12_4-py3_10-gcc9-sm86-build.outputs.test-matrix }}
build-environment: linux-focal-cuda12.1-py3.10-gcc9
docker-image: ${{ needs.linux-focal-cuda12_1-py3_10-gcc9-build.outputs.docker-image }}
test-matrix: ${{ needs.linux-focal-cuda12_1-py3_10-gcc9-build.outputs.test-matrix }}
libtorch-linux-focal-cuda12_1-py3_7-gcc9-debug-build:
name: libtorch-linux-focal-cuda12.1-py3.7-gcc9-debug
uses: ./.github/workflows/_linux-build.yml
with:
build-environment: libtorch-linux-focal-cuda12.1-py3.7-gcc9
docker-image-name: pytorch-linux-focal-cuda12.1-cudnn9-py3-gcc9
docker-image-name: pytorch-linux-focal-cuda12.1-cudnn8-py3-gcc9
build-generates-artifacts: false
runner: linux.4xlarge
test-matrix: |
@ -80,18 +77,42 @@ jobs:
uses: ./.github/workflows/_linux-build.yml
with:
build-environment: linux-focal-cuda12.1-py3.10-gcc9-no-ops
docker-image-name: pytorch-linux-focal-cuda12.1-cudnn9-py3-gcc9
docker-image-name: pytorch-linux-focal-cuda12.1-cudnn8-py3-gcc9
test-matrix: |
{ include: [
{ config: "default", shard: 1, num_shards: 1 },
]}
linux-focal-cuda12_4-py3_10-gcc9-build:
name: linux-focal-cuda12.4-py3.10-gcc9
uses: ./.github/workflows/_linux-build.yml
with:
build-environment: linux-focal-cuda12.4-py3.10-gcc9
docker-image-name: pytorch-linux-focal-cuda12.4-cudnn8-py3-gcc9
test-matrix: |
{ include: [
{ config: "nogpu_AVX512", shard: 1, num_shards: 1, runner: "linux.2xlarge" },
{ config: "nogpu_NO_AVX2", shard: 1, num_shards: 1, runner: "linux.2xlarge" },
{ config: "jit_legacy", shard: 1, num_shards: 1, runner: "linux.4xlarge.nvidia.gpu" },
]}
linux-focal-cuda12_4-py3_10-gcc9-test:
name: linux-focal-cuda12.4-py3.10-gcc9
uses: ./.github/workflows/_linux-test.yml
needs:
- linux-focal-cuda12_4-py3_10-gcc9-build
- target-determination
with:
build-environment: linux-focal-cuda12.4-py3.10-gcc9
docker-image: ${{ needs.linux-focal-cuda12_4-py3_10-gcc9-build.outputs.docker-image }}
test-matrix: ${{ needs.linux-focal-cuda12_4-py3_10-gcc9-build.outputs.test-matrix }}
libtorch-linux-focal-cuda12_4-py3_7-gcc9-debug-build:
name: libtorch-linux-focal-cuda12.4-py3.7-gcc9-debug
uses: ./.github/workflows/_linux-build.yml
with:
build-environment: libtorch-linux-focal-cuda12.4-py3.7-gcc9
docker-image-name: pytorch-linux-focal-cuda12.4-cudnn9-py3-gcc9
docker-image-name: pytorch-linux-focal-cuda12.4-cudnn8-py3-gcc9
build-generates-artifacts: false
runner: linux.4xlarge
test-matrix: |
@ -105,7 +126,7 @@ jobs:
uses: ./.github/workflows/_linux-build.yml
with:
build-environment: linux-focal-cuda12.4-py3.10-gcc9-no-ops
docker-image-name: pytorch-linux-focal-cuda12.4-cudnn9-py3-gcc9
docker-image-name: pytorch-linux-focal-cuda12.4-cudnn8-py3-gcc9
test-matrix: |
{ include: [
{ config: "default", shard: 1, num_shards: 1 },

View File

@ -1532,6 +1532,28 @@ exclude_patterns = [
'torch/distributed/optim/post_localSGD_optimizer.py',
'torch/distributed/optim/utils.py',
'torch/distributed/optim/zero_redundancy_optimizer.py',
'torch/distributed/pipeline/__init__.py',
'torch/distributed/pipeline/sync/__init__.py',
'torch/distributed/pipeline/sync/_balance/__init__.py',
'torch/distributed/pipeline/sync/_balance/blockpartition.py',
'torch/distributed/pipeline/sync/_balance/profile.py',
'torch/distributed/pipeline/sync/batchnorm.py',
'torch/distributed/pipeline/sync/checkpoint.py',
'torch/distributed/pipeline/sync/copy.py',
'torch/distributed/pipeline/sync/dependency.py',
'torch/distributed/pipeline/sync/microbatch.py',
'torch/distributed/pipeline/sync/phony.py',
'torch/distributed/pipeline/sync/pipe.py',
'torch/distributed/pipeline/sync/pipeline.py',
'torch/distributed/pipeline/sync/skip/__init__.py',
'torch/distributed/pipeline/sync/skip/layout.py',
'torch/distributed/pipeline/sync/skip/namespace.py',
'torch/distributed/pipeline/sync/skip/portal.py',
'torch/distributed/pipeline/sync/skip/skippable.py',
'torch/distributed/pipeline/sync/skip/tracker.py',
'torch/distributed/pipeline/sync/stream.py',
'torch/distributed/pipeline/sync/utils.py',
'torch/distributed/pipeline/sync/worker.py',
'torch/distributed/remote_device.py',
'torch/distributed/rendezvous.py',
'torch/distributed/rpc/__init__.py',
@ -1825,6 +1847,8 @@ exclude_patterns = [
'torch/testing/_internal/distributed/nn/__init__.py',
'torch/testing/_internal/distributed/nn/api/__init__.py',
'torch/testing/_internal/distributed/nn/api/remote_module_test.py',
'torch/testing/_internal/distributed/pipe_with_ddp_test.py',
'torch/testing/_internal/distributed/pipeline/__init__.py',
'torch/testing/_internal/distributed/rpc/__init__.py',
'torch/testing/_internal/distributed/rpc/dist_autograd_test.py',
'torch/testing/_internal/distributed/rpc/dist_optimizer_test.py',
@ -2079,7 +2103,7 @@ init_command = [
'python3',
'tools/linter/adapters/pip_init.py',
'--dry-run={{DRYRUN}}',
'ruff==0.4.8',
'ruff==0.4.6',
]
is_formatter = true

View File

@ -461,8 +461,15 @@ filegroup(
filegroup(
name = "caffe2_perfkernels_srcs",
srcs = [
"caffe2/perfkernels/adagrad.cc",
"caffe2/perfkernels/embedding_lookup.cc",
"caffe2/perfkernels/embedding_lookup_idx.cc",
"caffe2/perfkernels/fused_8bit_rowwise_embedding_lookup.cc",
"caffe2/perfkernels/fused_8bit_rowwise_embedding_lookup_idx.cc",
"caffe2/perfkernels/fused_nbit_rowwise_conversion.cc",
"caffe2/perfkernels/lstm_unit_cpu_common.cc",
"caffe2/perfkernels/math_cpu_base.cc",
"caffe2/perfkernels/typed_axpy.cc",
],
)

View File

@ -242,7 +242,8 @@ option(USE_COLORIZE_OUTPUT "Colorize output during compilation" ON)
option(USE_ASAN "Use Address+Undefined Sanitizers" OFF)
option(USE_TSAN "Use Thread Sanitizer" OFF)
option(USE_CUDA "Use CUDA" ON)
option(USE_XPU "Use XPU" ON)
cmake_dependent_option(USE_XPU "Use XPU. Only available on Linux." ON "LINUX"
OFF)
cmake_dependent_option(
BUILD_LAZY_CUDA_LINALG "Build cuda linalg ops as separate library" ON
"USE_CUDA AND LINUX AND BUILD_PYTHON" OFF)
@ -865,13 +866,12 @@ cmake_dependent_option(
# Suspect users building from source will need this
add_definitions(-DFLASHATTENTION_DISABLE_ALIBI)
# CAVEAT: Again, Flash Attention2 will error while building for sm52 while Mem
# Eff Attention won't
# CAVEAT: Again, do not check USE_ROCM here Flash Attention2 will error while
# building for sm52 while Mem Eff Attention won't
cmake_dependent_option(
USE_MEM_EFF_ATTENTION
"Enable memory-efficient attention for scaled dot product attention.\
Will be disabled if not supported by the platform" ON
"USE_CUDA OR USE_ROCM" OFF)
Will be disabled if not supported by the platform" ON "USE_CUDA" OFF)
if(DEBUG_CUDA)
string(APPEND CMAKE_CUDA_FLAGS_DEBUG " -lineinfo")

View File

@ -189,7 +189,7 @@ Other potentially useful environment variables may be found in `setup.py`.
##### Intel GPU Support
If you want to compile with Intel GPU support, follow these
- [PyTorch Prerequisites for Intel GPUs](https://www.intel.com/content/www/us/en/developer/articles/tool/pytorch-prerequisites-for-intel-gpus.html) instructions.
- Intel GPU is supported for Linux and Windows.
- Intel GPU is currently supported only for Linux systems.
If you want to disable Intel GPU support, export the environment variable `USE_XPU=0`.
Other potentially useful environment variables may be found in `setup.py`.
@ -213,7 +213,6 @@ conda install -c pytorch magma-cuda121 # or the magma-cuda* that matches your C
# (optional) If using torch.compile with inductor/triton, install the matching version of triton
# Run from the pytorch directory after cloning
# For Intel GPU support, please explicitly `export USE_XPU=1` before running command.
make triton
```

View File

@ -40,7 +40,7 @@ Important Note: The trustworthiness of a model is not binary. You must always de
### Untrusted inputs during training and prediction
If you plan to open your model to untrusted inputs, be aware that inputs can also be used as vectors by malicious agents. To minimize risks, make sure to give your model only the permissions strictly required, and keep your libraries updated with the latest security patches.
If you plan to open your model to untrusted inputs, be aware that inputs can also be used as vectors by malicious agents. To minimize risks, make sure to give your model only the permissions strictly required, and keep your libraries updated with the latest security patches.
If applicable, prepare your model against bad inputs and prompt injections. Some recommendations:
- Pre-analysis: check how the model performs by default when exposed to prompt injection (e.g. using fuzzing for prompt injection).

View File

@ -473,6 +473,7 @@ endif()
if(USE_CUDA AND NOT USE_ROCM)
list(APPEND ATen_CUDA_INCLUDE ${CMAKE_CURRENT_SOURCE_DIR}/../../../third_party/cutlass/include)
list(APPEND ATen_CUDA_INCLUDE ${CMAKE_CURRENT_SOURCE_DIR}/../../../third_party/cutlass/tools/util/include)
if($ENV{ATEN_STATIC_CUDA})
list(APPEND ATen_CUDA_DEPENDENCY_LIBS
${CUDA_LIBRARIES}

View File

@ -364,7 +364,7 @@ class TORCH_API Context {
bool enabled_flashSDP = true;
bool enabled_mem_efficientSDP = true;
bool enabled_mathSDP = true;
bool enabled_cudnnSDP = true;
bool enabled_cudnnSDP = false;
#ifdef USE_ROCM
bool benchmark_cudnn = true;
#else
@ -385,11 +385,8 @@ class TORCH_API Context {
? at::LinalgBackend::Cusolver
: at::LinalgBackend::Default;
at::BlasBackend blas_preferred_backend =
#ifdef USE_ROCM
(c10::utils::check_env("TORCH_BLAS_PREFER_HIPBLASLT") != false)
#else
(c10::utils::check_env("TORCH_BLAS_PREFER_CUBLASLT") == true)
#endif
(c10::utils::check_env("TORCH_BLAS_PREFER_CUBLASLT") == true ||
c10::utils::check_env("TORCH_BLAS_PREFER_HIPBLASLT") == true)
? at::BlasBackend::Cublaslt
: at::BlasBackend::Cublas;
#ifdef C10_MOBILE

View File

@ -143,7 +143,7 @@ static Device getATenDevice(const DLDevice& ctx, void* data) {
return at::detail::getXPUHooks().getDeviceFromPtr(data);
default:
TORCH_CHECK(
false, "Unsupported device_type: ", std::to_string(ctx.device_type));
false, "Unsupported device_type: " + c10::to_string(ctx.device_type));
}
}
@ -167,7 +167,7 @@ ScalarType toScalarType(const DLDataType& dtype) {
break;
default:
TORCH_CHECK(
false, "Unsupported kUInt bits ", std::to_string(dtype.bits));
false, "Unsupported kUInt bits " + c10::to_string(dtype.bits));
}
break;
case DLDataTypeCode::kDLInt:
@ -186,7 +186,7 @@ ScalarType toScalarType(const DLDataType& dtype) {
break;
default:
TORCH_CHECK(
false, "Unsupported kInt bits ", std::to_string(dtype.bits));
false, "Unsupported kInt bits " + c10::to_string(dtype.bits));
}
break;
case DLDataTypeCode::kDLFloat:
@ -202,7 +202,7 @@ ScalarType toScalarType(const DLDataType& dtype) {
break;
default:
TORCH_CHECK(
false, "Unsupported kFloat bits ", std::to_string(dtype.bits));
false, "Unsupported kFloat bits " + c10::to_string(dtype.bits));
}
break;
case DLDataTypeCode::kDLBfloat:
@ -212,7 +212,7 @@ ScalarType toScalarType(const DLDataType& dtype) {
break;
default:
TORCH_CHECK(
false, "Unsupported kFloat bits ", std::to_string(dtype.bits));
false, "Unsupported kFloat bits " + c10::to_string(dtype.bits));
}
break;
case DLDataTypeCode::kDLComplex:
@ -228,7 +228,7 @@ ScalarType toScalarType(const DLDataType& dtype) {
break;
default:
TORCH_CHECK(
false, "Unsupported kFloat bits ", std::to_string(dtype.bits));
false, "Unsupported kFloat bits " + c10::to_string(dtype.bits));
}
break;
case DLDataTypeCode::kDLBool:
@ -238,11 +238,11 @@ ScalarType toScalarType(const DLDataType& dtype) {
break;
default:
TORCH_CHECK(
false, "Unsupported kDLBool bits ", std::to_string(dtype.bits));
false, "Unsupported kDLBool bits " + c10::to_string(dtype.bits));
}
break;
default:
TORCH_CHECK(false, "Unsupported code ", std::to_string(dtype.code));
TORCH_CHECK(false, "Unsupported code " + c10::to_string(dtype.code));
}
return stype;
}
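
The hunk above swaps between passing the converted number as a separate `TORCH_CHECK` argument and concatenating it into the message with `operator+`. As a hedged sketch of why the comma-separated form can be preferable, here is a toy variadic checker (not the real `TORCH_CHECK`; `check`/`check_msg` are invented names) that builds the message string only on the failure path:

```cpp
#include <sstream>
#include <stdexcept>
#include <string>

// Toy stand-in for a variadic check macro: the message pieces are streamed
// into one buffer, and only when the condition actually fails, instead of
// eagerly building a temporary std::string with operator+ at every call.
template <typename... Args>
std::string check_msg(const Args&... args) {
  std::ostringstream oss;
  (oss << ... << args); // C++17 fold over operator<<
  return oss.str();
}

template <typename... Args>
void check(bool cond, const Args&... args) {
  if (!cond) {
    throw std::runtime_error(check_msg(args...));
  }
}

int main() {
  int bits = 3;
  check(true, "no string is built on the success path");
  try {
    check(false, "Unsupported kUInt bits ", bits);
  } catch (const std::runtime_error& e) {
    // e.what() == "Unsupported kUInt bits 3"
  }
}
```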
@ -298,7 +298,9 @@ Tensor fromDLPack(DLManagedTensor* src) {
return fromDLPack(src, std::move(deleter));
}
Tensor fromDLPack(DLManagedTensor* src, std::function<void(void*)> deleter) {
Tensor fromDLPack(
DLManagedTensor* src,
std::function<void(void*)> deleter) {
Device device = getATenDevice(src->dl_tensor.device, src->dl_tensor.data);
ScalarType stype = toScalarType(src->dl_tensor.dtype);
if (!src->dl_tensor.strides) {

View File

@ -19,13 +19,7 @@ MemOverlap has_internal_overlap(TensorImpl* t) {
auto strides = t->sym_strides();
auto sizes = t->sym_sizes();
for (const auto i : c10::irange(strides.size())) {
// NB: The size oblivious test is written very carefully here. When
// unbacked SymInts are involved, we should try to conservatively report
// if memory overlap /could/ happen under some setting of unbacked
// SymInts. Thus, if I have u0 size, we should assume that this has > 1
// elements (first expression), but if I have a u0 stride, I should NOT
// assume that it is not zero (second expression)
if (TORCH_GUARD_SIZE_OBLIVIOUS(sizes[i].sym_gt(1)) && strides[i] == 0) {
if (strides[i] == 0 && sizes[i] > 1) {
return MemOverlap::Yes;
}
}
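
As a minimal sketch of what the plain (non-symbolic) test in this hunk computes, ignoring the SymInt/guard machinery entirely: a dimension aliases itself in memory exactly when its stride is zero while it holds more than one element.

```cpp
#include <cstdint>
#include <iostream>
#include <vector>

// Simplified form of the overlap test above: internal overlap occurs when
// some dimension repeats the same memory (stride 0) while logically holding
// more than one element (size > 1).
bool has_internal_overlap_plain(const std::vector<int64_t>& sizes,
                                const std::vector<int64_t>& strides) {
  for (size_t i = 0; i < strides.size(); ++i) {
    if (strides[i] == 0 && sizes[i] > 1) {
      return true;
    }
  }
  return false;
}

int main() {
  // An expand()-style view: dim 0 revisits one row three times via stride 0.
  std::cout << has_internal_overlap_plain({3, 4}, {0, 1}) << "\n"; // 1
  // A contiguous 3x4 tensor never overlaps itself.
  std::cout << has_internal_overlap_plain({3, 4}, {4, 1}) << "\n"; // 0
}
```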

View File

@ -197,7 +197,7 @@ TORCH_API std::ostream& operator<<(
const std::vector<TensorIndex>& tensor_indices);
namespace impl {
inline Tensor applySlice(
static inline Tensor applySlice(
const Tensor& self,
int64_t dim,
c10::SymInt start,
@ -218,8 +218,8 @@ inline Tensor applySlice(
? (*self_sizes)[dim]
: self.sym_size(dim);
if (!disable_slice_optimization &&
TORCH_GUARD_SIZE_OBLIVIOUS(start.sym_eq(0)) &&
TORCH_GUARD_SIZE_OBLIVIOUS(length.sym_eq(stop)) && step == 1) {
TORCH_GUARD_SIZE_OBLIVIOUS(start.sym_eq(0)) && length == stop &&
step == 1) {
return self;
}
}
@ -227,7 +227,7 @@ inline Tensor applySlice(
dim, std::move(start), std::move(stop), std::move(step));
}
inline Tensor applySelect(
static inline Tensor applySelect(
const Tensor& self,
int64_t dim,
SymInt index,
@ -266,7 +266,9 @@ inline Tensor applySelect(
return self.select_symint(dim, std::move(index));
}
inline Tensor boolToIndexingTensorCPUOrCUDA(const Tensor& self, bool value) {
static inline Tensor boolToIndexingTensorCPUOrCUDA(
const Tensor& self,
bool value) {
// booleans add a dimension of size 1. true indexes this dimension as if 0:,
// false as empty.
if (value) {
@ -276,7 +278,7 @@ inline Tensor boolToIndexingTensorCPUOrCUDA(const Tensor& self, bool value) {
}
}
inline Tensor boolToIndexingTensorNonNativeDeviceType(
static inline Tensor boolToIndexingTensorNonNativeDeviceType(
const Tensor& self,
bool value) {
// booleans add a dimension of size 1. true indexes this dimension as if 0:,
@ -288,7 +290,7 @@ inline Tensor boolToIndexingTensorNonNativeDeviceType(
}
}
inline Tensor boolToIndexingTensor(
static inline Tensor boolToIndexingTensor(
const Tensor& self,
bool value,
const at::Device& self_device) {
@ -299,13 +301,13 @@ inline Tensor boolToIndexingTensor(
}
}
inline Tensor scalarToTensorNonNativeDeviceType(
static inline Tensor scalarToTensorNonNativeDeviceType(
const Scalar& v,
const TensorOptions& options) {
return at::scalar_tensor(v, options);
}
inline void recordTensorIndex(
static inline void recordTensorIndex(
const Tensor& tensor,
std::vector<Tensor>& outIndices,
int64_t* dim_ptr) {
@ -315,7 +317,7 @@ inline void recordTensorIndex(
(*dim_ptr)++;
};
inline c10::List<::std::optional<Tensor>> typeConvertIndices(
static inline c10::List<::std::optional<Tensor>> typeConvertIndices(
const Tensor& /*self*/,
std::vector<Tensor>&& indices) {
c10::List<::std::optional<Tensor>> converted_inds;
@ -336,7 +338,7 @@ inline c10::List<::std::optional<Tensor>> typeConvertIndices(
// construct a `std::vector` container to be consumed by the C++
// `count_specified_dimensions` function, which adds 100s of nanoseconds
// overhead and is undesirable.
inline int64_t count_specified_dimensions(
static inline int64_t count_specified_dimensions(
const ArrayRef<TensorIndex>& indices) {
// Count the number of indexed dimensions (everything but ellipsis and None)
int64_t count = 0;
@ -370,7 +372,7 @@ inline int64_t count_specified_dimensions(
//
// The rest of the functions are in `at::indexing::impl` namespace, signifying
// that they shouldn't be used from Python indexing implementation.
inline Tensor scalarToTensor(
static inline Tensor scalarToTensor(
const Scalar& v,
const TensorOptions& options,
const at::Device& self_device) {
@ -385,7 +387,7 @@ inline Tensor scalarToTensor(
// To match numpy semantics:
// As a special case for backwards compatibility,
// strip away unit dimensions from the left of 'src'
inline SymIntArrayRef slicePrefix1sSize(const SymIntArrayRef& sizes) {
static inline SymIntArrayRef slicePrefix1sSize(const SymIntArrayRef& sizes) {
size_t first_non1_src = sizes.size();
for (const auto i : c10::irange(sizes.size())) {
// Unbacked SymInt has different behavior, but this is sound because
@ -400,7 +402,7 @@ inline SymIntArrayRef slicePrefix1sSize(const SymIntArrayRef& sizes) {
return sizes.slice(first_non1_src);
}
inline void copy_to(const Tensor& dst, const Tensor& src) {
static inline void copy_to(const Tensor& dst, const Tensor& src) {
if (dst.sym_sizes().equals(src.sym_sizes())) {
// A shortcut to avoid generating hard-coded constant sizes during tracing.
// This is not a perfect solution: when src & dst have different shapes,
@ -419,7 +421,7 @@ inline void copy_to(const Tensor& dst, const Tensor& src) {
// See NOTE [ Setting `disable_slice_optimization` when calling C++ tensor
// indexing functions from Python ]
inline Tensor handleDimInMultiDimIndexing(
static inline Tensor handleDimInMultiDimIndexing(
const Tensor& prev_dim_result,
const Tensor& original_tensor,
const TensorIndex& index,
@ -507,7 +509,7 @@ inline Tensor handleDimInMultiDimIndexing(
namespace impl {
// This mirrors `applySlicing` in
// torch/csrc/autograd/python_variable_indexing.cpp
inline Tensor applySlicing(
static inline Tensor applySlicing(
const Tensor& self,
const ArrayRef<TensorIndex>& indices,
std::vector<Tensor>& outIndices,
@ -548,13 +550,13 @@ inline Tensor applySlicing(
}
} // namespace impl
inline Tensor dispatch_index(
static inline Tensor dispatch_index(
const Tensor& self,
std::vector<Tensor>&& indices) {
return self.index(impl::typeConvertIndices(self, std::move(indices)));
}
inline Tensor dispatch_index_put_(
static inline Tensor dispatch_index_put_(
Tensor& self,
std::vector<Tensor>&& indices,
const Tensor& value) {
@ -596,7 +598,7 @@ inline Tensor dispatch_index_put_(
// torch/csrc/autograd/python_variable_indexing.cpp See NOTE [ Setting
// `disable_slice_optimization` when calling C++ tensor indexing functions from
// Python ]
inline Tensor get_item(
static inline Tensor get_item(
const Tensor& self,
const ArrayRef<TensorIndex>& indices,
bool disable_slice_optimization = false) {
@ -662,7 +664,7 @@ inline Tensor get_item(
// torch/csrc/autograd/python_variable_indexing.cpp for "the assigned value is a
// Tensor" case See NOTE [ Setting `disable_slice_optimization` when calling C++
// tensor indexing functions from Python ]
inline void set_item(
static inline void set_item(
const Tensor& self,
const ArrayRef<TensorIndex>& indices,
const Tensor& value,

View File

@ -22,6 +22,7 @@
#endif
#include <c10/util/irange.h>
#include <c10/util/string_utils.h>
#include <c10/util/SmallBuffer.h>
#include <array>
@ -1397,7 +1398,7 @@ bool TensorIteratorBase::fast_set_up(const TensorIteratorConfig& config) {
break;
}
default:
TORCH_INTERNAL_ASSERT(false, "Unsupported fast setup type", std::to_string((int)setup_type));
TORCH_INTERNAL_ASSERT(false, "Unsupported fast setup type", c10::to_string((int)setup_type));
}
//coalescing dimensions consists of collapsing dimensions to 1 (we are limited to contiguous no-broadcast cases here)
if (ndim() > 1){

View File

@ -68,7 +68,7 @@ thread_local std::array<at::ScalarType, at::COMPILE_TIME_MAX_DEVICE_TYPES>
at::kBFloat16, // XLA / TPU
at::ScalarType::Undefined, // Vulkan
at::ScalarType::Undefined, // Metal
at::kHalf, // XPU
at::kBFloat16, // XPU
at::ScalarType::Undefined, // MPS
at::ScalarType::Undefined, // Meta (tensors with no data)
at::kBFloat16, // HPU / HABANA

View File

@ -31,7 +31,7 @@ struct TemplateEnv {
// Add a number 'v' to the map at key 'k'
template <typename T>
void d(const std::string& k, const T& v) {
strings_[k] = std::to_string(v);
strings_[k] = c10::to_string(v);
lists_.erase(k);
}

View File

@ -150,7 +150,7 @@ Generator make_generator(Args&&... args) {
* the backend generator type (CPU/CUDAGeneratorImpl etc.)
*/
template <typename T>
inline T * check_generator(std::optional<Generator> gen) {
static inline T * check_generator(std::optional<Generator> gen) {
TORCH_CHECK(gen.has_value(), "Expected Generator but received nullopt");
TORCH_CHECK(gen->defined(), "Generator with undefined implementation is not allowed");
TORCH_CHECK(T::device_type() == gen->device().type(), "Expected a '", T::device_type(), "' device type for generator but found '", gen->device().type(), "'");
@ -164,7 +164,7 @@ inline T * check_generator(std::optional<Generator> gen) {
* the backend generator type (CPU/CUDAGeneratorImpl etc.)
*/
template <typename T>
inline T* get_generator_or_default(const std::optional<Generator>& gen, const Generator& default_gen) {
static inline T* get_generator_or_default(const std::optional<Generator>& gen, const Generator& default_gen) {
return gen.has_value() && gen->defined() ? check_generator<T>(gen) : check_generator<T>(default_gen);
}
@ -177,7 +177,7 @@ namespace detail {
* - The new state tensor must be a torch.ByteTensor
* - Data of the new state tensor must be contiguous
*/
inline void check_rng_state(const c10::TensorImpl& new_state) {
static inline void check_rng_state(const c10::TensorImpl& new_state) {
TORCH_CHECK_TYPE(
new_state.layout() == kStrided && new_state.device().type() == kCPU && new_state.dtype() == kByte,
"RNG state must be a torch.ByteTensor"

View File

@ -953,7 +953,7 @@ TensorBase make_tensor_base(Args&&... args) {
} // namespace detail
inline DispatchKey legacyExtractDispatchKey(const TensorBase& t) {
static inline DispatchKey legacyExtractDispatchKey(const TensorBase& t) {
return legacyExtractDispatchKey(t.key_set());
}
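
Much of this diff mechanically toggles header functions between `inline` and `static inline`. A small sketch of the linkage difference (my illustration, not from the patch; the multi-TU effect only manifests when the header is included into several .cpp files):

```cpp
#include <iostream>

// In a header shared by many .cpp files:
//  - `inline` has external linkage: all translation units refer to one
//    merged definition, so the function-local static below is a single
//    program-wide counter.
//  - `static inline` has internal linkage: every translation unit gets its
//    own private copy, so its counter is duplicated per .cpp file.
inline int merged_count() {
  static int n = 0; // one shared counter across the whole program
  return ++n;
}

static inline int per_tu_count() {
  static int n = 0; // a separate counter in each translation unit
  return ++n;
}

int main() {
  // Within a single file both behave identically; the divergence appears
  // only once the definitions are included into multiple .cpp files.
  std::cout << merged_count() << " " << per_tu_count() << "\n"; // 1 1
}
```

Beyond duplicated statics, `static inline` in widely included headers can also bloat binaries with per-TU copies, which is presumably why the churn here sweeps whole headers at once.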

View File

@ -275,6 +275,16 @@ void expectOutOfPlaceMultiBoxedCallingWorks(const KernelFunction& func) {
EXPECT_TRUE(stack[1].toTensor().is_same(t2));
}
void expectBoxedCallingFailsWith(const KernelFunction& func, const char* errorMessage) {
called_with_args = c10::nullopt;
vector<IValue> stack {3, 4};
OperatorHandle dummy = makeDummyOperatorHandle();
expectThrows<c10::Error>([&] {
func.callBoxed(dummy, CPU_TEST_SET, &stack);
}, errorMessage);
}
//
// unboxed calling tests:
//

View File

@ -51,6 +51,17 @@ void expectCallsIncrement(DispatchKey dispatch_key) {
EXPECT_EQ(6, result[0].toInt());
}
void expectCallsDecrement(DispatchKey dispatch_key) {
at::AutoDispatchBelowAutograd mode;
// assert that schema and cpu kernel are present
auto op = c10::Dispatcher::singleton().findSchema({"_test::my_op", ""});
ASSERT_TRUE(op.has_value());
auto result = callOp(*op, dummyTensor(dispatch_key), 5);
EXPECT_EQ(1, result.size());
EXPECT_EQ(4, result[0].toInt());
}
TEST(OperatorRegistrationTestLegacyFunctionBasedKernel, givenKernel_whenRegistered_thenCanBeCalled) {
auto registrar = RegisterOperators().op("_test::my_op(Tensor dummy, int input) -> int", &incrementKernel);
expectCallsIncrement(DispatchKey::CPU);

View File

@ -662,6 +662,18 @@ void expectCallsConcatUnboxed(DispatchKey dispatch_key) {
EXPECT_EQ("123", result);
}
void expectCannotCallConcatBoxed(DispatchKey dispatch_key) {
at::AutoDispatchBelowAutograd mode;
// assert that schema and cpu kernel are present
auto op = c10::Dispatcher::singleton().findSchema({"_test::my_op", ""});
ASSERT_TRUE(op.has_value());
expectThrows<c10::Error>(
[&] {callOp(*op, dummyTensor(dispatch_key), "1", "2", 3);},
"Tried to call KernelFunction::callBoxed() on a KernelFunction that can only be called with KernelFunction::call()."
);
}
TEST(OperatorRegistrationTestFunctionBasedKernel, givenKernel_whenRegistered_thenCanBeCalledUnboxed) {
auto registrar = RegisterOperators().op("_test::my_op(Tensor dummy, str a, str b, int c) -> str", RegisterOperators::options().kernel<decltype(concatKernel), &concatKernel>(DispatchKey::CPU));
expectCallsConcatUnboxed(DispatchKey::CPU);

View File

@ -51,6 +51,17 @@ void expectCallsIncrement(DispatchKey dispatch_key) {
EXPECT_EQ(6, result[0].toInt());
}
void expectCallsDecrement(DispatchKey dispatch_key) {
at::AutoDispatchBelowAutograd mode;
// assert that schema and cpu kernel are present
auto op = c10::Dispatcher::singleton().findSchema({"_test::my_op", ""});
ASSERT_TRUE(op.has_value());
auto result = callOp(*op, dummyTensor(dispatch_key), 5);
EXPECT_EQ(1, result.size());
EXPECT_EQ(4, result[0].toInt());
}
TEST(OperatorRegistrationTestFunctorBasedKernel, givenKernel_whenRegistered_thenCanBeCalled) {
auto registrar = RegisterOperators().op("_test::my_op(Tensor dummy, int input) -> int", RegisterOperators::options().kernel<IncrementKernel>(DispatchKey::CPU));
expectCallsIncrement(DispatchKey::CPU);

View File

@ -21,7 +21,7 @@ namespace impl {
// on TLS.
//
// NB: If there is no valid dispatch key, this will return Undefined
inline DispatchKeySet computeDispatchKeySet(
static inline DispatchKeySet computeDispatchKeySet(
DispatchKeySet ks,
// The key mask lets us eliminate (by zero entries) keys which should not
// be considered for dispatch. There are two cases when we use this:

View File

@ -66,51 +66,51 @@ class Operation {
// treat the last N elements of the stack as a list, looking up
// element i
inline IValue& peek(Stack& stack, size_t i, size_t N) {
static inline IValue& peek(Stack& stack, size_t i, size_t N) {
// NOLINTNEXTLINE(cppcoreguidelines-narrowing-conversions)
return *(stack.end() - N + i);
}
inline IValue& peek(Stack* stack, size_t i, size_t N) {
static inline IValue& peek(Stack* stack, size_t i, size_t N) {
return peek(*stack, i, N);
}
inline const IValue& peek(const Stack& stack, size_t i, size_t N) {
static inline const IValue& peek(const Stack& stack, size_t i, size_t N) {
// NOLINTNEXTLINE(cppcoreguidelines-narrowing-conversions)
return *(stack.end() - N + i);
}
inline const IValue& peek(const Stack* stack, size_t i, size_t N) {
static inline const IValue& peek(const Stack* stack, size_t i, size_t N) {
return peek(*stack, i, N);
}
// treat the last N elements of the stack as a list, looking up the
// slice starting at index i and having length len
inline at::ArrayRef<IValue> peekSlice(
static inline at::ArrayRef<IValue> peekSlice(
const Stack& stack,
size_t i,
size_t len,
size_t N) {
return at::ArrayRef<IValue>(stack).slice(stack.size() - N + i, len);
}
inline at::ArrayRef<IValue> last(const Stack& stack, size_t N) {
static inline at::ArrayRef<IValue> last(const Stack& stack, size_t N) {
return peekSlice(stack, 0, N, N);
}
inline at::ArrayRef<IValue> last(const Stack* stack, size_t N) {
static inline at::ArrayRef<IValue> last(const Stack* stack, size_t N) {
return last(*stack, N);
}
inline void drop(Stack& stack, size_t n) {
static inline void drop(Stack& stack, size_t n) {
// NOLINTNEXTLINE(cppcoreguidelines-narrowing-conversions)
stack.erase(stack.end() - n, stack.end());
}
inline void drop(Stack* stack, size_t n) {
static inline void drop(Stack* stack, size_t n) {
drop(*stack, n);
}
inline IValue pop(Stack& stack) {
static inline IValue pop(Stack& stack) {
auto r = std::move(stack.back());
stack.pop_back();
return r;
}
inline IValue pop(Stack* stack) {
static inline IValue pop(Stack* stack) {
return pop(*stack);
}
inline std::vector<IValue> pop(Stack& stack, size_t n) {
static inline std::vector<IValue> pop(Stack& stack, size_t n) {
std::vector<IValue> result;
result.reserve(n);
for (const auto i : c10::irange(n)) {
@ -127,7 +127,7 @@ inline std::vector<IValue> pop(Stack& stack, size_t n) {
// b = pop(stack).toTensor();
// a = pop(stack).toInt();
template <typename... Types>
inline void pop(Stack& stack, Types&... args) {
static inline void pop(Stack& stack, Types&... args) {
size_t i = 0;
constexpr size_t N = sizeof...(args);
(void)std::initializer_list<int>{
@ -135,15 +135,15 @@ inline void pop(Stack& stack, Types&... args) {
drop(stack, N);
}
template <typename... Types>
inline void pop(Stack* stack, Types&... args) {
static inline void pop(Stack* stack, Types&... args) {
pop(*stack, args...);
}
template <typename Type>
inline void push_one(Stack& stack, Type&& arg) {
static inline void push_one(Stack& stack, Type&& arg) {
stack.emplace_back(std::forward<Type>(arg));
}
inline void push_one(Stack& stack, c10::TensorOptions options) {
static inline void push_one(Stack& stack, c10::TensorOptions options) {
stack.emplace_back(c10::typeMetaToScalarType(options.dtype()));
stack.emplace_back(options.layout());
stack.emplace_back(options.device());
@ -151,15 +151,15 @@ inline void push_one(Stack& stack, c10::TensorOptions options) {
}
template <typename... Types>
inline void push(Stack& stack, Types&&... args) {
static inline void push(Stack& stack, Types&&... args) {
(void)std::initializer_list<int>{(push_one(stack, std::forward<Types>(args)), 0)...};
}
template <typename... Types>
inline void push(Stack* stack, Types&&... args) {
static inline void push(Stack* stack, Types&&... args) {
return push(*stack, std::forward<Types>(args)...);
}
template <class T>
inline void push_list_elements(Stack& stack, const c10::List<T>& elements) {
static inline void push_list_elements(Stack& stack, const c10::List<T>& elements) {
for (T elem : elements) {
stack.push_back(std::move(elem));
}
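
The helpers above treat the last N vector elements as the argument list of the op being executed. A stand-alone sketch of the same indexing scheme on `std::vector<int>` (the IValue machinery is assumed away):

```cpp
#include <cassert>
#include <vector>

// peek(stack, i, N): treat the last N elements as a list, return element i.
int& peek(std::vector<int>& stack, size_t i, size_t N) {
  return *(stack.end() - N + i);
}

// drop(stack, n): discard the top n elements.
void drop(std::vector<int>& stack, size_t n) {
  stack.erase(stack.end() - n, stack.end());
}

// pop(stack): move the top element off the stack.
int pop(std::vector<int>& stack) {
  int r = stack.back();
  stack.pop_back();
  return r;
}

int main() {
  std::vector<int> stack = {7, 1, 2, 3}; // pretend the current op takes 3 args
  assert(peek(stack, 0, 3) == 1);        // first of the last three
  assert(peek(stack, 2, 3) == 3);        // last of the last three
  drop(stack, 3);                        // consume the arguments
  assert(pop(stack) == 7);
  assert(stack.empty());
}
```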

View File

@ -4,21 +4,6 @@
#endif
namespace at::cpu {
bool is_cpu_support_avx2() {
#if !defined(__s390x__) && !defined(__powerpc__)
return cpuinfo_initialize() && cpuinfo_has_x86_avx2();
#else
return false;
#endif
}
bool is_cpu_support_avx512() {
#if !defined(__s390x__) && !defined(__powerpc__)
return cpuinfo_initialize() && cpuinfo_has_x86_avx512f() && cpuinfo_has_x86_avx512vl() && cpuinfo_has_x86_avx512bw() && cpuinfo_has_x86_avx512dq();
#else
return false;
#endif
}
bool is_cpu_support_vnni() {
#if !defined(__s390x__) && !defined(__powerpc__)

View File

@ -4,9 +4,6 @@
namespace at::cpu {
TORCH_API bool is_cpu_support_avx2();
TORCH_API bool is_cpu_support_avx512();
// Detect if the CPU supports Vector Neural Network Instructions.
TORCH_API bool is_cpu_support_vnni();

View File

@ -170,6 +170,43 @@ CUDA_STUB3(cuLinkComplete, CUlinkState, void **, size_t *);
CUDA_STUB3(cuFuncSetAttribute, CUfunction, CUfunction_attribute, int);
CUDA_STUB3(cuFuncGetAttribute, int*, CUfunction_attribute, CUfunction);
#if defined(CUDA_VERSION) && CUDA_VERSION >= 12000
CUresult CUDAAPI
cuTensorMapEncodeTiled(
CUtensorMap* tensorMap,
CUtensorMapDataType tensorDataType,
cuuint32_t tensorRank,
void* globalAddress,
const cuuint64_t* globalDim,
const cuuint64_t* globalStrides,
const cuuint32_t* boxDim,
const cuuint32_t* elementStrides,
CUtensorMapInterleave interleave,
CUtensorMapSwizzle swizzle,
CUtensorMapL2promotion l2Promotion,
CUtensorMapFloatOOBfill oobFill) {
auto fn = reinterpret_cast<decltype(&cuTensorMapEncodeTiled)>(
getCUDALibrary().sym(__func__));
if (!fn)
throw std::runtime_error("Can't get cuTensorMapEncodeTiled");
lazyNVRTC.cuTensorMapEncodeTiled = fn;
return fn(
tensorMap,
tensorDataType,
tensorRank,
globalAddress,
globalDim,
globalStrides,
boxDim,
elementStrides,
interleave,
swizzle,
l2Promotion,
oobFill);
}
#endif
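
The stub above resolves `cuTensorMapEncodeTiled` out of the driver library on first use and caches the pointer. A hedged sketch of the same lazy-binding pattern with plain POSIX `dlopen`/`dlsym`; the library and symbol here (`libm.so.6`, `cos`) are placeholders chosen only so the example runs anywhere:

```cpp
#include <dlfcn.h>
#include <stdexcept>

// Signature of the symbol we bind lazily (placeholder example).
using cos_fn = double (*)(double);

double lazy_cos(double x) {
  // Resolve once; later calls reuse the cached pointer.
  static cos_fn fn = [] {
    void* handle = dlopen("libm.so.6", RTLD_LAZY); // assumed library name
    if (!handle) throw std::runtime_error("can't open libm");
    auto sym = reinterpret_cast<cos_fn>(dlsym(handle, "cos"));
    if (!sym) throw std::runtime_error("can't resolve cos");
    return sym;
  }();
  return fn(x);
}

int main() {
  return lazy_cos(0.0) == 1.0 ? 0 : 1; // link with -ldl on older glibc
}
```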
// Irregularly shaped functions
CUresult CUDAAPI cuLaunchKernel(CUfunction f,
unsigned int gridDimX,

View File

@ -34,8 +34,8 @@ struct PhiloxCudaState {
int64_t* ptr;
};
Payload seed_{};
Payload offset_{};
Payload seed_;
Payload offset_;
uint32_t offset_intragraph_ = 0;
bool captured_ = false;
};
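
The hunk above adds or removes the `{}` initializers on the two `Payload` members. A small sketch of what that brace changes, with a hypothetical payload type standing in for the real union:

```cpp
#include <cstdint>
#include <iostream>

struct Payload {
  uint64_t val; // e.g. a Philox seed value
};

struct WithBraces {
  Payload seed_{}; // value-initialized: seed_.val is guaranteed to be 0
};

struct WithoutBraces {
  Payload seed_;   // default-initialized: for an automatic-storage object,
                   // seed_.val holds an indeterminate value
};

int main() {
  WithBraces a;
  std::cout << a.seed_.val << "\n"; // always 0
  WithoutBraces b;
  // Reading b.seed_.val here would be undefined behavior.
  (void)b;
}
```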

View File

@ -59,16 +59,25 @@ namespace at { namespace cuda {
_(cuLinkAddData) \
_(cuLinkComplete) \
_(cuFuncSetAttribute) \
_(cuFuncGetAttribute)
_(cuFuncGetAttribute) \
#if defined(CUDA_VERSION) && CUDA_VERSION >= 12000
#define AT_FORALL_NVRTC_EXTENDED(_) \
AT_FORALL_NVRTC_BASE(_) \
_(cuTensorMapEncodeTiled)
#else
#define AT_FORALL_NVRTC_EXTENDED(_) \
AT_FORALL_NVRTC_BASE(_)
#endif
#if defined(CUDA_VERSION) && CUDA_VERSION >= 11010
#define AT_FORALL_NVRTC(_) \
AT_FORALL_NVRTC_BASE(_) \
AT_FORALL_NVRTC_EXTENDED(_) \
_(nvrtcGetCUBINSize) \
_(nvrtcGetCUBIN)
#else
#define AT_FORALL_NVRTC(_) \
AT_FORALL_NVRTC_BASE(_)
AT_FORALL_NVRTC_EXTENDED(_)
#endif
#else

View File

@ -59,6 +59,13 @@ view_as_complex_batch_rule(const Tensor& self, optional<int64_t> self_bdim) {
return std::make_tuple(result, 0);
}
std::tuple<Tensor,optional<int64_t>>
to_other_batch_rule(const Tensor& self, optional<int64_t> self_bdim,
const Tensor& other, optional<int64_t> other_bdim,
bool non_blocking,
bool copy, std::optional<at::MemoryFormat> memory_format) {
return std::make_tuple(self.to(other, non_blocking, copy, memory_format), self_bdim);
}
}
TORCH_LIBRARY_IMPL(aten, FuncTorchBatched, m) {

View File

@ -31,6 +31,46 @@ Tensor index_select_backward_hack(const Tensor& grad, IntArrayRef self_sizes, in
return at::zeros(self_sizes, grad.options()).index_add(dim, index, grad);
}
static optional<std::tuple<Tensor,int64_t>> unwrap(const Tensor& tensor) {
auto* wrapped = maybeGetTensorWrapper(tensor);
if (wrapped) {
if (wrapped->level().has_value()) {
return std::make_tuple(wrapped->value(), *wrapped->level());
}
return unwrap(wrapped->value());
}
auto* batched = maybeGetBatchedImpl(tensor);
if (batched) {
return std::make_tuple(batched->value(), batched->level());
}
return nullopt;
}
static bool can_perform_inplace(const Tensor& a, const Tensor& b) {
// TODO: generalize this to more transforms
auto a_ = unwrap(a);
auto b_ = unwrap(b);
if (!a_.has_value() && b_.has_value()) {
return false;
}
if (!a_.has_value() && !b_.has_value()) {
return true;
}
if (a_.has_value() && !b_.has_value()) {
return true;
}
TORCH_INTERNAL_ASSERT(a_.has_value() && b_.has_value());
// If b has any wrapper that a does not, then we cannot do a.inplace_(b)
if (std::get<1>(*a_) < std::get<1>(*b_)) {
return false;
}
if (std::get<1>(*a_) > std::get<1>(*b_)) {
return can_perform_inplace(std::get<0>(*a_), b);
}
return can_perform_inplace(std::get<0>(*a_), std::get<0>(*b_));
}
// TODO: linear is pretty important for performance, but I'm not sure how to work
// around the in-place.
Tensor linear_hack(const Tensor& input, const Tensor& weight, const std::optional<Tensor>& bias_opt) {

View File

@ -23,7 +23,7 @@ enum class GeluType {
END
};
inline GeluType get_gelutype_enum(const c10::string_view approximate) {
static GeluType get_gelutype_enum(const c10::string_view approximate) {
if (approximate == "none") {
return GeluType::None;
} else if (approximate == "tanh") {
@ -33,7 +33,7 @@ inline GeluType get_gelutype_enum(const c10::string_view approximate) {
}
}
inline std::string gelutype_to_string(const GeluType type) {
static std::string gelutype_to_string(const GeluType type) {
switch(type) {
case GeluType::None: return "none";
case GeluType::Tanh: return "tanh";

View File

@ -28,15 +28,15 @@ using adaptive_max_pooling3d_backward_fn = void(*)(const Tensor& grad_input, con
DECLARE_DISPATCH(adaptive_max_pooling3d_fn, adaptive_max_pool3d_kernel);
DECLARE_DISPATCH(adaptive_max_pooling3d_backward_fn, adaptive_max_pool3d_backward_kernel);
inline int64_t start_index(int64_t a, int64_t b, int64_t c) {
static inline int64_t start_index(int64_t a, int64_t b, int64_t c) {
return (a / b) * c + ((a % b) * c) / b;
}
inline int64_t end_index(int64_t a, int64_t b, int64_t c) {
static inline int64_t end_index(int64_t a, int64_t b, int64_t c) {
return 1 + ((a + 1) * c - 1) / b;
}
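
These two helpers are integer formulations of floor(a*c/b) and ceil((a+1)*c/b): output bin `a` of `b` adaptive-pooling bins over an input of length `c` covers input indices `[start, end)`, and adjacent bins may overlap. A quick stand-alone check of that identity:

```cpp
#include <cassert>
#include <cstdint>

// start_index(a, b, c) == floor(a * c / b): first input element of bin a.
int64_t start_index(int64_t a, int64_t b, int64_t c) {
  return (a / b) * c + ((a % b) * c) / b;
}

// end_index(a, b, c) == ceil((a + 1) * c / b): one past the last element.
int64_t end_index(int64_t a, int64_t b, int64_t c) {
  return 1 + ((a + 1) * c - 1) / b;
}

int main() {
  // Adaptive pooling of an input of length 10 into 3 bins:
  // bin 0 -> [0, 4), bin 1 -> [3, 7), bin 2 -> [6, 10).
  assert(start_index(0, 3, 10) == 0 && end_index(0, 3, 10) == 4);
  assert(start_index(1, 3, 10) == 3 && end_index(1, 3, 10) == 7);
  assert(start_index(2, 3, 10) == 6 && end_index(2, 3, 10) == 10);
}
```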
inline void adaptive_pool_empty_output_check(const Tensor& gradOutput_, const char* arg_name) {
static inline void adaptive_pool_empty_output_check(const Tensor& gradOutput_, const char* arg_name) {
int64_t ndim = gradOutput_.ndimension();
for (const auto i : c10::irange(1, ndim)) {
TORCH_CHECK(gradOutput_.size(i) > 0,

View File

@ -1480,14 +1480,23 @@ Tensor& not_equal_(Tensor& self, const Scalar& other) { return self.ne_(other);
Tensor& logical_and_out(const Tensor& self, const Tensor& other, Tensor& result) { return comparison_op_out(result, self, other, logical_and_stub); }
Tensor logical_and(const Tensor& self, const Tensor& other) { return comparison_op(self, other, static_cast<OutFunc>(at::logical_and_out)); }
Tensor& logical_and_(Tensor& self, const Tensor& other) { return comparison_op_(self, other, static_cast<OutFunc>(at::logical_and_out)); }
static Tensor& logical_and_out(Tensor& result, const Tensor& self, const Scalar& other) { return comparison_op_out(result, self, other, static_cast<OutFunc>(at::logical_and_out)); }
static Tensor logical_and(const Tensor& self, const Scalar& other) { return comparison_op(self, other, static_cast<OutFunc>(at::logical_and_out)); }
static Tensor& logical_and_(Tensor& self, const Scalar& other) { return comparison_op_(self, other, static_cast<OutFunc>(at::logical_and_out)); }
Tensor& logical_or_out(const Tensor& self, const Tensor& other, Tensor& result) { return comparison_op_out(result, self, other, logical_or_stub); }
Tensor logical_or(const Tensor& self, const Tensor& other) { return comparison_op(self, other, static_cast<OutFunc>(at::logical_or_out)); }
Tensor& logical_or_(Tensor& self, const Tensor& other) { return comparison_op_(self, other, static_cast<OutFunc>(at::logical_or_out)); }
static Tensor& logical_or_out(Tensor& result, const Tensor& self, const Scalar& other) { return comparison_op_out(result, self, other, static_cast<OutFunc>(at::logical_or_out)); }
static Tensor logical_or(const Tensor& self, const Scalar& other) { return comparison_op(self, other, static_cast<OutFunc>(at::logical_or_out)); }
static Tensor& logical_or_(Tensor& self, const Scalar& other) { return comparison_op_(self, other, static_cast<OutFunc>(at::logical_or_out)); }
Tensor& logical_xor_out(const Tensor& self, const Tensor& other, Tensor& result) { return comparison_op_out(result, self, other, logical_xor_stub); }
Tensor logical_xor(const Tensor& self, const Tensor& other) { return comparison_op(self, other, static_cast<OutFunc>(at::logical_xor_out)); }
Tensor& logical_xor_(Tensor& self, const Tensor& other) { return comparison_op_(self, other, static_cast<OutFunc>(at::logical_xor_out)); }
static Tensor& logical_xor_out(Tensor& result, const Tensor& self, const Scalar& other) { return comparison_op_out(result, self, other, static_cast<OutFunc>(at::logical_xor_out)); }
static Tensor logical_xor(const Tensor& self, const Scalar& other) { return comparison_op(self, other, static_cast<OutFunc>(at::logical_xor_out)); }
static Tensor& logical_xor_(Tensor& self, const Scalar& other) { return comparison_op_(self, other, static_cast<OutFunc>(at::logical_xor_out)); }
// binary max, alias for maximum
Tensor& max_out(const Tensor& self, const Tensor& other, Tensor& result) {

View File

@ -75,7 +75,7 @@ namespace {
}
}
inline bool cudnnv8_enabled_check_debug() {
static inline bool cudnnv8_enabled_check_debug() {
static bool cudnnv8_flag = c10::utils::check_env("TORCH_CUDNN_V8_API_DISABLED") != true;
static bool cudnnv8_debug = c10::utils::check_env("TORCH_CUDNN_V8_API_DEBUG") == true;
static uint8_t cudnnv8_debugcount = 0;
@ -86,7 +86,7 @@ inline bool cudnnv8_enabled_check_debug() {
return cudnnv8_flag == 1;
}
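
`cudnnv8_enabled_check_debug` caches its environment lookups in function-local statics so hot paths pay for `getenv` only once. A minimal sketch of that caching pattern; `MY_FEATURE_DISABLED` is an invented variable standing in for `TORCH_CUDNN_V8_API_DISABLED`:

```cpp
#include <cstdlib>
#include <string>

// Cache an environment-derived flag in a function-local static: the
// getenv() call and string compare run once, on first use, and C++11
// magic statics make the initialization thread-safe.
static inline bool feature_enabled() {
  static const bool enabled = [] {
    const char* v = std::getenv("MY_FEATURE_DISABLED");
    return !(v && std::string(v) == "1"); // enabled unless explicitly "1"
  }();
  return enabled;
}

int main() {
  return feature_enabled() ? 0 : 1;
}
```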
inline bool cudnnv8_use_heur_mode_b() {
static inline bool cudnnv8_use_heur_mode_b() {
return is_cudnnv8_heuristic_mode_b();
}
@ -186,7 +186,7 @@ static void check_args(CheckedFrom c, IntArrayRef args, size_t expected_size, co
// (which the user can change) and computed inputs (which the user can
// only indirectly affect). It would be an interesting exercise to
// come up with a general framework to handle such situations.)
inline void convolution_shape_check(
static void convolution_shape_check(
CheckedFrom c,
const TensorGeometryArg& input, const TensorGeometryArg& weight, const TensorGeometryArg& output,
IntArrayRef padding, IntArrayRef stride, IntArrayRef dilation, int64_t groups)
@ -212,7 +212,7 @@ inline void convolution_shape_check(
// takes an extra output_padding argument to resolve the ambiguity.
template <typename T>
inline std::vector<T> _conv_output_size(
static inline std::vector<T> _conv_output_size(
ArrayRef<T> input_size, ArrayRef<T> weight_size,
ArrayRef<T> padding, ArrayRef<T> stride, ArrayRef<T> dilation = ArrayRef<T>()
) {
@ -231,14 +231,14 @@ inline std::vector<T> _conv_output_size(
return output_size;
}
inline std::vector<int64_t> conv_output_size(
static inline std::vector<int64_t> conv_output_size(
IntArrayRef input_size, IntArrayRef weight_size,
IntArrayRef padding, IntArrayRef stride, IntArrayRef dilation = IntArrayRef()
) {
return _conv_output_size(input_size, weight_size, padding, stride, dilation);
}
inline std::vector<c10::SymInt> conv_output_size(
static inline std::vector<c10::SymInt> conv_output_size(
SymIntArrayRef input_size, SymIntArrayRef weight_size,
SymIntArrayRef padding, SymIntArrayRef stride, SymIntArrayRef dilation = SymIntArrayRef()
) {
@ -264,14 +264,14 @@ std::vector<T> _conv_input_size(
return input_size;
}
inline std::vector<c10::SymInt> conv_input_size(
static inline std::vector<c10::SymInt> conv_input_size(
SymIntArrayRef output_size, SymIntArrayRef weight_size,
SymIntArrayRef padding, SymIntArrayRef output_padding, SymIntArrayRef stride, SymIntArrayRef dilation, c10::SymInt groups
) {
return _conv_input_size(output_size, weight_size, padding, output_padding, stride, dilation, groups);
}
inline std::vector<int64_t> conv_input_size(
static inline std::vector<int64_t> conv_input_size(
IntArrayRef output_size, IntArrayRef weight_size,
IntArrayRef padding, IntArrayRef output_padding, IntArrayRef stride, IntArrayRef dilation, int64_t groups
) {
@ -295,27 +295,27 @@ std::vector<T> _conv_weight_size(
return weight_size;
}
inline std::vector<c10::SymInt> conv_weight_size(
static inline std::vector<c10::SymInt> conv_weight_size(
SymIntArrayRef input_size, SymIntArrayRef output_size,
SymIntArrayRef padding, SymIntArrayRef output_padding, IntArrayRef stride, IntArrayRef dilation, int64_t groups
) {
return _conv_weight_size(input_size, output_size, padding, output_padding, stride, dilation, groups);
}
inline std::vector<int64_t> conv_weight_size(
static inline std::vector<int64_t> conv_weight_size(
IntArrayRef input_size, IntArrayRef output_size,
IntArrayRef padding, IntArrayRef output_padding, IntArrayRef stride, IntArrayRef dilation, int64_t groups
) {
return _conv_weight_size(input_size, output_size, padding, output_padding, stride, dilation, groups);
}
inline Tensor reshape_bias(int64_t dim, const Tensor& bias) {
static inline Tensor reshape_bias(int64_t dim, const Tensor& bias) {
std::vector<int64_t> shape(dim, 1);
shape[1] = -1;
return bias.reshape(shape);
}
inline at::MemoryFormat cudnn_conv_suggest_memory_format(const at::Tensor& input, const at::Tensor& weight) {
static inline at::MemoryFormat cudnn_conv_suggest_memory_format(const at::Tensor& input, const at::Tensor& weight) {
// disable NHWC for float64 input.
if (!at::detail::getCUDAHooks().compiledWithCuDNN() ||
input.scalar_type() == at::kDouble ||
@ -351,7 +351,7 @@ TORCH_API void _cudnn_set_conv_benchmark_empty_cache(bool enable);
TORCH_API bool _cudnn_get_conv_benchmark_empty_cache();
inline bool miopen_conv_use_channels_last(const at::Tensor& input, const at::Tensor& weight) {
static inline bool miopen_conv_use_channels_last(const at::Tensor& input, const at::Tensor& weight) {
// disable NHWC for float64 input.
if (!at::detail::getCUDAHooks().compiledWithMIOpen() ||
@ -378,7 +378,7 @@ inline bool miopen_conv_use_channels_last(const at::Tensor& input, const at::Ten
return can_use_miopen_channels_last_2d || can_use_miopen_channels_last_3d;
}
inline bool mkldnn_conv_use_channels_last(const at::Tensor& input, const at::Tensor& weight) {
static inline bool mkldnn_conv_use_channels_last(const at::Tensor& input, const at::Tensor& weight) {
// disable NHWC for float64 input.
if (input.scalar_type() == at::kDouble ||
@ -405,7 +405,7 @@ inline bool mkldnn_conv_use_channels_last(const at::Tensor& input, const at::Ten
return can_use_mkldnn_channels_last_2d || can_use_mkldnn_channels_last_3d;
}
inline bool thnn_conv_use_channels_last(const at::Tensor& input, const at::Tensor& weight) {
static inline bool thnn_conv_use_channels_last(const at::Tensor& input, const at::Tensor& weight) {
auto input_memory_format = input.suggest_memory_format();
auto weight_memory_format = weight.suggest_memory_format();
@ -417,7 +417,7 @@ inline bool thnn_conv_use_channels_last(const at::Tensor& input, const at::Tenso
return can_use_thnn_channels_last_2d;
}
inline bool xpu_conv_use_channels_last(const at::Tensor& input, const at::Tensor& weight) {
static inline bool xpu_conv_use_channels_last(const at::Tensor& input, const at::Tensor& weight) {
// check layout only for xpu tensor.
if (!input.is_xpu() || !weight.is_xpu()) {

View File

@ -393,7 +393,7 @@ struct RegisterPRIVATEUSE1Dispatch {
// REGISTER_DISPATCH now dispatches an AVX512 kernel to nullptr but registers other dispatches.
// ALSO_REGISTER_AVX512_DISPATCH should be used for ensuring AVX512 dispatch, among others.
#ifdef CPU_CAPABILITY_AVX512
#define REGISTER_DISPATCH(name, fn) REGISTER_ARCH_DISPATCH(name, CPU_CAPABILITY, ((void*)(fn) ? nullptr : nullptr))
#define REGISTER_DISPATCH(name, fn) REGISTER_ARCH_DISPATCH(name, CPU_CAPABILITY, nullptr)
#else
#define REGISTER_DISPATCH(name, fn) REGISTER_ARCH_DISPATCH(name, CPU_CAPABILITY, fn)
#endif
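
Both sides of this hunk register `nullptr` under AVX512; the `((void*)(fn) ? nullptr : nullptr)` form differs only in that it still names `fn`, keeping the kernel symbol referenced (so the compiler cannot flag it as unused) while registering nothing. A toy illustration of the idiom, under the assumption that the function-pointer-to-`void*` cast is accepted (it is conditionally-supported, but GCC and Clang allow it):

```cpp
#include <cstdio>

// Would trigger -Wunused-function if nothing referenced it.
static void kernel() { std::puts("never registered"); }

using fn_t = void (*)();

// Always evaluates to nullptr, yet odr-uses `kernel` by taking its address,
// mirroring the ((void*)(fn) ? nullptr : nullptr) expression above.
static fn_t registered = ((void*)(kernel) ? nullptr : nullptr);

int main() {
  return registered == nullptr ? 0 : 1;
}
```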

View File

@ -254,7 +254,7 @@ C10_DEVICE scalar_t sample_binomial(scalar_t count, scalar_t prob, BaseSampler<a
* See note [3-Clause BSD License for the Cephes Math Library] in ATen/native/Math.h.
*/
template<typename scalar_t, typename accscalar_t>
C10_DEVICE inline scalar_t digamma_one(scalar_t x) {
C10_DEVICE static inline scalar_t digamma_one(scalar_t x) {
constexpr accscalar_t PSI_10 = 2.25175258906672110764;
if (x == 0) {
return INFINITY;
@ -376,7 +376,7 @@ C10_HOST_DEVICE scalar_t standard_gamma_grad_one(scalar_t alpha_, scalar_t x_) {
// Approximate reparameterized gradient of Beta(x,alpha,beta) wrt alpha.
// Assumes x is close to zero and uses a Taylor expansion.
template <typename scalar_t, typename accscalar_t>
C10_DEVICE inline scalar_t _beta_grad_alpha_small(scalar_t x, scalar_t alpha, scalar_t beta) {
C10_DEVICE static inline scalar_t _beta_grad_alpha_small(scalar_t x, scalar_t alpha, scalar_t beta) {
const scalar_t factor = digamma_one<scalar_t, accscalar_t>(alpha)
- digamma_one<scalar_t, accscalar_t>(alpha + beta) - compat_log(x);
scalar_t numer = 1;
@ -394,7 +394,7 @@ C10_DEVICE inline scalar_t _beta_grad_alpha_small(scalar_t x, scalar_t alpha, sc
// Approximate reparameterized gradient of Beta(x,alpha,beta) wrt beta.
// Assumes x is close to zero and uses a Taylor expansion.
template <typename scalar_t, typename accscalar_t>
C10_DEVICE inline scalar_t _beta_grad_beta_small(scalar_t x, scalar_t alpha, scalar_t beta) {
C10_DEVICE static inline scalar_t _beta_grad_beta_small(scalar_t x, scalar_t alpha, scalar_t beta) {
const scalar_t factor = digamma_one<scalar_t, accscalar_t>(alpha + beta) - digamma_one<scalar_t, accscalar_t>(beta);
scalar_t numer = 1, betas = 1, dbetas = 0, series = factor / alpha;
for (int i = 1; i <= 8; ++i) {
@ -412,7 +412,7 @@ C10_DEVICE inline scalar_t _beta_grad_beta_small(scalar_t x, scalar_t alpha, sca
// Assumes alpha and beta are both large and uses a Rice saddle point expansion.
// To ensure numerical stability, this computation is performed at higher precision.
template<typename scalar_t, typename accscalar_t>
C10_DEVICE inline scalar_t _beta_grad_alpha_mid(accscalar_t x, accscalar_t alpha, accscalar_t beta) {
C10_DEVICE static inline scalar_t _beta_grad_alpha_mid(accscalar_t x, accscalar_t alpha, accscalar_t beta) {
const accscalar_t total = alpha + beta;
const accscalar_t mean = alpha / total;
const accscalar_t std = compat_sqrt(alpha * beta / (total + 1)) / total;
@ -452,7 +452,7 @@ C10_DEVICE inline scalar_t _beta_grad_alpha_mid(accscalar_t x, accscalar_t alpha
// This function inputs total=alpha+beta to make it easy to implement
// Dirichlet reparameterized gradients in terms of Betas.
template<typename scalar_t, typename accscalar_t>
C10_HOST_DEVICE inline scalar_t dirichlet_grad_one(scalar_t x, scalar_t alpha, scalar_t total) {
C10_HOST_DEVICE static inline scalar_t dirichlet_grad_one(scalar_t x, scalar_t alpha, scalar_t total) {
accscalar_t x_ = static_cast<accscalar_t>(x);
accscalar_t alpha_ = static_cast<accscalar_t>(alpha);
accscalar_t total_ = static_cast<accscalar_t>(total);

View File

@ -6,7 +6,7 @@
namespace at::native {
template<typename scalar_t>
inline std::vector<int> generate_intervals(
static inline std::vector<int> generate_intervals(
scalar_t sample,
int64_t inputSize,
int64_t outputSize,
@ -28,7 +28,7 @@ inline std::vector<int> generate_intervals(
}
template <int64_t ndim>
inline void fractional_max_pool_check_shape(
static inline void fractional_max_pool_check_shape(
const Tensor& input,
const Tensor& randomSamples) {

View File

@ -856,7 +856,7 @@ namespace {
/**
* @brief Computes the optimal matrix chain multiplication order
*
* Follows the dynamic programming algorithm from Cormen et al.,
* Follows the dynamic programming algorithm from Cormen et al.,
* "Introduction to Algorithms, Third Edition", Chapter 15.2,
* p. 370-378. Note that the book uses 1-based indexing.
*
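
The comment references the classic CLRS matrix-chain DP; here is a compact 0-based sketch of that algorithm, independent of the ATen implementation it documents:

```cpp
#include <algorithm>
#include <cstdint>
#include <iostream>
#include <limits>
#include <vector>

// Matrix-chain-order DP (CLRS ch. 15.2, rewritten 0-based): dims has n+1
// entries and matrix i is dims[i] x dims[i+1]. Returns the minimal number
// of scalar multiplications needed to compute the full product.
int64_t matrix_chain_cost(const std::vector<int64_t>& dims) {
  const size_t n = dims.size() - 1; // number of matrices
  std::vector<std::vector<int64_t>> m(n, std::vector<int64_t>(n, 0));
  for (size_t len = 2; len <= n; ++len) {   // subchain length
    for (size_t i = 0; i + len <= n; ++i) {
      size_t j = i + len - 1;
      m[i][j] = std::numeric_limits<int64_t>::max();
      for (size_t k = i; k < j; ++k) {      // split point A_i..A_k | A_{k+1}..A_j
        int64_t cost = m[i][k] + m[k + 1][j] +
                       dims[i] * dims[k + 1] * dims[j + 1];
        m[i][j] = std::min(m[i][j], cost);
      }
    }
  }
  return m[0][n - 1];
}

int main() {
  // The CLRS textbook example: 30x35, 35x15, 15x5, 5x10, 10x20, 20x25.
  std::cout << matrix_chain_cost({30, 35, 15, 5, 10, 20, 25}) << "\n"; // 15125
}
```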

View File

@ -27,7 +27,7 @@
namespace at::native {
inline c10::MaybeOwned<Tensor> expect_resolved_conj(const Tensor& tensor) {
static inline c10::MaybeOwned<Tensor> expect_resolved_conj(const Tensor& tensor) {
if (tensor.is_conj()) {
return c10::MaybeOwned<Tensor>::owned(tensor.resolve_conj());
} else {
@ -35,7 +35,7 @@ inline c10::MaybeOwned<Tensor> expect_resolved_conj(const Tensor& tensor) {
}
}
inline DimVector batched_matrix_contiguous_strides(
static inline DimVector batched_matrix_contiguous_strides(
const IntArrayRef sizes,
const bool f_contig = false) {
// f_contig chooses between the strides of a batch of Fortran (F-contiguous)
@ -62,7 +62,7 @@ inline DimVector batched_matrix_contiguous_strides(
* P.data_ptr()[B * M * N] is of the same corresponding batch as the M' by N'
* matrix starting at Q.data_ptr()[B * M' * N'].
*/
inline Tensor cloneBatchedColumnMajor(const Tensor& src) {
static inline Tensor cloneBatchedColumnMajor(const Tensor& src) {
// If src is already in batched column major format, then
// this will be efficient (no reordering of the data will occur)
// because the first transpose will make the tensor contiguous,
@ -75,7 +75,7 @@ inline Tensor cloneBatchedColumnMajor(const Tensor& src) {
/*
* contig chooses between C-contig (true) and F-contig (false)
*/
inline c10::MaybeOwned<Tensor> borrow_else_clone(const bool cond, const Tensor& borrow, const Tensor& clone, const bool contig) {
static inline c10::MaybeOwned<Tensor> borrow_else_clone(const bool cond, const Tensor& borrow, const Tensor& clone, const bool contig) {
return cond ? c10::MaybeOwned<Tensor>::borrowed(borrow)
: c10::MaybeOwned<Tensor>::owned(contig ? clone.clone(MemoryFormat::Contiguous)
: cloneBatchedColumnMajor(clone));
@ -92,7 +92,7 @@ inline c10::MaybeOwned<Tensor> borrow_else_clone(const bool cond, const Tensor&
* which is either the original batch size of the input, or its larger
* broadcasted shape.
*/
inline Tensor copyBatchedColumnMajor(const Tensor& src, int64_t nrows = -1,
static inline Tensor copyBatchedColumnMajor(const Tensor& src, int64_t nrows = -1,
at::OptionalIntArrayRef desired_batch_sizes = c10::nullopt) {
nrows = (nrows == -1) ? src.size(-2) : nrows;
auto copy_sizes = desired_batch_sizes.has_value()
@ -109,7 +109,7 @@ inline Tensor copyBatchedColumnMajor(const Tensor& src, int64_t nrows = -1,
* Given batches of matrices with arbitrary batch dim,
* computes the number of batches.
*/
inline int64_t batchCount(const Tensor& batched_matrices) {
static inline int64_t batchCount(const Tensor& batched_matrices) {
int64_t result = 1;
for (int64_t i = 0; i < batched_matrices.ndimension() - 2; i++) {
result *= batched_matrices.size(i);
@ -118,15 +118,15 @@ inline int64_t batchCount(const Tensor& batched_matrices) {
}
// Computes the number of elements of a matrix in a batched matrix tensor
inline int64_t matrixStride(const Tensor& batched_matrices) {
static inline int64_t matrixStride(const Tensor& batched_matrices) {
return batched_matrices.size(-1) * batched_matrices.size(-2);
}
// Validates input shapes for operations on batches of square matrices (inverse, cholesky, symeig, eig)
inline void checkIsMatrix(const Tensor& A, const char* const f_name, const char* const arg_name = "A") {
static inline void checkIsMatrix(const Tensor& A, const char* const f_name, const char* const arg_name = "A") {
TORCH_CHECK(A.dim() >= 2, f_name, ": The input tensor ", arg_name, " must have at least 2 dimensions.");
}
inline void squareCheckInputs(const Tensor& self, const char* const f_name, const char* const arg_name = "A") {
static inline void squareCheckInputs(const Tensor& self, const char* const f_name, const char* const arg_name = "A") {
checkIsMatrix(self, f_name, arg_name);
TORCH_CHECK(self.sym_size(-1) == self.sym_size(-2),
f_name,
@ -134,7 +134,7 @@ inline void squareCheckInputs(const Tensor& self, const char* const f_name, cons
"but they are ", self.sym_size(-2), " by ", self.sym_size(-1), " matrices");
}
inline void checkInputsSolver(const Tensor& A,
static inline void checkInputsSolver(const Tensor& A,
const Tensor& B,
const bool left,
const char* const f_name) {
@ -146,14 +146,14 @@ inline void checkInputsSolver(const Tensor& A,
" (", A.size(-2), "x", A.size(-1), " and ", B.size(-2), "x", B.size(-1), ")");
}
inline bool is_row_or_column_contiguous(const Tensor& t) {
static inline bool is_row_or_column_contiguous(const Tensor& t) {
// This could be made more general, similar to how it's checked in matmul, which would allow us to
// elide the copy with strides such as (6, 12, 1, 3) or (3, 1, 9), but this is quite tricky.
// We choose to be conservative for simplicity
return t.is_contiguous() || t.transpose(-2, -1).is_contiguous();
}
inline TransposeType to_transpose_type(const bool contig, const bool conj) {
static inline TransposeType to_transpose_type(const bool contig, const bool conj) {
if (conj) {
if (contig) { TORCH_INTERNAL_ASSERT(false, "Invalid transpose type"); }
else { return TransposeType::ConjTranspose; }
@ -261,7 +261,7 @@ void batch_iterator_with_broadcasting(const Tensor& a, const Tensor& b, const fu
}
// Returns the epsilon value for floating types except half
inline double _get_epsilon(const ScalarType& sc_type) {
static inline double _get_epsilon(const ScalarType& sc_type) {
switch (sc_type) {
case at::ScalarType::Float:
return static_cast<double>(std::numeric_limits<float>::epsilon());
@ -274,7 +274,7 @@ inline double _get_epsilon(const ScalarType& sc_type) {
// Validates input shapes and devices
// for linear solve methods (solve, cholesky_solve, lu_solve, triangular_solve)
inline void linearSolveCheckInputs(const Tensor& self, const Tensor& A, const char* name) {
static inline void linearSolveCheckInputs(const Tensor& self, const Tensor& A, const char* name) {
TORCH_CHECK(self.device() == A.device(),
"Expected b and A to be on the same device, but found b on ",
self.device(), " and A on ", A.device(), " instead.");
@ -293,7 +293,7 @@ inline void linearSolveCheckInputs(const Tensor& self, const Tensor& A, const ch
" but each b matrix is ", self.size(-2), " by ", self.size(-1));
}
inline void checkFloatingOrComplex(const Tensor& t, const char* const f_name, const bool allow_low_precision_dtypes=true) {
static inline void checkFloatingOrComplex(const Tensor& t, const char* const f_name, const bool allow_low_precision_dtypes=true) {
auto dtype = t.scalar_type();
TORCH_CHECK((at::isFloatingType(dtype) || at::isComplexType(dtype)),
f_name, ": Expected a floating point or complex tensor as input. Got ", dtype);
@ -305,13 +305,13 @@ inline void checkFloatingOrComplex(const Tensor& t, const char* const f_name, co
// Checks if all the Tensors in a TensorList are of the same dimensions
inline void checkAllSameDim(TensorList tensors, int64_t dim) {
static inline void checkAllSameDim(TensorList tensors, int64_t dim) {
for (auto &t : tensors) {
TORCH_CHECK(t.dim() == dim, "Tensor dimension is ", t.dim(), ", expected ", dim, " instead.");
}
}
inline std::tuple<std::vector<int64_t>, std::vector<int64_t>> _linalg_broadcast_batch_dims(const Tensor& arg1, const Tensor& arg2) {
static inline std::tuple<std::vector<int64_t>, std::vector<int64_t>> _linalg_broadcast_batch_dims(const Tensor& arg1, const Tensor& arg2) {
// broadcast the batch dimensions of arg1 and arg2.
IntArrayRef arg1_batch_sizes(arg1.sizes().data(), arg1.ndimension() - 2);
IntArrayRef arg2_batch_sizes(arg2.sizes().data(), arg2.ndimension() - 2);
@ -325,7 +325,7 @@ inline std::tuple<std::vector<int64_t>, std::vector<int64_t>> _linalg_broadcast_
return std::make_tuple(std::move(arg1_expand_size), std::move(arg2_expand_size));
}
inline std::tuple<Tensor,Tensor> _linalg_broadcast_batch_dims(const Tensor& arg1, const Tensor& arg2, const char* name) {
static inline std::tuple<Tensor,Tensor> _linalg_broadcast_batch_dims(const Tensor& arg1, const Tensor& arg2, const char* name) {
// If there's no name we assume we don't want to check the errors
if (name != nullptr) {
linearSolveCheckInputs(arg1, arg2, name);
@ -338,7 +338,7 @@ inline std::tuple<Tensor,Tensor> _linalg_broadcast_batch_dims(const Tensor& arg1
return std::make_tuple(arg1_broadcasted, arg2_broadcasted);
}
inline std::vector<int64_t> broadcast_batch_size(const Tensor& t1, const Tensor& t2, int64_t n_batch_dims) {
static inline std::vector<int64_t> broadcast_batch_size(const Tensor& t1, const Tensor& t2, int64_t n_batch_dims) {
IntArrayRef t1_batch_sizes(t1.sizes().data(), n_batch_dims);
IntArrayRef t2_batch_sizes(t2.sizes().data(), n_batch_dims);
auto broadcasted_batch_sizes = infer_size(t1_batch_sizes, t2_batch_sizes);
@ -346,7 +346,7 @@ inline std::vector<int64_t> broadcast_batch_size(const Tensor& t1, const Tensor&
}
// Return a permutation with the given axes moved to the end.
inline Tensor _move_to_end(const Tensor& self, IntArrayRef axes) {
static inline Tensor _move_to_end(const Tensor& self, IntArrayRef axes) {
const std::vector<int64_t> a = axes.vec();
const int64_t ndim = self.ndimension();
std::vector<int64_t> perm;
@ -368,7 +368,7 @@ inline Tensor _move_to_end(const Tensor& self, IntArrayRef axes) {
}
// parse the "mode" param in linalg_qr: return a tuple of bools (compute_q, reduced)
inline std::tuple<bool, bool> _parse_qr_mode(c10::string_view mode) {
static inline std::tuple<bool, bool> _parse_qr_mode(c10::string_view mode) {
bool compute_q;
bool reduced;
if (mode == "reduced") {
@ -388,7 +388,7 @@ inline std::tuple<bool, bool> _parse_qr_mode(c10::string_view mode) {
}
// Function to compute sizes, strides and the extra columns for the Q matrix in the QR Decomposition
inline std::tuple<DimVector, DimVector, int64_t> _compute_geometry_for_Q(
static inline std::tuple<DimVector, DimVector, int64_t> _compute_geometry_for_Q(
const Tensor& input,
bool reduced) {
int64_t m = input.size(-2), n = input.size(-1);
@ -407,7 +407,7 @@ inline std::tuple<DimVector, DimVector, int64_t> _compute_geometry_for_Q(
return std::make_tuple(q_sizes, q_strides, n_columns_q);
}
inline bool svd_uses_cusolver(const Tensor& A) {
static inline bool svd_uses_cusolver(const Tensor& A) {
// if cusolver is available, it is used unconditionally
return A.is_cuda()
&& at::globalContext().hasCuSOLVER()
@ -417,7 +417,7 @@ inline bool svd_uses_cusolver(const Tensor& A) {
// Function used instead of .to so that the original strides are retained
// .to doesn't retain strides and make the output tensor contiguous
inline Tensor same_stride_to(const Tensor& original_tensor, const at::TensorOptions& options) {
static inline Tensor same_stride_to(const Tensor& original_tensor, const at::TensorOptions& options) {
auto strided_to = at::empty_strided(original_tensor.sizes(),
original_tensor.strides(),
options);
@ -433,7 +433,7 @@ inline Tensor same_stride_to(const Tensor& original_tensor, const at::TensorOpti
// For instance, given a 4-D tensor, dimensions 1 and 3 can be shifted to the end by
// calling `create_dim_backshift_permutation(1, 3, 4)`. The resulting vector will
// be `vec(0, 2, 1, 3)`.
inline std::vector<int64_t> create_dim_backshift_permutation(int64_t dim0, int64_t dim1, int64_t ndim) {
static inline std::vector<int64_t> create_dim_backshift_permutation(int64_t dim0, int64_t dim1, int64_t ndim) {
TORCH_CHECK(
(dim0 != dim1) && (dim0 < ndim) && (dim0 >= 0) && (dim1 < ndim) && (dim1 >= 0),
"duplicate or invalid dimensions");
@ -453,7 +453,7 @@ inline std::vector<int64_t> create_dim_backshift_permutation(int64_t dim0, int64
// will reverse a given permutation.
// The reverse permutation array is created by swapping the indices and their
// associated values from the given permutation array.
inline std::vector<int64_t> create_reverse_permutation(std::vector<int64_t> permutation) {
static inline std::vector<int64_t> create_reverse_permutation(std::vector<int64_t> permutation) {
int64_t ndim = permutation.size();
std::vector<int64_t> reverse_permutation(ndim);
for (const auto dim_ind : c10::irange(ndim)) {
@ -464,7 +464,7 @@ inline std::vector<int64_t> create_reverse_permutation(std::vector<int64_t> perm
// Compute R-work array size for MAGMA/LAPACK cgesdd/zgesdd
// See https://github.com/Reference-LAPACK/lapack/blob/122506cd8b6ce050a200920c3d4c0b153b150fd8/SRC/cgesdd.f#L186
inline int64_t computeLRWorkDim(const char jobz, int64_t m, int64_t n) {
static inline int64_t computeLRWorkDim(const char jobz, int64_t m, int64_t n) {
auto mn = std::min(m, n);
auto mx = std::max(m, n);
if (jobz == 'N') {
@ -484,14 +484,14 @@ inline int64_t computeLRWorkDim(const char jobz, int64_t m, int64_t n) {
// This function checks whether the uplo argument input is valid
// Allowed strings are "u", "U", "l", "L"
inline void checkUplo(const c10::string_view uplo) {
static inline void checkUplo(const c10::string_view uplo) {
// To use std::toupper safely with plain chars (or signed chars), the argument should first be converted to unsigned char
char uplo_uppercase = static_cast<char>(std::toupper(static_cast<unsigned char>(uplo[0])));
TORCH_CHECK(uplo.size() == 1 && (uplo_uppercase == 'U' || uplo_uppercase == 'L'),
"Expected UPLO argument to be 'L' or 'U', but got ", uplo);
}
inline void checkSameDevice(const std::string& fn_name, Tensor result, Tensor input, const std::string& result_name = "result") {
static inline void checkSameDevice(const std::string& fn_name, Tensor result, Tensor input, const std::string& result_name = "result") {
TORCH_CHECK(
result.device() == input.device(),
fn_name,
@ -504,7 +504,7 @@ inline void checkSameDevice(const std::string& fn_name, Tensor result, Tensor in
// (either floating or complex type input), so we can check whether input's dtype can be casted to result's dtype.
// According to https://github.com/pytorch/pytorch/wiki/Developer-FAQ#how-does-out-work-in-pytorch
// c10::canCast is used for checking the "safe copy" dtype requirements.
inline void checkLinalgCompatibleDtype(const std::string& fn_name, Tensor result, Tensor input, const std::string& result_name = "result") {
static inline void checkLinalgCompatibleDtype(const std::string& fn_name, Tensor result, Tensor input, const std::string& result_name = "result") {
bool can_cast = c10::canCast(input.scalar_type(), result.scalar_type());
TORCH_CHECK(
can_cast,
@ -514,7 +514,7 @@ inline void checkLinalgCompatibleDtype(const std::string& fn_name, Tensor result
}
// Alternatively, we can check whether the specific expected output type (result_type) can be safely casted to out tensor dtype (out_type)
inline void checkLinalgCompatibleDtype(const std::string& fn_name, ScalarType out_type, ScalarType result_type, const std::string& out_name = "result") {
static inline void checkLinalgCompatibleDtype(const std::string& fn_name, ScalarType out_type, ScalarType result_type, const std::string& out_name = "result") {
bool can_cast = c10::canCast(result_type, out_type);
TORCH_CHECK(
can_cast,
@ -523,7 +523,7 @@ inline void checkLinalgCompatibleDtype(const std::string& fn_name, ScalarType ou
out_name, " with dtype ", out_type);
}
inline void checkNotComplexTolerance(const Tensor& tol, const c10::string_view f_name, const c10::string_view tol_name) {
static inline void checkNotComplexTolerance(const Tensor& tol, const c10::string_view f_name, const c10::string_view tol_name) {
TORCH_CHECK(!at::isComplexType(tol.scalar_type()),
f_name, ": ", tol_name, " tensor of complex type is not supported. Got ", tol.scalar_type());
}
@ -538,7 +538,7 @@ inline void checkNotComplexTolerance(const Tensor& tol, const c10::string_view f
Let input.shape = (batch_dimensions, m, n), then 'other' is of vector type if other.shape == (batch_dimensions, m).
This rule is compatible with NumPy, see https://github.com/numpy/numpy/blob/v1.20.0/numpy/linalg/linalg.py#L384-L389
*/
inline bool linalg_solve_is_vector_rhs(const Tensor& input, const Tensor& other) {
static inline bool linalg_solve_is_vector_rhs(const Tensor& input, const Tensor& other) {
auto expected_batched_rhs_shape = SymIntArrayRef(input.sym_sizes().data(), input.dim() - 1); // input.shape[:-1]
bool vector_case = other.dim() == 1 || (input.dim() - 1 == other.dim() && other.sym_sizes().equals(expected_batched_rhs_shape));
return vector_case;
@ -547,7 +547,7 @@ inline bool linalg_solve_is_vector_rhs(const Tensor& input, const Tensor& other)
/*
Computes linear indices for a tensor with original_shape to access its elements like it was a materialized broadcast tensor.
*/
inline Tensor get_linear_indices(int64_t numel, IntArrayRef original_shape, IntArrayRef broadcast_shape) {
static inline Tensor get_linear_indices(int64_t numel, IntArrayRef original_shape, IntArrayRef broadcast_shape) {
TensorOptions options = at::TensorOptions().dtype(at::kLong).device(at::kCPU);
return at::arange(numel, options).view(original_shape).broadcast_to(broadcast_shape).contiguous();
}
@ -578,7 +578,7 @@ class BroadcastLinearIndices {
}
};
inline bool is_blas_compatible_column_major_order(const Tensor& input) {
static inline bool is_blas_compatible_column_major_order(const Tensor& input) {
IntArrayRef input_strides = input.strides();
IntArrayRef input_sizes = input.sizes();
auto ndim = input.dim();
@ -599,7 +599,7 @@ inline bool is_blas_compatible_column_major_order(const Tensor& input) {
batch_stride_compatible;
}
inline bool is_blas_compatible_row_major_order(const Tensor& input) {
static inline bool is_blas_compatible_row_major_order(const Tensor& input) {
IntArrayRef input_strides = input.strides();
IntArrayRef input_sizes = input.sizes();
auto ndim = input.dim();


@ -2,9 +2,9 @@
// Licensed under the BSD-3-Clause license
// This is the CPU implementation of the Connectionist Temporal Loss.
// We mostly follow Graves.
// 1. Graves et al.: http://www.cs.toronto.edu/~graves/icml_2006.pdf
// 1. Graves et al: http://www.cs.toronto.edu/~graves/icml_2006.pdf
// We use the equations from above link, but note that [1] has 1-based indexing and we (of course) use 0-based.
// Graves et al. call the probabilities y, we use log_probs (also calling them inputs)
// Graves et al call the probabilities y, we use log_probs (also calling them inputs)
#define TORCH_ASSERT_ONLY_METHOD_OPERATORS
#include <ATen/core/Tensor.h>


@ -675,6 +675,15 @@ Tensor nll_loss_symint(const Tensor & self, const Tensor & target, const std::op
return std::get<0>(at::nll_loss_forward_symint(self, target, weight, reduction, std::move(ignore_index)));
}
// Duplicate of above code for non-symbolic ints. Kept for BC purposes and to minimize breakages.
static Tensor nll_loss(const Tensor & self, const Tensor & target, const std::optional<Tensor>& weight_opt, int64_t reduction, int64_t ignore_index) {
// See [Note: hacky wrapper removal for optional tensor]
c10::MaybeOwned<Tensor> weight_maybe_owned = at::borrow_from_optional_tensor(weight_opt);
const Tensor& weight = *weight_maybe_owned;
return std::get<0>(at::nll_loss_forward_symint(self, target, weight, reduction, ignore_index));
}
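This is the recurring "[Note: hacky wrapper removal for optional tensor]" pattern: materialize a plain reference from the std::optional argument, falling back to a default instance when it is absent, then forward to the real kernel. A simplified standalone analogue (plain C++; the real c10::MaybeOwned also avoids refcount traffic, which this sketch does not model):

#include <cstdio>
#include <optional>

struct Weight { double v = 1.0; };  // stands in for an optional Tensor argument

const Weight& borrow_or_default(const std::optional<Weight>& opt) {
  static const Weight default_weight{};  // plays the role of an undefined Tensor
  return opt.has_value() ? *opt : default_weight;
}

double nll_like(double input, const std::optional<Weight>& weight_opt) {
  const Weight& weight = borrow_or_default(weight_opt);  // no copy either way
  return input * weight.v;
}

int main() {
  std::printf("%f %f\n", nll_like(2.0, std::nullopt), nll_like(2.0, Weight{0.5}));
  return 0;
}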
Tensor nll_loss_nd_symint(
const Tensor& self,
const Tensor& target,


@ -499,4 +499,13 @@ Tensor nll_loss2d_symint(const Tensor & self, const Tensor & target, const std::
return std::get<0>(at::nll_loss2d_forward_symint(self, target, weight, reduction, std::move(ignore_index)));
}
// Duplicate of above code for non-symbolic ints. Kept for BC purposes and to minimize breakages.
static Tensor nll_loss2d(const Tensor & self, const Tensor & target, const std::optional<Tensor>& weight_opt, int64_t reduction, int64_t ignore_index) {
// See [Note: hacky wrapper removal for optional tensor]
c10::MaybeOwned<Tensor> weight_maybe_owned = at::borrow_from_optional_tensor(weight_opt);
const Tensor& weight = *weight_maybe_owned;
return std::get<0>(at::nll_loss2d_forward_symint(self, target, weight, reduction, ignore_index));
}
} // namespace at::native


@ -147,7 +147,7 @@ jiterator_also_stringify_as(jiterator_code(
#define CENTRAL_RANGE 0.7
template <typename T>
inline typename std::enable_if<std::is_floating_point<T>::value, T>::type
static inline typename std::enable_if<std::is_floating_point<T>::value, T>::type
calc_erfinv(T y) {
/* Function to calculate inverse error function. Rational approximation
is used to generate an initial approximation, which is then improved to
@ -232,7 +232,7 @@ Date: February 1996
* See note [3-Clause BSD License for the Cephes Math Library].
*/
template <typename scalar_t, bool is_cuda=false>
C10_HOST_DEVICE inline scalar_t zeta(scalar_t x, scalar_t q) __ubsan_ignore_float_divide_by_zero__ {
C10_HOST_DEVICE static inline scalar_t zeta(scalar_t x, scalar_t q) __ubsan_ignore_float_divide_by_zero__ {
using acc_t = at::acc_type<scalar_t, is_cuda>;
const acc_t MACHEP = acc_t{1.11022302462515654042E-16};
constexpr acc_t zero = acc_t{0.0};
@ -324,7 +324,7 @@ C10_HOST_DEVICE inline scalar_t zeta(scalar_t x, scalar_t q) __ubsan_ignore_floa
* N 0
*/
template <typename T>
C10_HOST_DEVICE inline T polevl(const T x, const T A[], size_t len) {
C10_HOST_DEVICE static inline T polevl(const T x, const T A[], size_t len) {
T result = 0;
for (size_t i = 0; i <= len; i++) {
result = result * x + A[i];
@ -332,7 +332,7 @@ C10_HOST_DEVICE inline T polevl(const T x, const T A[], size_t len) {
return result;
}
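Note the Cephes calling convention here: len is the polynomial degree, so the loop runs len + 1 times over coefficients stored highest power first (Horner's rule). A quick standalone check (illustrative only):

#include <cstddef>
#include <cstdio>

template <typename T>
T polevl_demo(T x, const T A[], std::size_t len) {
  T result = 0;
  for (std::size_t i = 0; i <= len; i++) result = result * x + A[i];
  return result;
}

int main() {
  const double A[] = {2.0, -3.0, 5.0};          // 2x^2 - 3x + 5
  std::printf("%f\n", polevl_demo(2.0, A, 2));  // (2*2 - 3)*2 + 5 = 7
  return 0;
}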
inline double trigamma(double x) __ubsan_ignore_float_divide_by_zero__ {
static inline double trigamma(double x) __ubsan_ignore_float_divide_by_zero__ {
double sign = +1;
double result = 0;
if (x < 0.5) {
@ -350,7 +350,7 @@ inline double trigamma(double x) __ubsan_ignore_float_divide_by_zero__ {
return sign * result;
}
inline float trigamma(float x) __ubsan_ignore_float_divide_by_zero__ {
static inline float trigamma(float x) __ubsan_ignore_float_divide_by_zero__ {
float sign = +1;
float result = 0;
if (x < 0.5f) {
@ -372,7 +372,7 @@ inline float trigamma(float x) __ubsan_ignore_float_divide_by_zero__ {
* This function is derived from the implementation of the digamma function in the Cephes Math Library.
* See note [3-Clause BSD License for the Cephes Math Library].
*/
inline double calc_digamma(double x) {
static inline double calc_digamma(double x) {
// [C++ Standard Reference: Gamma Function] https://en.cppreference.com/w/cpp/numeric/math/tgamma
static double PSI_10 = 2.25175258906672110764;
if (x == 0) {
@ -430,7 +430,7 @@ inline double calc_digamma(double x) {
* This function is derived from the implementation of the digamma function in the Cephes Math Library.
* See note [3-Clause BSD License for the Cephes Math Library].
*/
inline float calc_digamma(float x) {
static inline float calc_digamma(float x) {
// See [C++ Standard Reference: Gamma Function]
static float PSI_10 = 2.25175258906672110764f;
if (x == 0) {
@ -485,16 +485,16 @@ inline float calc_digamma(float x) {
return result + logf(x) - (0.5f / x) - y;
}
inline c10::BFloat16 calc_digamma(c10::BFloat16 a) {
static inline c10::BFloat16 calc_digamma(c10::BFloat16 a) {
return calc_digamma(static_cast<float>(a));
}
inline c10::Half calc_digamma(c10::Half a) {
static inline c10::Half calc_digamma(c10::Half a) {
return calc_digamma(static_cast<float>(a));
}
template <typename scalar_t, bool is_cuda=false>
inline C10_HOST_DEVICE scalar_t calc_polygamma(scalar_t x, int n) {
static inline C10_HOST_DEVICE scalar_t calc_polygamma(scalar_t x, int n) {
// already blocked if n <= 1
const auto one = scalar_t{1};
return ((n % 2) ? one : -one) *
@ -508,7 +508,7 @@ inline C10_HOST_DEVICE scalar_t calc_polygamma(scalar_t x, int n) {
/* References
* [igam1] "The Digital Library of Mathematical Functions", dlmf.nist.gov
* [igam2] Maddock et al., "Incomplete Gamma Functions",
* [igam2] Maddock et. al., "Incomplete Gamma Functions",
* https://www.boost.org/doc/libs/1_61_0/libs/math/doc/html/math_toolkit/sf_gamma/igamma.html
*/
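For orientation: calc_igamma and calc_igammac below are the regularized lower and upper incomplete gamma functions, which are complements of each other. The usual Cephes/Boost strategy, which the region-based dispatch in these functions follows, is to use whichever of the power series and the continued fraction converges quickly for the given (a, x) and recover the other value from the complement. In LaTeX:

P(a, x) = \frac{\gamma(a, x)}{\Gamma(a)}, \qquad
Q(a, x) = \frac{\Gamma(a, x)}{\Gamma(a)}, \qquad
P(a, x) + Q(a, x) = 1.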
@ -519,7 +519,7 @@ inline C10_HOST_DEVICE scalar_t calc_polygamma(scalar_t x, int n) {
* See NOTICE for the licenses.
*/
template <typename scalar_t>
scalar_t ratevl(scalar_t x, const scalar_t num[], int64_t M,
static scalar_t ratevl(scalar_t x, const scalar_t num[], int64_t M,
const scalar_t denom[], int64_t N) {
// evaluating rational function, i.e., the ratio of two polynomials
// the coefficients for numerator are given by `num` while coeffs for
@ -1061,7 +1061,7 @@ static scalar_t _igamc_helper_continued_fraction(scalar_t a, scalar_t x) {
}
template <typename scalar_t>
inline scalar_t calc_igammac(scalar_t a, scalar_t x) {
static inline scalar_t calc_igammac(scalar_t a, scalar_t x) {
/* the calculation of the regularized upper incomplete gamma function
* is done differently based on the values of a and x:
* - if x and/or a is at the boundary of defined region, then assign the
@ -1141,7 +1141,7 @@ inline scalar_t calc_igammac(scalar_t a, scalar_t x) {
}
template <typename scalar_t>
scalar_t calc_igamma(scalar_t a, scalar_t x) {
static inline scalar_t calc_igamma(scalar_t a, scalar_t x) {
/* the calculation of the regularized lower incomplete gamma function
* is done differently based on the values of a and x:
* - if x and/or a is at the boundary of defined region, then assign the
@ -1203,39 +1203,39 @@ scalar_t calc_igamma(scalar_t a, scalar_t x) {
}
template <>
C10_UNUSED inline c10::BFloat16 calc_igamma<c10::BFloat16>(c10::BFloat16 a, c10::BFloat16 x) {
C10_UNUSED c10::BFloat16 calc_igamma<c10::BFloat16>(c10::BFloat16 a, c10::BFloat16 x) {
return calc_igamma<float>(float(a), float(x));
}
template <>
C10_UNUSED inline c10::Half calc_igamma<c10::Half>(c10::Half a, c10::Half x) {
C10_UNUSED c10::Half calc_igamma<c10::Half>(c10::Half a, c10::Half x) {
return calc_igamma<float>(float(a), float(x));
}
template <>
C10_UNUSED inline c10::BFloat16 calc_igammac<c10::BFloat16>(c10::BFloat16 a, c10::BFloat16 x) {
C10_UNUSED c10::BFloat16 calc_igammac<c10::BFloat16>(c10::BFloat16 a, c10::BFloat16 x) {
return calc_igammac<float>(float(a), float(x));
}
template <>
C10_UNUSED inline c10::Half calc_igammac<c10::Half>(c10::Half a, c10::Half x) {
C10_UNUSED c10::Half calc_igammac<c10::Half>(c10::Half a, c10::Half x) {
return calc_igammac<float>(float(a), float(x));
}
inline c10::BFloat16 calc_erfinv(c10::BFloat16 a) { return calc_erfinv(float(a)); }
template <typename T>
inline T abs_impl(T v) {
static T abs_impl(T v) {
return std::abs(v);
}
template <>
C10_UNUSED inline uint8_t abs_impl(uint8_t v) {
C10_UNUSED uint8_t abs_impl(uint8_t v) {
return v;
}
template <typename T>
inline typename std::enable_if<std::is_integral<T>::value, T>::type
static inline typename std::enable_if<std::is_integral<T>::value, T>::type
calc_gcd(T a, T b) {
a = abs_impl(a);
b = abs_impl(b);
@ -1284,7 +1284,7 @@ C10_HOST_DEVICE c10::complex<T> exp2_impl(c10::complex<T> x) {
* required is x -> 2(2ab/x - b - a)/(b-a). If b is infinity, this becomes x -> 4a/x - 1.
*/
template <typename T>
inline typename std::enable_if<std::is_floating_point<T>::value, T>::type
static inline typename std::enable_if<std::is_floating_point<T>::value, T>::type
chbevl(const T x, const T array[], size_t len) {
T b0, b1, b2;
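chbevl is the Cephes form of the Clenshaw recurrence for Chebyshev series; Cephes folds the usual factor of 2 and the half-weighted leading coefficient into its argument mapping, so for reference here is the textbook form (a standalone sketch, not the Cephes code):

#include <cstdio>

// Evaluates sum_{k=0}^{n} c[k] * T_k(x) via Clenshaw's recurrence.
template <typename T>
T clenshaw(T x, const T c[], int n) {
  T b1 = 0, b2 = 0;
  for (int k = n; k >= 0; k--) {
    T b0 = c[k] + 2 * x * b1 - b2;
    b2 = b1;
    b1 = b0;
  }
  return b1 - x * b2;  // after the loop, b1 holds b_0 and b2 holds b_1
}

int main() {
  const double c[] = {0.0, 0.0, 0.0, 1.0};   // picks out T_3(x) = 4x^3 - 3x
  std::printf("%f\n", clenshaw(0.5, c, 3));  // T_3(0.5) = -1.0
  return 0;
}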
@ -1310,7 +1310,7 @@ chbevl(const T x, const T array[], size_t len) {
* of all inputs to convert them into the domain of the approximation.
*/
template <typename T>
inline std::tuple<const T*, size_t> chebyshev_coefficients_i0e_A() {
static inline std::tuple<const T*, size_t> chebyshev_coefficients_i0e_A() {
/* Chebyshev coefficients for exp(-x) I0(x)
* in the interval [0,8].
*
@ -1336,7 +1336,7 @@ inline std::tuple<const T*, size_t> chebyshev_coefficients_i0e_A() {
};
template <typename T>
inline std::tuple<const T*, size_t> chebyshev_coefficients_i0e_B() {
static inline std::tuple<const T*, size_t> chebyshev_coefficients_i0e_B() {
/* Chebyshev coefficients for exp(-x) sqrt(x) I0(x)
* in the inverted interval [8,infinity].
*
@ -1361,7 +1361,7 @@ inline std::tuple<const T*, size_t> chebyshev_coefficients_i0e_B() {
};
template <typename T>
inline typename std::enable_if<std::is_same<double, T>::value, std::tuple<const T*, size_t>>::type
static inline typename std::enable_if<std::is_same<double, T>::value, std::tuple<const T*, size_t>>::type
chebyshev_coefficients_i1e_A() {
/* Chebyshev coefficients for exp(-x) I1(x)
* in the interval [0,8].
@ -1388,7 +1388,7 @@ chebyshev_coefficients_i1e_A() {
};
template <typename T>
inline typename std::enable_if<std::is_same<float, T>::value, std::tuple<const T*, size_t>>::type
static inline typename std::enable_if<std::is_same<float, T>::value, std::tuple<const T*, size_t>>::type
chebyshev_coefficients_i1e_A() {
/* Chebyshev coefficients for exp(-x) I1(x)
* in the interval [0,8].
@ -1417,7 +1417,7 @@ chebyshev_coefficients_i1e_A() {
};
template <typename T>
inline typename std::enable_if<std::is_same<double, T>::value, std::tuple<const T*, size_t>>::type
static inline typename std::enable_if<std::is_same<double, T>::value, std::tuple<const T*, size_t>>::type
chebyshev_coefficients_i1e_B() {
/* Chebyshev coefficients for exp(-x) sqrt(x) I1(x)
* in the inverted interval [8,infinity].
@ -1443,7 +1443,7 @@ chebyshev_coefficients_i1e_B() {
};
template <typename T>
inline typename std::enable_if<std::is_same<float, T>::value, std::tuple<const T*, size_t>>::type
static inline typename std::enable_if<std::is_same<float, T>::value, std::tuple<const T*, size_t>>::type
chebyshev_coefficients_i1e_B() {
/* Chebyshev coefficients for exp(-x) sqrt(x) I1(x)
* in the inverted interval [8,infinity].
@ -1463,7 +1463,7 @@ chebyshev_coefficients_i1e_B() {
};
template <typename T>
inline typename std::enable_if<std::is_floating_point<T>::value, T>::type
static inline typename std::enable_if<std::is_floating_point<T>::value, T>::type
calc_i0(T _x) {
T x = std::abs(_x);
@ -1481,7 +1481,7 @@ calc_i0(T _x) {
}
// Upcast bfloat16 input to float for numerical accuracy purposes
inline c10::BFloat16 calc_i0(c10::BFloat16 a) { return calc_i0(static_cast<float>(a)); }
static inline c10::BFloat16 calc_i0(c10::BFloat16 a) { return calc_i0(static_cast<float>(a)); }
/*
* This function is derived from the implementation of the i1 function in the Cephes Math Library.
@ -1493,7 +1493,7 @@ inline c10::BFloat16 calc_i0(c10::BFloat16 a) { return calc_i0(static_cast<float
* of all inputs to convert them into the domain of the approximation.
*/
template <typename T>
inline typename std::enable_if<std::is_floating_point<T>::value, T>::type
static inline typename std::enable_if<std::is_floating_point<T>::value, T>::type
calc_i1(T _x) {
T x = std::abs(_x);
@ -1522,7 +1522,7 @@ calc_i1(T _x) {
* of all inputs to convert them into the domain of the approximation.
*/
template <typename T>
inline typename std::enable_if<std::is_floating_point<T>::value, T>::type
static inline typename std::enable_if<std::is_floating_point<T>::value, T>::type
calc_i1e(T _x) {
T x = std::abs(_x);
@ -1549,7 +1549,7 @@ calc_i1e(T _x) {
* (integrated from minus infinity to x) is equal to y.
*/
template <typename T>
inline C10_HOST_DEVICE T calc_ndtri(T y0) {
static inline C10_HOST_DEVICE T calc_ndtri(T y0) {
/* sqrt(2pi) */
constexpr T s2pi = 2.50662827463100050242E0;
@ -1737,7 +1737,7 @@ inline C10_HOST_DEVICE T calc_ndtri(T y0) {
template <typename T>
C10_HOST_DEVICE inline typename std::enable_if<std::is_floating_point<T>::value, T>::type
C10_HOST_DEVICE static inline typename std::enable_if<std::is_floating_point<T>::value, T>::type
erfcx_y100(T y100)
{
switch (static_cast<int>(y100)) {
@ -2148,7 +2148,7 @@ return 0.97771701335885035464e0 + (0.22000938572830479551e-1 + (0.27951610702682
}
template <typename T>
C10_HOST_DEVICE inline typename std::enable_if<std::is_floating_point<T>::value, T>::type
C10_HOST_DEVICE static inline typename std::enable_if<std::is_floating_point<T>::value, T>::type
calc_erfcx(T x)
{
if (at::_isnan(x)) {
@ -2188,7 +2188,7 @@ calc_erfcx(T x)
* See NOTICE for the licenses.
*/
template <typename T>
inline C10_HOST_DEVICE T calc_log_ndtr(T x) {
static inline C10_HOST_DEVICE T calc_log_ndtr(T x) {
T t = x * c10::frac_sqrt_2<T>;
if (x < T{-1.0}) {
return std::log(calc_erfcx(-t) / 2) - t * t;
@ -2198,7 +2198,7 @@ inline C10_HOST_DEVICE T calc_log_ndtr(T x) {
}
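The x < -1 branch is the standard numerically stable log-CDF trick: erfc itself underflows deep in the left tail, while the scaled complement erfcx(z) = exp(z^2) erfc(z) stays representable. With t = x / sqrt(2), in LaTeX:

\Phi(x) = \tfrac{1}{2}\operatorname{erfc}(-t)
        = \tfrac{1}{2}\operatorname{erfcx}(-t)\,e^{-t^{2}}
\quad\Longrightarrow\quad
\log\Phi(x) = \log\!\bigl(\operatorname{erfcx}(-t)/2\bigr) - t^{2},

which is exactly the expression returned above and remains finite where a naive log of the CDF would hit log(0).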
template<typename T>
inline C10_HOST_DEVICE T airy_ai_forward(T x) {
static inline C10_HOST_DEVICE T airy_ai_forward(T x) {
static const T AN[] = {
+3.46538101525629032477e-01,
+1.20075952739645805542e+01,
@ -2377,7 +2377,7 @@ inline C10_HOST_DEVICE T airy_ai_forward(T x) {
} // T airy_ai(T x)
template<typename T>
inline C10_HOST_DEVICE T bessel_j0_forward(T x) {
static inline C10_HOST_DEVICE T bessel_j0_forward(T x) {
static const T PP[] = {
+7.96936729297347051624e-04,
+8.28352392107440799803e-02,
@ -2489,7 +2489,7 @@ inline C10_HOST_DEVICE T bessel_j0_forward(T x) {
} // bessel_j0_forward(T x)
template<typename T>
inline C10_HOST_DEVICE T bessel_j1_forward(T x) {
static inline C10_HOST_DEVICE T bessel_j1_forward(T x) {
static const T PP[] = {
+7.62125616208173112003e-04,
+7.31397056940917570436e-02,
@ -2597,7 +2597,7 @@ inline C10_HOST_DEVICE T bessel_j1_forward(T x) {
} // bessel_j1_forward(T x)
template<typename T>
inline C10_HOST_DEVICE T bessel_y0_forward(T x) {
static inline C10_HOST_DEVICE T bessel_y0_forward(T x) {
static const T PP[] = {
+7.96936729297347051624e-04,
+8.28352392107440799803e-02,
@ -2712,7 +2712,7 @@ inline C10_HOST_DEVICE T bessel_y0_forward(T x) {
} // bessel_y0_forward(T x)
template<typename T>
inline C10_HOST_DEVICE T bessel_y1_forward(T x) {
static inline C10_HOST_DEVICE T bessel_y1_forward(T x) {
static const T PP[] = {
+7.62125616208173112003e-04,
+7.31397056940917570436e-02,
@ -2826,7 +2826,7 @@ inline C10_HOST_DEVICE T bessel_y1_forward(T x) {
} // bessel_y1_forward(T x)
template<typename T>
inline C10_HOST_DEVICE T chebyshev_polynomial_t_forward(T x, int64_t n) {
static inline C10_HOST_DEVICE T chebyshev_polynomial_t_forward(T x, int64_t n) {
if (n < 0) {
return T(0.0);
}
@ -2865,12 +2865,12 @@ inline C10_HOST_DEVICE T chebyshev_polynomial_t_forward(T x, int64_t n) {
} // chebyshev_polynomial_t_forward(T x, int64_t n)
template<typename T, bool is_cuda=false>
inline C10_HOST_DEVICE T chebyshev_polynomial_t_forward(T x, T n) {
static inline C10_HOST_DEVICE T chebyshev_polynomial_t_forward(T x, T n) {
return chebyshev_polynomial_t_forward(x, static_cast<int64_t>(n));
} // chebyshev_polynomial_t_forward(T x, T n)
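All the chebyshev_polynomial_*_forward kernels share the same three-term recurrence and differ only in their starting values; for T_n it is T_0 = 1, T_1 = x, T_{n+1} = 2x T_n - T_{n-1}. A standalone sketch of just that loop (illustrative; it omits the extra special cases the kernel above adds):

#include <cstdio>

double chebyshev_t(double x, int n) {
  if (n == 0) return 1.0;
  if (n == 1) return x;
  double p = 1.0, q = x;  // T_{k-1}, T_k
  for (int k = 2; k <= n; k++) {
    const double r = 2.0 * x * q - p;
    p = q;
    q = r;
  }
  return q;
}

int main() {
  std::printf("%f\n", chebyshev_t(0.25, 3));  // 4*(0.25)^3 - 3*(0.25) = -0.6875
  return 0;
}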
template<typename T>
inline C10_HOST_DEVICE T chebyshev_polynomial_u_forward(T x, int64_t n) {
static inline C10_HOST_DEVICE T chebyshev_polynomial_u_forward(T x, int64_t n) {
if (n < 0) {
return T(0.0);
}
@ -2913,12 +2913,12 @@ inline C10_HOST_DEVICE T chebyshev_polynomial_u_forward(T x, int64_t n) {
} // chebyshev_polynomial_u_forward(T x, int64_t n)
template<typename T, bool is_cuda=false>
inline C10_HOST_DEVICE T chebyshev_polynomial_u_forward(T x, T n) {
static inline C10_HOST_DEVICE T chebyshev_polynomial_u_forward(T x, T n) {
return chebyshev_polynomial_u_forward(x, static_cast<int64_t>(n));
} // chebyshev_polynomial_u_forward(T x, T n)
template<typename T>
inline C10_HOST_DEVICE T chebyshev_polynomial_v_forward(T x, int64_t n) {
static inline C10_HOST_DEVICE T chebyshev_polynomial_v_forward(T x, int64_t n) {
if (n < 0) {
return T(0.0);
}
@ -2969,12 +2969,12 @@ inline C10_HOST_DEVICE T chebyshev_polynomial_v_forward(T x, int64_t n) {
} // chebyshev_polynomial_v_forward(T x, int64_t n)
template<typename T, bool is_cuda=false>
inline C10_HOST_DEVICE T chebyshev_polynomial_v_forward(T x, T n) {
static inline C10_HOST_DEVICE T chebyshev_polynomial_v_forward(T x, T n) {
return chebyshev_polynomial_v_forward(x, static_cast<int64_t>(n));
} // chebyshev_polynomial_v_forward(T x, T n)
template<typename T>
inline C10_HOST_DEVICE T chebyshev_polynomial_w_forward(T x, int64_t n) {
static inline C10_HOST_DEVICE T chebyshev_polynomial_w_forward(T x, int64_t n) {
if (n < 0) {
return T(0.0);
}
@ -3029,12 +3029,12 @@ inline C10_HOST_DEVICE T chebyshev_polynomial_w_forward(T x, int64_t n) {
} // chebyshev_polynomial_w_forward(T x, int64_t n)
template<typename T, bool is_cuda=false>
inline C10_HOST_DEVICE T chebyshev_polynomial_w_forward(T x, T n) {
static inline C10_HOST_DEVICE T chebyshev_polynomial_w_forward(T x, T n) {
return chebyshev_polynomial_w_forward(x, static_cast<int64_t>(n));
} // chebyshev_polynomial_w_forward(T x, T n)
template<typename T>
inline C10_HOST_DEVICE T hermite_polynomial_h_forward(T x, int64_t n) {
static inline C10_HOST_DEVICE T hermite_polynomial_h_forward(T x, int64_t n) {
if (n < 0) {
return T(0.0);
}
@ -3061,17 +3061,17 @@ inline C10_HOST_DEVICE T hermite_polynomial_h_forward(T x, int64_t n) {
} // hermite_polynomial_h_forward(T x, int64_t n)
template<typename T, bool is_cuda=false, std::enable_if_t<!std::is_floating_point<T>::value, int> = 0>
inline C10_HOST_DEVICE T hermite_polynomial_h_forward(T x, T n) {
static inline C10_HOST_DEVICE T hermite_polynomial_h_forward(T x, T n) {
return hermite_polynomial_h_forward(x, static_cast<int64_t>(n));
} // hermite_polynomial_h_forward(T x, T n)
template<typename T, bool is_cuda=false, std::enable_if_t<std::is_floating_point<T>::value, int> = 0>
inline C10_HOST_DEVICE T hermite_polynomial_h_forward(T x, T n) {
static inline C10_HOST_DEVICE T hermite_polynomial_h_forward(T x, T n) {
return hermite_polynomial_h_forward(x, ((!std::isinf(n)) && (!std::isnan(n))) ? static_cast<int64_t>(n) : static_cast<int64_t>(-1));
} // hermite_polynomial_h_forward(T x, T n)
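The physicists' Hermite polynomials follow the same pattern with an n-dependent coefficient: H_0 = 1, H_1 = 2x, H_{n+1} = 2x H_n - 2n H_{n-1}. Note how the floating-point overload above maps an inf/NaN degree to -1, so the n < 0 branch turns it into a zero result instead of a bogus cast. A standalone sketch of the recurrence:

#include <cstdio>

double hermite_h(double x, int n) {
  if (n == 0) return 1.0;
  double p = 1.0, q = 2.0 * x;  // H_{k-1}, H_k
  for (int k = 1; k < n; k++) {
    const double r = 2.0 * x * q - 2.0 * k * p;
    p = q;
    q = r;
  }
  return q;
}

int main() {
  std::printf("%f\n", hermite_h(1.0, 3));  // H_3(x) = 8x^3 - 12x -> -4 at x = 1
  return 0;
}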
template<typename T>
inline C10_HOST_DEVICE T hermite_polynomial_he_forward(T x, int64_t n) {
static inline C10_HOST_DEVICE T hermite_polynomial_he_forward(T x, int64_t n) {
if (n < 0) {
return T(0.0);
}
@ -3098,12 +3098,12 @@ inline C10_HOST_DEVICE T hermite_polynomial_he_forward(T x, int64_t n) {
} // hermite_polynomial_he_forward(T x, int64_t n)
template<typename T, bool is_cuda=false>
inline C10_HOST_DEVICE T hermite_polynomial_he_forward(T x, T n) {
static inline C10_HOST_DEVICE T hermite_polynomial_he_forward(T x, T n) {
return hermite_polynomial_he_forward(x, static_cast<int64_t>(n));
} // hermite_polynomial_he_forward(T x, T n)
template<typename T>
inline C10_HOST_DEVICE T laguerre_polynomial_l_forward(T x, int64_t n) {
static inline C10_HOST_DEVICE T laguerre_polynomial_l_forward(T x, int64_t n) {
if (n < 0) {
return T(0.0);
}
@ -3134,12 +3134,12 @@ inline C10_HOST_DEVICE T laguerre_polynomial_l_forward(T x, int64_t n) {
} // laguerre_polynomial_l_forward(T x, int64_t n)
template<typename T, bool is_cuda=false>
inline C10_HOST_DEVICE T laguerre_polynomial_l_forward(T x, T n) {
static inline C10_HOST_DEVICE T laguerre_polynomial_l_forward(T x, T n) {
return laguerre_polynomial_l_forward(x, static_cast<int64_t>(n));
} // laguerre_polynomial_l_forward(T x, T n)
template<typename T>
inline C10_HOST_DEVICE T legendre_polynomial_p_forward(T x, int64_t n) {
static inline C10_HOST_DEVICE T legendre_polynomial_p_forward(T x, int64_t n) {
if (n < 0) {
return T(0.0);
}
@ -3174,12 +3174,12 @@ inline C10_HOST_DEVICE T legendre_polynomial_p_forward(T x, int64_t n) {
} // legendre_polynomial_p_forward(T x, int64_t n)
template<typename T, bool is_cuda=false>
inline C10_HOST_DEVICE T legendre_polynomial_p_forward(T x, T n) {
static inline C10_HOST_DEVICE T legendre_polynomial_p_forward(T x, T n) {
return legendre_polynomial_p_forward(x, static_cast<int64_t>(n));
} // legendre_polynomial_p_forward(T x, T n)
template<typename T>
inline C10_HOST_DEVICE T modified_bessel_i0_forward(T x) {
static inline C10_HOST_DEVICE T modified_bessel_i0_forward(T x) {
static const T A[] = {
-4.41534164647933937950e-18,
+3.33079451882223809783e-17,
@ -3268,7 +3268,7 @@ inline C10_HOST_DEVICE T modified_bessel_i0_forward(T x) {
} // modified_bessel_i0_forward(T x)
template<typename T>
inline C10_HOST_DEVICE T modified_bessel_i1_forward(T x) {
static inline C10_HOST_DEVICE T modified_bessel_i1_forward(T x) {
static const T A[] = {
+2.77791411276104639959e-18,
-2.11142121435816608115e-17,
@ -3364,7 +3364,7 @@ inline C10_HOST_DEVICE T modified_bessel_i1_forward(T x) {
} // modified_bessel_i1_forward(T x)
template<typename T>
inline C10_HOST_DEVICE T modified_bessel_k0_forward(T x) {
static inline C10_HOST_DEVICE T modified_bessel_k0_forward(T x) {
static const T A[] = {
+1.37446543561352307156e-16,
+4.25981614279661018399e-14,
@ -3441,7 +3441,7 @@ inline C10_HOST_DEVICE T modified_bessel_k0_forward(T x) {
} // modified_bessel_k0_forward(T x)
template<typename T>
inline C10_HOST_DEVICE T modified_bessel_k1_forward(T x) {
static inline C10_HOST_DEVICE T modified_bessel_k1_forward(T x) {
static const T A[] = {
-7.02386347938628759343e-18,
-2.42744985051936593393e-15,
@ -3519,7 +3519,7 @@ inline C10_HOST_DEVICE T modified_bessel_k1_forward(T x) {
} // modified_bessel_k1_forward(T x)
template<typename T>
inline C10_HOST_DEVICE T scaled_modified_bessel_k0_forward(T x) {
static inline C10_HOST_DEVICE T scaled_modified_bessel_k0_forward(T x) {
static const T A[] = {
+1.37446543561352307156e-16,
+4.25981614279661018399e-14,
@ -3596,7 +3596,7 @@ inline C10_HOST_DEVICE T scaled_modified_bessel_k0_forward(T x) {
} // T scaled_modified_bessel_k0_forward(T x)
template<typename T>
inline C10_HOST_DEVICE T scaled_modified_bessel_k1_forward(T x) {
static inline C10_HOST_DEVICE T scaled_modified_bessel_k1_forward(T x) {
static const T A[] = {
-7.02386347938628759343e-18,
-2.42744985051936593393e-15,
@ -3674,7 +3674,7 @@ inline C10_HOST_DEVICE T scaled_modified_bessel_k1_forward(T x) {
} // T scaled_modified_bessel_k1_forward(T x)
template<typename T>
inline C10_HOST_DEVICE T shifted_chebyshev_polynomial_t_forward(T x, int64_t n) {
static inline C10_HOST_DEVICE T shifted_chebyshev_polynomial_t_forward(T x, int64_t n) {
if (n < 0) {
return T(0.0);
}
@ -3717,12 +3717,12 @@ inline C10_HOST_DEVICE T shifted_chebyshev_polynomial_t_forward(T x, int64_t n)
} // shifted_chebyshev_polynomial_t_forward(T x, int64_t n)
template<typename T, bool is_cuda=false>
inline C10_HOST_DEVICE T shifted_chebyshev_polynomial_t_forward(T x, T n) {
static inline C10_HOST_DEVICE T shifted_chebyshev_polynomial_t_forward(T x, T n) {
return shifted_chebyshev_polynomial_t_forward(x, static_cast<int64_t>(n));
} // shifted_chebyshev_polynomial_t_forward(T x, T n)
template<typename T>
inline C10_HOST_DEVICE T shifted_chebyshev_polynomial_u_forward(T x, int64_t n) {
static inline C10_HOST_DEVICE T shifted_chebyshev_polynomial_u_forward(T x, int64_t n) {
if (n < 0) {
return T(0.0);
}
@ -3769,12 +3769,12 @@ inline C10_HOST_DEVICE T shifted_chebyshev_polynomial_u_forward(T x, int64_t n)
} // shifted_chebyshev_polynomial_u_forward(T x, int64_t n)
template<typename T, bool is_cuda=false>
inline C10_HOST_DEVICE T shifted_chebyshev_polynomial_u_forward(T x, T n) {
static inline C10_HOST_DEVICE T shifted_chebyshev_polynomial_u_forward(T x, T n) {
return shifted_chebyshev_polynomial_u_forward(x, static_cast<int64_t>(n));
} // shifted_chebyshev_polynomial_u_forward(T x, T n)
template<typename T>
inline C10_HOST_DEVICE T shifted_chebyshev_polynomial_v_forward(T x, int64_t n) {
static inline C10_HOST_DEVICE T shifted_chebyshev_polynomial_v_forward(T x, int64_t n) {
if (n < 0) {
return T(0.0);
}
@ -3825,12 +3825,12 @@ inline C10_HOST_DEVICE T shifted_chebyshev_polynomial_v_forward(T x, int64_t n)
} // shifted_chebyshev_polynomial_v_forward(T x, int64_t n)
template<typename T, bool is_cuda=false>
inline C10_HOST_DEVICE T shifted_chebyshev_polynomial_v_forward(T x, T n) {
static inline C10_HOST_DEVICE T shifted_chebyshev_polynomial_v_forward(T x, T n) {
return shifted_chebyshev_polynomial_v_forward(x, static_cast<int64_t>(n));
} // shifted_chebyshev_polynomial_v_forward(T x, T n)
template<typename T>
inline C10_HOST_DEVICE T shifted_chebyshev_polynomial_w_forward(T x, int64_t n) {
static inline C10_HOST_DEVICE T shifted_chebyshev_polynomial_w_forward(T x, int64_t n) {
if (n < 0) {
return T(0.0);
}
@ -3881,12 +3881,12 @@ inline C10_HOST_DEVICE T shifted_chebyshev_polynomial_w_forward(T x, int64_t n)
} // shifted_chebyshev_polynomial_w_forward(T x, int64_t n)
template<typename T, bool is_cuda=false>
inline C10_HOST_DEVICE T shifted_chebyshev_polynomial_w_forward(T x, T n) {
static inline C10_HOST_DEVICE T shifted_chebyshev_polynomial_w_forward(T x, T n) {
return shifted_chebyshev_polynomial_w_forward(x, static_cast<int64_t>(n));
} // shifted_chebyshev_polynomial_w_forward(T x, T n)
template<typename T>
inline C10_HOST_DEVICE T spherical_bessel_j0_forward(T x) {
static inline C10_HOST_DEVICE T spherical_bessel_j0_forward(T x) {
if (std::isinf(x)) {
return T(0.0);
}
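Unlike the cylindrical Bessel kernels above, the order-zero spherical Bessel function has a simple closed form: j_0(x) = sin(x)/x, with the removable singularity j_0(0) = 1 and j_0(inf) = 0, which is what the early return here handles. A standalone sketch (the actual kernel may treat small x more carefully for accuracy):

#include <cmath>
#include <cstdio>

double spherical_j0(double x) {
  if (std::isinf(x)) return 0.0;  // matches the early return above
  if (x == 0.0) return 1.0;       // removable singularity
  return std::sin(x) / x;
}

int main() {
  std::printf("%.6f %.6f\n", spherical_j0(0.0), spherical_j0(3.141592653589793));
  // prints 1.000000 0.000000 (j_0 vanishes at nonzero multiples of pi)
  return 0;
}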


@ -28,6 +28,18 @@ Tensor empty_meta_symint(
size, dtype_opt, layout_opt, device_opt, pin_memory_opt, memory_format_opt);
}
// Kept only for BC with XLA
static Tensor empty_strided_meta(
IntArrayRef size,
IntArrayRef stride,
std::optional<ScalarType> dtype_opt,
std::optional<Layout> layout_opt,
std::optional<Device> device_opt,
std::optional<bool> pin_memory_opt
) {
return empty_strided_meta_symint(c10::fromIntArrayRefSlow(size), c10::fromIntArrayRefSlow(stride), dtype_opt, layout_opt, device_opt, pin_memory_opt);
}
Tensor empty_strided_meta_symint(
SymIntArrayRef size,
SymIntArrayRef stride,


@ -802,6 +802,55 @@ TORCH_IMPL_FUNC(slow_conv_transpose2d_structured_cpu)
dilation);
}
static std::tuple<Tensor&, Tensor&, Tensor&> slow_conv_transpose2d_backward_out_cpu(const Tensor& grad_output,
const Tensor& input,
const Tensor& weight,
IntArrayRef kernel_size,
IntArrayRef stride,
IntArrayRef padding,
IntArrayRef output_padding,
IntArrayRef dilation,
Tensor& grad_input,
Tensor& grad_weight,
Tensor& grad_bias) {
if (grad_input.defined()) {
slow_conv_transpose2d_backward_out_cpu_template(
input,
grad_output,
grad_input,
weight,
kernel_size,
stride,
padding,
output_padding,
dilation);
}
if (grad_bias.defined()) {
at::sum_out(grad_bias, grad_output, IntArrayRef{0, 2, 3});
}
if (grad_weight.defined()) {
grad_weight.resize_(weight.sizes(), weight.suggest_memory_format());
grad_weight.zero_();
slow_conv_transpose2d_acc_grad_parameters_cpu(
input,
weight,
grad_output,
grad_weight,
grad_bias,
kernel_size,
stride,
padding,
output_padding,
dilation,
1);
}
return std::tuple<Tensor&, Tensor&, Tensor&>(
grad_input, grad_weight, grad_bias);
}
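The grad_bias branch is the easiest piece to sanity-check: the bias is added once to every output element of its channel, so its gradient is grad_output summed over the batch and spatial dimensions, which is exactly the at::sum_out over {0, 2, 3} above. A standalone NCHW illustration (plain C++, not ATen):

#include <cstdio>
#include <vector>

int main() {
  const int N = 2, C = 3, H = 2, W = 2;
  std::vector<double> grad_output(N * C * H * W, 1.0);  // pretend upstream grads
  std::vector<double> grad_bias(C, 0.0);
  for (int n = 0; n < N; n++)
    for (int c = 0; c < C; c++)
      for (int h = 0; h < H; h++)
        for (int w = 0; w < W; w++)
          grad_bias[c] += grad_output[((n * C + c) * H + h) * W + w];
  std::printf("%f %f %f\n", grad_bias[0], grad_bias[1], grad_bias[2]);  // 8 8 8
  return 0;
}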
static std::tuple<Tensor, Tensor, Tensor> slow_conv_transpose2d_backward_cpu(
const Tensor& grad_output,
const Tensor& input,


@ -871,6 +871,58 @@ Tensor slow_conv_transpose3d_cpu(
return output;
}
static std::tuple<Tensor&, Tensor&, Tensor&> slow_conv_transpose3d_backward_out_cpu(const Tensor& grad_output,
const Tensor& input,
const Tensor& weight,
IntArrayRef kernel_size,
IntArrayRef stride,
IntArrayRef padding,
IntArrayRef output_padding,
IntArrayRef dilation,
Tensor& grad_input,
Tensor& grad_weight,
Tensor& grad_bias) {
if (grad_input.defined()) {
slow_conv_transpose3d_backward_out_cpu_template(
input,
grad_output,
grad_input,
weight,
kernel_size,
stride,
padding,
output_padding,
dilation);
}
if (grad_weight.defined()) {
grad_weight.resize_(weight.sizes());
grad_weight.zero_();
}
if (grad_bias.defined()) {
grad_bias.resize_({weight.size(1)});
grad_bias.zero_();
}
if (grad_weight.defined() || grad_bias.defined()) {
slow_conv_transpose3d_acc_grad_parameters_cpu(
input,
grad_output,
grad_weight,
grad_bias,
kernel_size,
stride,
padding,
output_padding,
dilation,
1);
}
return std::tuple<Tensor&, Tensor&, Tensor&>(
grad_input, grad_weight, grad_bias);
}
static std::tuple<Tensor, Tensor, Tensor> slow_conv_transpose3d_backward_cpu(
const Tensor& grad_output,
const Tensor& input,


@ -339,6 +339,12 @@ Tensor& gather_out(const Tensor& self, Dimname dim, const Tensor& index, bool sp
Tensor index_add(const Tensor& self, Dimname dim, const Tensor& index, const Tensor& source, const Scalar &alpha) {
reportNYIDimnameOverload("index_add");
}
static Tensor& index_add_(Tensor& self, Dimname dim, const Tensor& index, const Tensor& source, const Scalar &alpha) {
reportNYIDimnameOverload("index_add");
}
static Tensor& index_add_out(const Tensor& self, Dimname dim, const Tensor& index, const Tensor& source, const Scalar& alpha, Tensor& result) {
reportNYIDimnameOverload("index_add");
}
Tensor index_fill(const Tensor& self, Dimname dim, const Tensor& index, const Scalar& source) {
return at::index_fill(self, dimname_to_position(self, dim), index, source);
}
@ -366,12 +372,21 @@ Tensor index_select(const Tensor& self, Dimname dim, const Tensor& index) {
Tensor scatter(const Tensor& self, Dimname dim, const Tensor& index, const Tensor& source) {
reportNYIDimnameOverload("scatter");
}
static Tensor& scatter_(Tensor& self, Dimname dim, const Tensor& index, const Tensor& source) {
reportNYIDimnameOverload("scatter");
}
Tensor scatter(const Tensor& self, Dimname dim, const Tensor& index, const Scalar& source) {
reportNYIDimnameOverload("scatter");
}
static Tensor& scatter_(Tensor& self, Dimname dim, const Tensor& index, const Scalar& source) {
reportNYIDimnameOverload("scatter");
}
Tensor scatter_add(const Tensor& self, Dimname dim, const Tensor& index, const Tensor& source) {
reportNYIDimnameOverload("scatter_add");
}
static Tensor& scatter_add_(Tensor& self, Dimname dim, const Tensor& index, const Tensor& source) {
reportNYIDimnameOverload("scatter_add");
}
std::tuple<Tensor&, Tensor&> sort_out(const Tensor& self, std::optional<bool> stable, Dimname dim, bool keepdim, Tensor& values, Tensor& indices) {
reportNYIDimnameOverload("sort");
}


@ -26,7 +26,7 @@ DECLARE_DISPATCH(padding_fn, replication_pad3d_backward_kernel);
namespace padding {
template <int dim>
inline void check_valid_input(const Tensor& input, IntArrayRef padding) {
static inline void check_valid_input(const Tensor& input, IntArrayRef padding) {
TORCH_CHECK(padding.size() == 2 * dim,
"padding size is expected to be ", 2 * dim,


@ -48,7 +48,7 @@ DECLARE_DISPATCH(max_pool3d_backward_fn, max_pool3d_backward_kernel);
namespace {
template <typename dest_t, typename src_t>
inline dest_t
static inline dest_t
safe_downcast(src_t v)
{
TORCH_CHECK(std::numeric_limits<dest_t>::min() <= v && v <= std::numeric_limits<dest_t>::max(),
@ -58,7 +58,7 @@ safe_downcast(src_t v)
}
template<typename T>
inline T pooling_output_shape_pad_lr(
static inline T pooling_output_shape_pad_lr(
T inputSize, T kernelSize, T pad_l, T pad_r, T stride, T dilation,
bool ceil_mode) {
T outputSize = div_rtn<T>(
@ -75,7 +75,7 @@ inline T pooling_output_shape_pad_lr(
}
template<typename T>
inline T pooling_output_shape(
static inline T pooling_output_shape(
T inputSize, T kernelSize, T pad, T stride, T dilation, bool ceil_mode) {
TORCH_CHECK(stride != 0, "stride should not be zero");
TORCH_CHECK(pad >= 0,
@ -117,7 +117,7 @@ inline std::pair<c10::SymInt, c10::SymInt> pooling_same_mode_padding_lr(
}
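pooling_output_shape encodes the standard pooling shape formula; setting aside the extra ceil-mode clamp it applies (the last window must start inside the input rather than in the padding), it reduces to the familiar expression below. A standalone check (illustrative; div_rtn rounds toward negative infinity, which agrees with plain integer division for the non-negative numerators used here):

#include <cstdio>

long pool_out(long in, long k, long pad, long stride, long dil, bool ceil_mode) {
  const long numer =
      in + 2 * pad - dil * (k - 1) - 1 + (ceil_mode ? stride - 1 : 0);
  return numer / stride + 1;
}

int main() {
  std::printf("%ld\n", pool_out(7, 3, 1, 2, 1, false));  // (7+2-2-1)/2 + 1 = 4
  return 0;
}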
// AveragePool2d/DilatedMaxPool2d (forward)
inline void
static inline void
pool2d_shape_check(
const Tensor& input,
int kH, int kW, int dH, int dW, int padH, int padW, int dilationH, int dilationW,
@ -164,7 +164,7 @@ pool2d_shape_check(
}
// DilatedMaxPool2d (backward)
inline void
static inline void
max_pool2d_backward_shape_check(
const Tensor& input,
const Tensor& gradOutput,
@ -192,7 +192,7 @@ max_pool2d_backward_shape_check(
}
// AveragePool2d (backward)
inline void
static inline void
avg_pool2d_backward_shape_check(
const Tensor& input,
const Tensor& gradOutput,
@ -218,7 +218,7 @@ avg_pool2d_backward_shape_check(
}
// AveragePool3d/DilatedMaxPool3d (forward)
inline void
static inline void
pool3d_shape_check(
const Tensor& input,
int64_t nslices,
@ -280,7 +280,7 @@ pool3d_shape_check(
"Output size is too small");
}
inline void
static inline void
max_pool3d_backward_shape_check(
const Tensor& input,
const Tensor& gradOutput,
@ -317,7 +317,7 @@ max_pool3d_backward_shape_check(
check_dim_size(indices, ndim, ndim-1, owidth);
}
inline void
static inline void
avg_pool3d_backward_shape_check(
const Tensor& input,
const Tensor& gradOutput,


@ -24,7 +24,7 @@ namespace native {
// only non-zero result.
template <class T,
typename std::enable_if<std::is_integral<T>::value, T>::type* = nullptr>
inline HOST_DEVICE __ubsan_ignore_signed_int_overflow__ T powi_impl(T a, T b) {
static inline HOST_DEVICE __ubsan_ignore_signed_int_overflow__ T powi_impl(T a, T b) {
T result = 1;
while (b) {
if (b & 1) {
@ -38,13 +38,13 @@ inline HOST_DEVICE __ubsan_ignore_signed_int_overflow__ T powi_impl(T a, T b) {
template <class T,
typename std::enable_if<std::is_integral<T>::value && !std::is_signed<T>::value, T>::type* = nullptr>
inline HOST_DEVICE T powi(T a, T b) {
static inline HOST_DEVICE T powi(T a, T b) {
return powi_impl(a, b);
}
template <class T,
typename std::enable_if<std::is_integral<T>::value && std::is_signed<T>::value, T>::type* = nullptr>
inline HOST_DEVICE T powi(T a, T b) {
static inline HOST_DEVICE T powi(T a, T b) {
if ( b < 0 ) {
if ( a == 1 ) {
return 1;
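powi_impl is plain exponentiation by squaring: peel off the low bit of b, square a, halve b, so only O(log b) multiplications are performed (with signed overflow deliberately tolerated, per the ubsan annotation). A standalone sketch for non-negative exponents:

#include <cstdint>
#include <cstdio>

int64_t powi_demo(int64_t a, int64_t b) {  // assumes b >= 0
  int64_t result = 1;
  while (b) {
    if (b & 1) result *= a;  // include this power of a
    b >>= 1;
    a *= a;                  // a, a^2, a^4, ...
  }
  return result;
}

int main() {
  std::printf("%lld\n", static_cast<long long>(powi_demo(3, 10)));  // 59049
  return 0;
}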


@ -2276,6 +2276,11 @@ bool cpu_equal(const Tensor& self, const Tensor& other) {
return result.load();
}
static Tensor value_selecting_reduction_backward(const Tensor& grad, int64_t dim, const Tensor& indices, at::IntArrayRef sizes, bool keepdim) {
return at::native::value_selecting_reduction_backward_symint(grad, dim, indices, c10::fromIntArrayRefSlow(sizes), keepdim);
}
// max(dim), min(dim), topk(dim), mode(dim), are examples of reduction
// functions that select values. value_selecting_reduction_backward is the
// backward function for those operators; it propagates the grad to the


@ -31,7 +31,7 @@ constexpr scalar_t lower_bound() {
return lim::has_infinity ? -lim::infinity() : lim::lowest();
}
inline Tensor restride_dim(
static inline Tensor restride_dim(
const Tensor& src, int64_t dim,
IntArrayRef replacement_shape
) {
@ -96,13 +96,13 @@ inline std::optional<Tensor> _allreduce_return_trivial(
" but found ", out.option())\
}
inline void check_scalar_type_device_layout_equal(const Tensor& out, const Tensor& self) {
static inline void check_scalar_type_device_layout_equal(const Tensor& out, const Tensor& self) {
OPTION_TYPE_EQUALITY_CHECK(scalar_type, out, self);
OPTION_TYPE_EQUALITY_CHECK(device, out.options(), self.options());
OPTION_TYPE_EQUALITY_CHECK(layout, out.options(), self.options());
}
inline Tensor integer_upcast(const Tensor& self, std::optional<ScalarType> dtype) {
static inline Tensor integer_upcast(const Tensor& self, std::optional<ScalarType> dtype) {
ScalarType scalarType = self.scalar_type();
TORCH_CHECK(!isBarebonesUnsignedType(scalarType), "integer upcasting for uint16, uint32 and uint64 is not currently implemented");
ScalarType upcast_scalarType = dtype.value_or(at::isIntegralType(scalarType, /*includeBool=*/true) ? ScalarType::Long : scalarType);
@ -111,7 +111,7 @@ inline Tensor integer_upcast(const Tensor& self, std::optional<ScalarType> dtype
using DimMask = TensorIterator::DimMask;
inline DimVector make_dim_vector(OptionalIntArrayRef opt_dims, int64_t ndim) {
static DimVector make_dim_vector(OptionalIntArrayRef opt_dims, int64_t ndim) {
if (opt_dims.has_value()) {
return DimVector(opt_dims.value());
} else {
@ -121,7 +121,7 @@ inline DimVector make_dim_vector(OptionalIntArrayRef opt_dims, int64_t ndim) {
}
}
inline DimMask make_dim_mask(OptionalIntArrayRef opt_dims, int64_t ndim, bool allow_empty_dims=false) {
static DimMask make_dim_mask(OptionalIntArrayRef opt_dims, int64_t ndim, bool allow_empty_dims=false) {
DimMask mask;
if (opt_dims.has_value()) {
auto dims = opt_dims.value();
@ -150,7 +150,7 @@ inline DimVector shape_from_dim_mask(const Tensor& self, DimMask mask, bool keep
return shape;
}
inline void resize_reduction_result(
static void resize_reduction_result(
Tensor& result, const Tensor& self, DimMask mask, bool keepdim,
ScalarType /*dtype*/)
{
@ -167,7 +167,7 @@ inline Tensor create_reduction_result(
return at::empty(shape, self.options().dtype(dtype));
}
inline Tensor review_reduce_result(const Tensor& result, int ndim, DimMask mask, bool keepdim) {
static Tensor review_reduce_result(const Tensor& result, int ndim, DimMask mask, bool keepdim) {
if (keepdim) {
return result;
}
@ -182,7 +182,7 @@ inline Tensor review_reduce_result(const Tensor& result, int ndim, DimMask mask,
return result.as_strided(shape, stride);
}
inline TensorIterator make_reduction(
static TensorIterator make_reduction(
const char* name, Tensor& result, const Tensor& self,
at::OptionalIntArrayRef dim_opt,
bool keepdim, ScalarType in_dtype, ScalarType out_dtype) {
@ -207,7 +207,7 @@ inline TensorIterator make_reduction(
return TensorIterator::reduce_op(viewed_result, self.to(in_dtype));
}
inline C10_UNUSED TensorIterator make_reduction(
static C10_UNUSED TensorIterator make_reduction(
const char* name, Tensor& result, const Tensor& self,
at::OptionalIntArrayRef dim, bool keepdim, ScalarType out_dtype) {
// special case for type promotion in mixed precision, improves computational
@ -222,7 +222,7 @@ inline C10_UNUSED TensorIterator make_reduction(
return make_reduction(name, result, self, dim, keepdim, in_dtype, out_dtype);
}
inline TensorIterator make_reduction(
static TensorIterator make_reduction(
const char* name, Tensor& result1, Tensor& result2, const Tensor& self,
at::OptionalIntArrayRef dim_opt, bool keepdim, ScalarType dtype1,
ScalarType dtype2) {
@ -259,13 +259,13 @@ inline TensorIterator make_reduction(
return TensorIterator::reduce_op(viewed_result1, viewed_result2, self.to(dtype1));
}
inline C10_UNUSED TensorIterator make_reduction(
static C10_UNUSED TensorIterator make_reduction(
const char* name, Tensor& result1, Tensor& result2, const Tensor& self,
at::OptionalIntArrayRef dim, bool keepdim, ScalarType dtype) {
return make_reduction(name, result1, result2, self, dim, keepdim, dtype, dtype);
}
inline void zero_numel_check_dims(const Tensor& self, const int64_t dim, const char *fn_name) {
static void zero_numel_check_dims(const Tensor& self, const int64_t dim, const char *fn_name) {
if (self.ndimension() == 0) {
TORCH_CHECK_INDEX(dim == 0 || dim == -1, fn_name,
": Expected reduction dim -1 or 0 for scalar but got ", dim);
@ -276,7 +276,7 @@ inline void zero_numel_check_dims(const Tensor& self, const int64_t dim, const c
}
}
inline void zero_numel_check_dims(const Tensor& self, const IntArrayRef dim, const char *fn_name) {
static void zero_numel_check_dims(const Tensor& self, const IntArrayRef dim, const char *fn_name) {
TORCH_CHECK(
!dim.empty(),
fn_name, ": Expected reduction dim to be specified for input.numel() == 0. ",
@ -286,7 +286,7 @@ inline void zero_numel_check_dims(const Tensor& self, const IntArrayRef dim, con
}
}
inline std::vector<int64_t> get_zero_numel_tensor_size(
static std::vector<int64_t> get_zero_numel_tensor_size(
const Tensor& self,
const int64_t dim,
const bool keepdim,
@ -313,7 +313,7 @@ inline std::vector<int64_t> get_zero_numel_tensor_size(
// This function should be called when you are reducing a zero-numel tensor and want to
// resize the output and return it. This function exists for resizing zero-numel
// tensors when the size of the reduction dimension is non-zero.
inline C10_UNUSED void zero_numel_tensor_resize(Tensor& result, Tensor& result_indices,
static C10_UNUSED void zero_numel_tensor_resize(Tensor& result, Tensor& result_indices,
const Tensor& self, const int64_t dim,
const bool keepdim, const char *fn_name) {
auto sizes = get_zero_numel_tensor_size(self, dim, keepdim, fn_name);
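
The comment above describes the intended scenario: the tensor as a whole has zero elements, but the reduction dimension itself is non-zero, so the output shape is still well defined. An illustrative call (behavior inferred from get_zero_numel_tensor_size above):

// Sketch only: max over dim 1 of a (0, 4) tensor.
at::Tensor self = at::empty({0, 4});
at::Tensor values = at::empty({0}, self.options());
at::Tensor indices = at::empty({0}, self.options().dtype(at::kLong));
at::native::zero_numel_tensor_resize(values, indices, self, /*dim=*/1,
                                     /*keepdim=*/false, "max");
// values and indices both end up with shape {0}.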
@ -349,7 +349,7 @@ inline ScalarType get_dtype_from_result(Tensor& result, std::optional<ScalarType
namespace at::meta {
inline C10_UNUSED DimVector get_reduction_shape(
static C10_UNUSED DimVector get_reduction_shape(
const Tensor& self,
IntArrayRef dims,
bool keepdim,
@ -358,7 +358,7 @@ inline C10_UNUSED DimVector get_reduction_shape(
return native::shape_from_dim_mask(self, mask, keepdim);
}
inline void resize_reduction(
static void resize_reduction(
impl::MetaBase& meta,
const Tensor& self,
OptionalIntArrayRef opt_dims,
@ -379,7 +379,7 @@ inline void resize_reduction(
meta.maybe_get_output(), self, dims_, keepdim);
}
inline void resize_reduction_with_indices(
static void resize_reduction_with_indices(
impl::MetaBase& meta,
const Tensor& self,
IntArrayRef dims,
@ -396,7 +396,7 @@ inline void resize_reduction_with_indices(
meta.maybe_get_output(1), self, dims_, keepdim);
}
inline TensorIterator make_reduction(
static TensorIterator make_reduction(
const Tensor& self,
const Tensor& result,
OptionalIntArrayRef opt_dims,
@ -412,7 +412,7 @@ inline TensorIterator make_reduction(
return TensorIterator::reduce_op(viewed_result, self.to(in_dtype));
}
inline TensorIterator make_reduction(
static TensorIterator make_reduction(
const Tensor& self,
const Tensor& result1,
const Tensor& result2,
@ -434,7 +434,7 @@ inline TensorIterator make_reduction(
return TensorIterator::reduce_op(viewed_result1, viewed_result2, self.to(dtype1));
}
inline C10_UNUSED TensorIterator make_reduction_from_out_ty(
static C10_UNUSED TensorIterator make_reduction_from_out_ty(
const Tensor& self,
const Tensor& result,
OptionalIntArrayRef opt_dims,

View File

@ -6,7 +6,7 @@ namespace at::native {
enum class ReductionType {MAX, MEAN, MIN, SUM, PROD};
inline ReductionType get_reduction_enum(const c10::string_view& reduce) {
static inline ReductionType get_reduction_enum(const c10::string_view& reduce) {
if (reduce == "max" || reduce == "amax") {
return ReductionType::MAX;
} else if (reduce == "mean") {
@ -23,7 +23,7 @@ inline ReductionType get_reduction_enum(const c10::string_view& reduce) {
}
// used for `scatter_reduce`, old options for BC.
inline ReductionType get_operator_enum(const c10::string_view reduce, bool use_new_options) {
static inline ReductionType get_operator_enum(const c10::string_view reduce, bool use_new_options) {
if (use_new_options) {
return get_reduction_enum(reduce);
} else {
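
For reference, the mapping these helpers implement, exercised end to end (a sketch using only the cases visible in this hunk):

// "amax" is the new-style spelling of "max", so both resolve to the same enum.
auto r1 = at::native::get_reduction_enum("amax");
auto r2 = at::native::get_operator_enum("max", /*use_new_options=*/true);
TORCH_INTERNAL_ASSERT(r1 == at::native::ReductionType::MAX && r1 == r2);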

View File

@ -301,6 +301,14 @@ void reflection_pad2d_backward_out_template(
} // namespace
// TODO: I think this function should be removed since we implement it with
// TORCH_IMPL_FUNC below
static Tensor& reflection_pad1d_out_cpu(const Tensor& input, IntArrayRef padding,
Tensor& output) {
reflection_pad1d_kernel(kCPU, output, input, padding);
return output;
}
Tensor& reflection_pad1d_out_quantized_cpu(const Tensor& input, IntArrayRef padding,
Tensor& output) {
TORCH_CHECK(input.qscheme() == kPerTensorAffine, "Only per tensor quantization is supported");
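
The TODO above says the hand-written out variant duplicates the structured-kernel path. For context, a hedged sketch of the TORCH_IMPL_FUNC form it refers to (the exact meta/impl pairing is not shown in this diff):

// Sketch only: under structured kernels the meta function checks and sizes
// `output`, so the impl just dispatches the kernel.
TORCH_IMPL_FUNC(reflection_pad1d_out_cpu)
(const Tensor& input, IntArrayRef padding, const Tensor& output) {
  reflection_pad1d_kernel(kCPU, output, input, padding);
}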

View File

@ -231,6 +231,14 @@ TensorImpl* resize_impl_cpu_(
return _resize_impl_(self, size, stride, resize_storage);
}
static TensorImpl* resize_impl_meta_(
TensorImpl* self,
c10::SymIntArrayRef size,
at::OptionalSymIntArrayRef stride,
bool resize_storage = true) {
return _resize_impl_(self, size, stride, resize_storage);
}
template <typename T>
const Tensor& _resize_(
const Tensor& self,

View File

@ -40,7 +40,7 @@ TORCH_API void resize_bytes_cpu(StorageImpl* storage, size_t size_bytes);
TORCH_API void resize_bytes_meta(StorageImpl* storage, c10::SymInt size_bytes);
TORCH_API void resize_bytes_nocuda(const Storage& storage, c10::SymInt size_bytes);
inline void maybe_resize_storage_cpu(TensorImpl* self, size_t new_size_bytes) {
static inline void maybe_resize_storage_cpu(TensorImpl* self, size_t new_size_bytes) {
// It does not make sense to try to resize a storage
// to hold 0 elements, and this can break
// if storage_offset is positive but
@ -79,7 +79,7 @@ template <>
inline int64_t maybe_convert_symint(c10::SymInt x) { return x.guard_int(__FILE__, __LINE__); }
template <typename T>
inline void checkInBoundsForStorage(
static inline void checkInBoundsForStorage(
ArrayRef<T> size,
ArrayRef<T> stride,
T storage_offset,
@ -111,7 +111,7 @@ inline void checkInBoundsForStorage(
}
template <typename T>
inline void checkSetStorage(Tensor& result, Storage storage, T storage_offset,
static inline void checkSetStorage(Tensor& result, Storage storage, T storage_offset,
ArrayRef<T> size, ArrayRef<T> stride) {
// FIXME: stride should be optional
if (stride.data()) {

View File

@ -440,6 +440,15 @@ TORCH_IMPL_FUNC(log_softmax_backward_cpu_out) (
}
}
static Tensor softmax(const Tensor& input_, const int64_t dim_) {
auto result = [&]() {
NoNamesGuard guard;
return at::_softmax(input_, dim_, false);
}();
namedinference::propagate_names(result, input_);
return result;
}
Tensor softmax(const Tensor& input_, const int64_t dim_, std::optional<ScalarType> dtype) {
auto result = [&]() {
NoNamesGuard guard;
@ -496,6 +505,15 @@ Tensor special_softmax(const Tensor& input_, const int64_t dim_, std::optional<S
return at::softmax(input_, dim_, dtype);
}
static Tensor log_softmax(const Tensor& input_, const int64_t dim_) {
auto result = [&]() {
NoNamesGuard guard;
return at::_log_softmax(input_, dim_, false);
}();
namedinference::propagate_names(result, input_);
return result;
}
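
Both wrappers above use the standard named-tensor idiom: run the unnamed computation inside a NoNamesGuard, then propagate names from the input onto the result. The same shape works for any name-agnostic op; a minimal hypothetical example:

// Sketch only: `my_activation` is a made-up op following the same pattern.
Tensor my_activation(const Tensor& input_) {
  auto result = [&]() {
    NoNamesGuard guard;        // suspend named-tensor handling
    return at::relu(input_);   // the actual unnamed computation
  }();
  namedinference::propagate_names(result, input_);  // restore names
  return result;
}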
Tensor log_softmax(const Tensor& input_, const int64_t dim_, std::optional<ScalarType> dtype) {
auto result = [&]() {
NoNamesGuard guard;

View File

@ -792,6 +792,12 @@ std::tuple<Tensor, Tensor> max(const Tensor& self, Dimname dim, bool keepdim) {
std::tuple<Tensor&, Tensor&> max_out(const Tensor& self, Dimname dim, bool keepdim, Tensor& max, Tensor& max_indices) {
return at::max_out(max, max_indices, self, dimname_to_position(self, dim), keepdim);
}
static Tensor argmax(const Tensor& /*self*/, Dimname /*dim*/, bool /*keepdim*/) {
reportNYIDimnameOverload("argmax");
}
static Tensor argmin(const Tensor& /*self*/, Dimname /*dim*/, bool /*keepdim*/) {
reportNYIDimnameOverload("argmin");
}
Tensor argsort(const Tensor& /*self*/, Dimname /*dim*/, bool /*keepdim*/) {
reportNYIDimnameOverload("argsort");
}

View File

@ -172,10 +172,18 @@ Tensor arange(
return at::arange_out(result, start, end, step);
}
static Tensor& arange_start_out(const Scalar& start, const Scalar& end, Tensor& result) {
return at::arange_out(result, start, end, /*step=*/1);
}
Tensor& arange_out(const Scalar& end, Tensor& result) {
return at::arange_out(result, /*start=*/0, end, /*step=*/1);
}
static Tensor& arange_out(Tensor& result, const Scalar& start, const Scalar& end) {
return at::arange_out(result, start, end, /*step=*/1);
}
Tensor _dim_arange(const Tensor& like, int64_t dim) {
return at::arange(like.size(dim), like.options().dtype(at::kLong));
}
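
All of the out-overloads above funnel into the full arange_out with step defaulted to 1, and _dim_arange simply sizes the range from one dimension. Illustrative usage (expected values follow directly from the definition above):

// Sketch only: indices along dim 1 of a (3, 5) tensor.
at::Tensor like = at::zeros({3, 5});
at::Tensor idx = at::_dim_arange(like, /*dim=*/1);
// idx is tensor([0, 1, 2, 3, 4]) with dtype kLong.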

View File

@ -105,6 +105,10 @@ Tensor & detach_(Tensor & self) {
return self;
}
static Tensor contiguous(const Tensor & self) {
return contiguous(self, MemoryFormat::Contiguous);
}
Tensor contiguous(const Tensor& self, MemoryFormat memory_format) {
if (self.is_contiguous(memory_format)) {
return self;

View File

@ -1181,6 +1181,14 @@ Tensor as_strided_tensorimpl(const Tensor& self, IntArrayRef size, IntArrayRef s
return result;
}
static Tensor as_strided_tensorimpl_meta(const Tensor& self, IntArrayRef size, IntArrayRef stride, optional<int64_t> storage_offset_) {
auto storage_offset = storage_offset_.value_or(self.storage_offset());
auto result = at::detail::make_tensor<TensorImpl>(
c10::TensorImpl::VIEW, Storage(self.storage()), self.key_set(), self.dtype());
setStrided(result, size, stride, storage_offset);
return result;
}
template <typename T>
inline void setStridedUnchecked(
const Tensor& self,
@ -1241,6 +1249,10 @@ const Tensor &as_strided__symint(const Tensor& self, SymIntArrayRef size, SymInt
return self;
}
static Tensor narrow_copy_dense(const Tensor& self, int64_t dim, int64_t start, int64_t length) {
return self.narrow(dim, start, length).clone(at::MemoryFormat::Contiguous);
}
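
The generic fallback above is narrow-then-clone; a quick equivalence sketch via the public Tensor API (since the function itself is now file-local):

// Sketch only: narrow_copy on a dense tensor equals narrow + contiguous clone.
at::Tensor t = at::arange(12).reshape({3, 4});
at::Tensor a = t.narrow_copy(/*dim=*/1, /*start=*/1, /*length=*/2);
at::Tensor b = t.narrow(1, 1, 2).clone(at::MemoryFormat::Contiguous);
TORCH_INTERNAL_ASSERT(a.equal(b));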
// Should just use narrow_copy_out, but this API is used internally at Meta:
// https://github.com/pytorch/pytorch/pull/87045#issuecomment-1309353561
Tensor narrow_copy_dense_cpu(const Tensor& self, int64_t dim, int64_t start, int64_t length){
@ -3575,6 +3587,10 @@ Tensor view_as(const Tensor& self, const Tensor& other) {
return self.view_symint(other.sym_sizes());
}
static int64_t numel(const Tensor& self) {
return self.unsafeGetTensorImpl()->numel();
}
std::vector<Tensor> unbind(const Tensor &self, int64_t dim) {
dim = maybe_wrap_dim(dim, self.dim());
int64_t size = self.size(dim);

View File

@ -180,6 +180,10 @@ TORCH_IMPL_FUNC(triu_cpu)(const Tensor& self, int64_t k, const Tensor &result) {
compute_triu_tril<UpperTriangle>(self, k, result);
}
static Tensor trace_backward(const Tensor& grad, at::IntArrayRef sizes) {
return at::native::trace_backward_symint(grad, c10::fromIntArrayRefSlow(sizes));
}
Tensor trace_backward_symint(const Tensor& grad, c10::SymIntArrayRef sizes) {
if (sizes.size() != 2) {
throw std::runtime_error("expected matrix input");

View File

@ -24,6 +24,10 @@
namespace at::native {
static bool is_cuda(const Tensor& self) {
return self.is_cuda();
}
bool is_distributed(const Tensor& self) {
return false;
}
@ -56,6 +60,18 @@ bool is_neg(const Tensor& self) {
return self.is_neg();
}
static bool is_sparse(const Tensor& self) {
return self.is_sparse();
}
static bool is_sparse_csr(const Tensor& self) {
return self.is_sparse_csr();
}
static bool is_quantized(const Tensor& self) {
return self.is_quantized();
}
// True if `self` and `from` have compatible tensor type so that `from`'s
// TensorImpl can be copied to `self`.
bool _has_compatible_shallow_copy_type(const Tensor& self, const Tensor& from) {
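
Per the comment above, compatibility is a property of the TensorImpl type, not of dtype or shape. An illustrative check (behavior inferred from the comment; the public entry point is at::_has_compatible_shallow_copy_type):

// Sketch only: two dense CPU tensors share a TensorImpl type; dense vs.
// sparse do not, so a shallow copy between them is refused.
at::Tensor dense_a = at::zeros({2});
at::Tensor dense_b = at::ones({3}, at::kDouble);  // other shape/dtype is fine
at::Tensor sparse = dense_a.to_sparse();
bool ok = at::_has_compatible_shallow_copy_type(dense_a, dense_b);  // true
bool bad = at::_has_compatible_shallow_copy_type(dense_a, sparse);  // false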

Some files were not shown because too many files have changed in this diff.