Compare commits


38 Commits

SHA1 Message Date
1d983f0775 Merge branch 'findhao/operatorbench2' into findhao/operatorbench3 2024-10-01 13:28:23 -07:00
f320e4ba86 fix lint 2024-10-01 13:19:14 -07:00
73dfb2bc2d fix lint; fix affected imports 2024-10-01 13:17:37 -07:00
f2654ae713 update installation for unit test 2024-09-30 15:48:56 -07:00
ab40b51c5d Merge remote-tracking branch 'origin' into findhao/operatorbench2 2024-09-30 13:50:08 -07:00
8c1b793071 fix input generation; split native and custom 2024-09-30 11:22:20 -07:00
de205901f3 Merge branch 'findhao/operatorbench2' into findhao/operatorbench3 2024-09-27 14:07:52 -07:00
9f2936931a only enable test when triton installed 2024-09-27 09:43:25 -07:00
5fa8031ae5 fix lint 2024-09-26 17:35:17 -07:00
a245137d76 update docstring 2024-09-26 17:07:23 -07:00
2d93c5f720 add unit test and fix input issues 2024-09-26 17:00:46 -07:00
6f3b42a073 add unit test; add profile-folder; 2024-09-26 10:56:06 -07:00
db4c9a54a2 fix lint 2024-09-25 14:02:28 -04:00
f78da95bc5 remove single_run; add prepare_input_and_functions; add type annotations 2024-09-25 13:51:37 -04:00
7c2bc74a72 temporary saved 2024-09-25 12:45:20 -04:00
7b366a2b70 fix input compatibility 2024-09-24 17:38:23 -04:00
b437ffe8b0 fix input format 2024-09-24 17:23:05 -04:00
085e2f5416 add channel_last; 2024-09-24 12:37:30 -04:00
425ad9ccdb move previous operatorbench to new one 2024-09-20 16:52:35 -04:00
900671f799 collect instances rather than classes. it is better for compatibility with original operatorbench 2024-09-20 16:48:08 -04:00
1f30017712 fix lint 2024-09-19 13:19:40 -04:00
1fdf24d9a5 fix lint 2024-09-16 20:10:34 -04:00
8a4bc3cc09 update comment 2024-09-16 19:22:31 -04:00
78f5027b48 use mean of results for each input 2024-09-16 19:19:22 -04:00
6e2d4c661a fix docs and default configs; remove unused function; 2024-09-16 19:10:56 -04:00
30dd419560 fix MetricResult 2024-09-16 18:51:09 -04:00
f280038562 add profile 2024-09-16 18:12:25 -04:00
0bb482185c format output 2024-09-16 17:30:37 -04:00
18c2804981 fix lint 2024-09-16 17:18:14 -04:00
a6b6bbc293 add inductor variant 2024-09-16 17:17:32 -04:00
ebd4755b0d fix lint 2024-09-16 16:17:58 -04:00
8779577950 add requirements.txt 2024-09-16 16:02:24 -04:00
a6d9a506c3 make the resultmetrics more clear 2024-09-16 14:03:40 -04:00
a0ecd4f45d fix bug for full 2024-09-13 20:00:23 -04:00
9cdac0662b add metrics; convert some argument from string to enum etc.; 2024-09-13 17:17:38 -04:00
57be1aae4b add benchmarkconfig; fix subclass inheritance 2024-09-13 13:04:39 -04:00
22ee74895b remove multirun 2024-09-12 16:18:30 -04:00
ea97de291b init 2024-09-11 17:45:24 -04:00
923 changed files with 8692 additions and 19143 deletions

View File

@ -355,12 +355,6 @@ case "$image" in
CONDA_CMAKE=yes
VISION=yes
;;
pytorch-linux-jammy-py3-clang18-asan)
ANACONDA_PYTHON_VERSION=3.10
CLANG_VERSION=18
CONDA_CMAKE=yes
VISION=yes
;;
pytorch-linux-jammy-py3.9-gcc11)
ANACONDA_PYTHON_VERSION=3.9
GCC_VERSION=11
@ -387,13 +381,6 @@ case "$image" in
HALIDE=yes
TRITON=yes
;;
pytorch-linux-jammy-py3.12-triton-cpu)
CUDA_VERSION=12.4
ANACONDA_PYTHON_VERSION=3.12
GCC_VERSION=11
CONDA_CMAKE=yes
TRITON_CPU=yes
;;
pytorch-linux-focal-linter)
# TODO: Use 3.9 here because of this issue https://github.com/python/mypy/issues/13627.
# We will need to update mypy version eventually, but that's for another day. The task
@ -523,7 +510,6 @@ docker build \
--build-arg "UCC_COMMIT=${UCC_COMMIT}" \
--build-arg "CONDA_CMAKE=${CONDA_CMAKE}" \
--build-arg "TRITON=${TRITON}" \
--build-arg "TRITON_CPU=${TRITON_CPU}" \
--build-arg "ONNX=${ONNX}" \
--build-arg "DOCS=${DOCS}" \
--build-arg "INDUCTOR_BENCHMARKS=${INDUCTOR_BENCHMARKS}" \

View File

@ -1 +0,0 @@
6a333f1b05671f6fada4ba7bbfae4a02a9d96f4f

View File

@ -13,17 +13,11 @@ if [ -n "$CLANG_VERSION" ]; then
elif [[ $UBUNTU_VERSION == 22.04 ]]; then
# work around ubuntu apt-get conflicts
sudo apt-get -y -f install
wget --no-check-certificate -O - https://apt.llvm.org/llvm-snapshot.gpg.key | sudo apt-key add -
if [[ $CLANG_VERSION == 18 ]]; then
apt-add-repository "deb http://apt.llvm.org/jammy/ llvm-toolchain-jammy-18 main"
fi
fi
sudo apt-get update
apt-get install -y --no-install-recommends clang-"$CLANG_VERSION" llvm-"$CLANG_VERSION"
if [[ $CLANG_VERSION == 18 ]]; then
apt-get install -y --no-install-recommends libomp-18-dev
fi
apt-get install -y --no-install-recommends clang-"$CLANG_VERSION"
apt-get install -y --no-install-recommends llvm-"$CLANG_VERSION"
# Install dev version of LLVM.
if [ -n "$LLVMDEV" ]; then

View File

@ -32,7 +32,7 @@ pip_install coloredlogs packaging
pip_install onnxruntime==1.18.1
pip_install onnx==1.16.2
pip_install onnxscript==0.1.0.dev20241008 --no-deps
pip_install onnxscript==0.1.0.dev20240831 --no-deps
# required by onnxscript
pip_install ml_dtypes

View File

@ -15,11 +15,8 @@ conda_reinstall() {
if [ -n "${XPU_VERSION}" ]; then
TRITON_REPO="https://github.com/intel/intel-xpu-backend-for-triton"
TRITON_TEXT_FILE="triton-xpu"
elif [ -n "${TRITON_CPU}" ]; then
TRITON_REPO="https://github.com/triton-lang/triton-cpu"
TRITON_TEXT_FILE="triton-cpu"
else
TRITON_REPO="https://github.com/triton-lang/triton"
TRITON_REPO="https://github.com/openai/triton"
TRITON_TEXT_FILE="triton"
fi
@ -47,10 +44,9 @@ chown -R jenkins /var/lib/jenkins/triton
chgrp -R jenkins /var/lib/jenkins/triton
pushd /var/lib/jenkins/
as_jenkins git clone --recursive ${TRITON_REPO} triton
as_jenkins git clone ${TRITON_REPO} triton
cd triton
as_jenkins git checkout ${TRITON_PINNED_COMMIT}
as_jenkins git submodule update --init --recursive
cd python
# TODO: remove patch setup.py once we have a proper fix for https://github.com/triton-lang/triton/issues/4527

View File

@ -139,9 +139,9 @@ opt-einsum==3.3
#Pinned versions: 3.3
#test that import: test_linalg.py
optree==0.13.0
optree==0.12.1
#Description: A library for tree manipulation
#Pinned versions: 0.13.0
#Pinned versions: 0.12.1
#test that import: test_vmap.py, test_aotdispatch.py, test_dynamic_shapes.py,
#test_pytree.py, test_ops.py, test_control_flow.py, test_modules.py,
#common_utils.py, test_eager_transforms.py, test_python_dispatch.py,

View File

@ -147,13 +147,6 @@ COPY ci_commit_pins/triton.txt triton.txt
RUN if [ -n "${TRITON}" ]; then bash ./install_triton.sh; fi
RUN rm install_triton.sh common_utils.sh triton.txt
ARG TRITON_CPU
COPY ./common/install_triton.sh install_triton.sh
COPY ./common/common_utils.sh common_utils.sh
COPY ci_commit_pins/triton-cpu.txt triton-cpu.txt
RUN if [ -n "${TRITON_CPU}" ]; then bash ./install_triton.sh; fi
RUN rm install_triton.sh common_utils.sh triton-cpu.txt
ARG EXECUTORCH
# Build and install executorch
COPY ./common/install_executorch.sh install_executorch.sh

View File

@ -178,7 +178,7 @@ fi
# sccache will fail for CUDA builds if all cores are used for compiling
# gcc 7 with sccache seems to have intermittent OOM issue if all cores are used
if [ -z "$MAX_JOBS" ]; then
if { [[ "$BUILD_ENVIRONMENT" == *cuda* ]]; } && which sccache > /dev/null; then
if { [[ "$BUILD_ENVIRONMENT" == *cuda* ]] || [[ "$BUILD_ENVIRONMENT" == *gcc7* ]]; } && which sccache > /dev/null; then
export MAX_JOBS=$(($(nproc) - 1))
fi
fi
@ -218,6 +218,10 @@ if [[ "${BUILD_ENVIRONMENT}" == *-pch* ]]; then
export USE_PRECOMPILED_HEADERS=1
fi
if [[ "${BUILD_ENVIRONMENT}" == *linux-focal-py3.7-gcc7-build* ]]; then
export USE_GLOO_WITH_OPENSSL=ON
fi
if [[ "${BUILD_ENVIRONMENT}" != *android* && "${BUILD_ENVIRONMENT}" != *cuda* ]]; then
export BUILD_STATIC_RUNTIME_BENCHMARK=ON
fi

View File

@ -191,22 +191,9 @@ function install_torchrec_and_fbgemm() {
pip_uninstall torchrec-nightly
pip_uninstall fbgemm-gpu-nightly
pip_install setuptools-git-versioning scikit-build pyre-extensions
# TODO (huydhn): I still have no clue on why sccache doesn't work with only fbgemm_gpu here, but it
# seems to be an sccache-related issue
if [[ "$IS_A100_RUNNER" == "1" ]]; then
unset CMAKE_CUDA_COMPILER_LAUNCHER
sudo mv /opt/cache/bin /opt/cache/bin-backup
fi
# See https://github.com/pytorch/pytorch/issues/106971
CUDA_PATH=/usr/local/cuda-12.1 pip_install --no-use-pep517 --user "git+https://github.com/pytorch/FBGEMM.git@${fbgemm_commit}#egg=fbgemm-gpu&subdirectory=fbgemm_gpu"
pip_install --no-use-pep517 --user "git+https://github.com/pytorch/torchrec.git@${torchrec_commit}"
if [[ "$IS_A100_RUNNER" == "1" ]]; then
export CMAKE_CUDA_COMPILER_LAUNCHER=/opt/cache/bin/sccache
sudo mv /opt/cache/bin-backup /opt/cache/bin
fi
}
function clone_pytorch_xla() {

View File

@ -403,7 +403,7 @@ pr_time_benchmarks() {
PYTHONPATH=$(pwd)/benchmarks/dynamo/pr_time_benchmarks source benchmarks/dynamo/pr_time_benchmarks/benchmark_runner.sh "$TEST_REPORTS_DIR/pr_time_benchmarks_results.csv" "benchmarks/dynamo/pr_time_benchmarks/benchmarks"
echo "benchmark results on current PR: "
cat "$TEST_REPORTS_DIR/pr_time_benchmarks_results.csv"
PYTHONPATH=$(pwd)/benchmarks/dynamo/pr_time_benchmarks python benchmarks/dynamo/pr_time_benchmarks/check_results.py "benchmarks/dynamo/pr_time_benchmarks/expected_results.csv" "$TEST_REPORTS_DIR/pr_time_benchmarks_results.csv" "$TEST_REPORTS_DIR/new_expected_results.csv"
PYTHONPATH=$(pwd)/benchmarks/dynamo/pr_time_benchmarks python benchmarks/dynamo/pr_time_benchmarks/check_results.py "benchmarks/dynamo/pr_time_benchmarks/expected_results.csv" "$TEST_REPORTS_DIR/pr_time_benchmarks_results.csv"
}
if [[ "${TEST_CONFIG}" == *pr_time_benchmarks* ]]; then
@ -606,11 +606,6 @@ test_inductor_halide() {
assert_git_not_dirty
}
test_inductor_triton_cpu() {
python test/run_test.py --include inductor/test_triton_cpu_backend.py --verbose
assert_git_not_dirty
}
test_dynamo_benchmark() {
# Usage: test_dynamo_benchmark huggingface 0
TEST_REPORTS_DIR=$(pwd)/test/test-reports
@ -665,6 +660,15 @@ test_inductor_torchbench_smoketest_perf() {
# The threshold value needs to be actively maintained to make this check useful
python benchmarks/dynamo/check_perf_csv.py -f "$TEST_REPORTS_DIR/inductor_training_smoketest.csv" -t 1.4
TORCHINDUCTOR_ABI_COMPATIBLE=1 python benchmarks/dynamo/torchbench.py --device cuda --performance --bfloat16 --inference \
--export-aot-inductor --only nanogpt --output "$TEST_REPORTS_DIR/inductor_inference_smoketest.csv"
# The threshold value needs to be actively maintained to make this check useful
# The perf number of nanogpt seems not very stable, e.g.
# https://github.com/pytorch/pytorch/actions/runs/7158691360/job/19491437314,
# and thus we lower its threshold to reduce flakiness. If this continues to be a problem,
# we switch to use some other model.
python benchmarks/dynamo/check_perf_csv.py -f "$TEST_REPORTS_DIR/inductor_inference_smoketest.csv" -t 4.9
# Check memory compression ratio for a few models
for test in hf_Albert timm_vision_transformer; do
python benchmarks/dynamo/torchbench.py --device cuda --performance --backend inductor --amp --training \
@ -1435,8 +1439,6 @@ elif [[ "${TEST_CONFIG}" == *inductor_distributed* ]]; then
test_inductor_distributed
elif [[ "${TEST_CONFIG}" == *inductor-halide* ]]; then
test_inductor_halide
elif [[ "${TEST_CONFIG}" == *inductor-triton-cpu* ]]; then
test_inductor_triton_cpu
elif [[ "${TEST_CONFIG}" == *inductor-micro-benchmark* ]]; then
test_inductor_micro_benchmark
elif [[ "${TEST_CONFIG}" == *huggingface* ]]; then
@ -1460,7 +1462,7 @@ elif [[ "${TEST_CONFIG}" == *torchbench* ]]; then
# https://github.com/opencv/opencv-python/issues/885
pip_install opencv-python==4.8.0.74
if [[ "${TEST_CONFIG}" == *inductor_torchbench_smoketest_perf* ]]; then
checkout_install_torchbench hf_Bert hf_Albert timm_vision_transformer
checkout_install_torchbench hf_Bert hf_Albert nanogpt timm_vision_transformer
PYTHONPATH=$(pwd)/torchbench test_inductor_torchbench_smoketest_perf
elif [[ "${TEST_CONFIG}" == *inductor_torchbench_cpu_smoketest_perf* ]]; then
checkout_install_torchbench timm_vision_transformer phlippe_densenet basic_gnn_edgecnn \

View File

@ -26,7 +26,7 @@ fi
export SCRIPT_HELPERS_DIR=$SCRIPT_PARENT_DIR/win-test-helpers
set +ex
grep -E -R 'PyLong_(From|As)(Unsigned|)Long\(' --exclude=python_numbers.h --exclude=pythoncapi_compat.h --exclude=eval_frame.c torch/
grep -E -R 'PyLong_(From|As)(Unsigned|)Long\(' --exclude=python_numbers.h --exclude=eval_frame.c torch/
PYLONG_API_CHECK=$?
if [[ $PYLONG_API_CHECK == 0 ]]; then
echo "Usage of PyLong_{From,As}{Unsigned}Long API may lead to overflow errors on Windows"

View File

@ -27,11 +27,12 @@ if [[ "$PACKAGE_TYPE" == conda ]]; then
source activate testenv >/dev/null
elif [[ "$PACKAGE_TYPE" != libtorch ]]; then
python_path="/opt/python/cp\$python_nodot-cp\${python_nodot}"
if [[ "\$python_nodot" = *t ]]; then
python_digits="\$(echo $DESIRED_PYTHON | tr -cd [:digit:])"
python_path="/opt/python/cp\$python_digits-cp\${python_digits}t"
# Prior to Python 3.8 paths were suffixed with an 'm'
if [[ -d "\${python_path}/bin" ]]; then
export PATH="\${python_path}/bin:\$PATH"
elif [[ -d "\${python_path}m/bin" ]]; then
export PATH="\${python_path}m/bin:\$PATH"
fi
export PATH="\${python_path}/bin:\$PATH"
fi
EXTRA_CONDA_FLAGS=""

View File

@ -44,9 +44,7 @@ ContinuationIndentWidth: 4
Cpp11BracedListStyle: true
DerivePointerAlignment: false
DisableFormat: false
ForEachMacros:
- FOR_EACH_RANGE
- FOR_EACH
ForEachMacros: [ FOR_EACH_RANGE, FOR_EACH, ]
IncludeCategories:
- Regex: '^<.*\.h(pp)?>'
Priority: 1
@ -60,24 +58,6 @@ IndentWrappedFunctionNames: false
KeepEmptyLinesAtTheStartOfBlocks: false
MacroBlockBegin: ''
MacroBlockEnd: ''
Macros:
- >-
PyObject_HEAD_INIT(type)={
/* this is not exactly match with PyObject_HEAD_INIT in Python source code
* but it is enough for clang-format */
{ 0xFFFFFFFF },
(type)
},
- >-
PyVarObject_HEAD_INIT(type, size)={
{
/* manually expand PyObject_HEAD_INIT(type) above
* because clang-format do not support recursive expansion */
{ 0xFFFFFFFF },
(type)
},
(size)
},
MaxEmptyLinesToKeep: 1
NamespaceIndentation: None
PenaltyBreakBeforeFirstCallParameter: 1
@ -99,11 +79,7 @@ SpacesInContainerLiterals: true
SpacesInCStyleCastParentheses: false
SpacesInParentheses: false
SpacesInSquareBrackets: false
Standard: c++17
StatementMacros:
- PyObject_HEAD
- PyObject_VAR_HEAD
- PyException_HEAD
Standard: Cpp11
TabWidth: 8
UseTab: Never
---

.github/ISSUE_TEMPLATE.md (new file, +38 lines)
View File

@ -0,0 +1,38 @@
If you have a question or would like help and support, please ask at our
[forums](https://discuss.pytorch.org/).
If you are submitting a feature request, please preface the title with [feature request].
If you are submitting a bug report, please fill in the following details.
## Issue description
Provide a short description.
## Code example
Please try to provide a minimal example to repro the bug.
Error messages and stack traces are also helpful.
## System Info
Please copy and paste the output from our
[environment collection script](https://raw.githubusercontent.com/pytorch/pytorch/main/torch/utils/collect_env.py)
(or fill out the checklist below manually).
You can get the script and run it with:
```
wget https://raw.githubusercontent.com/pytorch/pytorch/main/torch/utils/collect_env.py
# For security purposes, please check the contents of collect_env.py before running it.
python collect_env.py
```
- PyTorch or Caffe2:
- How you installed PyTorch (conda, pip, source):
- Build command you used (if compiling from source):
- OS:
- PyTorch version:
- Python version:
- CUDA/cuDNN version:
- GPU models and configuration:
- GCC version (if compiling from source):
- CMake version:
- Versions of any other relevant libraries:

View File

@ -18,14 +18,8 @@ inputs:
runs:
using: composite
steps:
- name: Check if in a container runner
shell: bash
id: check_container_runner
run: echo "IN_CONTAINER_RUNNER=$(if [ -f /.inarc ] || [ -f /.incontainer ]; then echo true ; else echo false; fi)" >> "$GITHUB_OUTPUT"
- name: Clean workspace
shell: bash
if: ${{ steps.check_container_runner.outputs.IN_CONTAINER_RUNNER == 'false' }}
env:
NO_SUDO: ${{ inputs.no-sudo }}
run: |

View File

@ -85,25 +85,15 @@ runs:
with:
docker-image: ${{ steps.calculate-docker-image.outputs.docker-image }}
- name: Check if in a container runner
- name: Check if in a ARC runner
shell: bash
id: check_container_runner
run: echo "IN_CONTAINER_RUNNER=$(if [ -f /.inarc ] || [ -f /.incontainer ]; then echo true ; else echo false; fi)" >> "$GITHUB_OUTPUT"
id: check_arc_runner
run: echo "IN_ARC_RUNNER=$([ -f /.inarc ] && echo true || echo false)" >> "$GITHUB_OUTPUT"
- name: Install nvidia driver, nvidia-docker runtime, set GPU_FLAG
id: install-nvidia-driver
uses: pytorch/test-infra/.github/actions/setup-nvidia@main
if: ${{ contains(inputs.build-environment, 'cuda') && !contains(matrix.config, 'nogpu') && steps.check_container_runner.outputs.IN_CONTAINER_RUNNER == 'false' }}
- name: Setup GPU_FLAG for docker run
id: setup-gpu-flag
run: echo "GPU_FLAG=--gpus all -e NVIDIA_DRIVER_CAPABILITIES=all" >> "${GITHUB_ENV}"
if: ${{ contains(inputs.build-environment, 'cuda') && !contains(matrix.config, 'nogpu') && steps.check_container_runner.outputs.IN_CONTAINER_RUNNER == 'true' }}
- name: Setup SCCACHE_SERVER_PORT environment for docker run when on container
id: setup-sscache-port-flag
run: echo "SCCACHE_SERVER_PORT_DOCKER_FLAG=-e SCCACHE_SERVER_PORT=$((RUNNER_UID + 4226))" >> "${GITHUB_ENV}"
if: ${{ steps.check_container_runner.outputs.IN_CONTAINER_RUNNER == 'true' }}
if: ${{ contains(inputs.build-environment, 'cuda') && !contains(matrix.config, 'nogpu') && steps.check_arc_runner.outputs.IN_ARC_RUNNER == 'false' }}
- name: Lock NVIDIA A100 40GB Frequency
shell: bash
@ -111,7 +101,7 @@ runs:
sudo nvidia-smi -pm 1
sudo nvidia-smi -ac 1215,1410
nvidia-smi
if: ${{ contains(matrix.runner, 'a100') && steps.check_container_runner.outputs.IN_CONTAINER_RUNNER == 'false' }}
if: contains(matrix.runner, 'a100')
- name: Start monitoring script
id: monitor-script
@ -182,7 +172,6 @@ runs:
NO_TD: ${{ steps.keep-going.outputs.ci-no-td }}
TD_DISTRIBUTED: ${{ steps.keep-going.outputs.ci-td-distributed }}
SCCACHE_BUCKET: ossci-compiler-cache-circleci-v2
SCCACHE_REGION: us-east-1
SCCACHE_S3_KEY_PREFIX: ${{ github.workflow }}
SHM_SIZE: ${{ contains(inputs.build-environment, 'cuda') && '2g' || '1g' }}
DOCKER_IMAGE: ${{ inputs.docker-image }}
@ -192,9 +181,6 @@ runs:
PYTORCH_TEST_RERUN_DISABLED_TESTS: ${{ matrix.rerun_disabled_tests && '1' || '0' }}
DASHBOARD_TAG: ${{ inputs.dashboard-tag }}
HUGGING_FACE_HUB_TOKEN: ${{ inputs.HUGGING_FACE_HUB_TOKEN }}
SCRIBE_GRAPHQL_ACCESS_TOKEN: ${{ secrets.SCRIBE_GRAPHQL_ACCESS_TOKEN }}
IS_A100_RUNNER: ${{ contains(matrix.runner, 'a100') && '1' || '0' }}
shell: bash
run: |
set -x
@ -213,7 +199,6 @@ runs:
# shellcheck disable=SC2086,SC2090
container_name=$(docker run \
${GPU_FLAG:-} \
${SCCACHE_SERVER_PORT_DOCKER_FLAG:-} \
-e BUILD_ENVIRONMENT \
-e PR_NUMBER \
-e GITHUB_ACTIONS \
@ -242,7 +227,6 @@ runs:
-e PR_LABELS \
-e MAX_JOBS="$(nproc --ignore=2)" \
-e SCCACHE_BUCKET \
-e SCCACHE_REGION \
-e SCCACHE_S3_KEY_PREFIX \
-e XLA_CUDA \
-e XLA_CLANG_CACHE_S3_BUCKET_NAME \
@ -250,9 +234,7 @@ runs:
-e PYTORCH_TEST_RERUN_DISABLED_TESTS \
-e SKIP_SCCACHE_INITIALIZATION=1 \
-e HUGGING_FACE_HUB_TOKEN \
-e SCRIBE_GRAPHQL_ACCESS_TOKEN \
-e DASHBOARD_TAG \
-e IS_A100_RUNNER \
--env-file="/tmp/github_env_${GITHUB_RUN_ID}" \
--security-opt seccomp=unconfined \
--cap-add=SYS_PTRACE \
@ -323,7 +305,7 @@ runs:
- name: Teardown Linux
uses: pytorch/test-infra/.github/actions/teardown-linux@main
if: always() && steps.check_container_runner.outputs.IN_CONTAINER_RUNNER == 'false'
if: always()
# NB: We are currently having an intermittent GPU-related issue on G5 runners with
# A10G GPU. Once this happens, trying to reset the GPU as done in setup-nvidia does

View File

@ -28,14 +28,14 @@ runs:
echo "instance-type: $(get_ec2_metadata instance-type)"
echo "system info $(uname -a)"
- name: Check if in a container runner
- name: Check if in a ARC runner
shell: bash
id: check_container_runner
run: echo "IN_CONTAINER_RUNNER=$(if [ -f /.inarc ] || [ -f /.incontainer ]; then echo true ; else echo false; fi)" >> "$GITHUB_OUTPUT"
id: check_arc_runner
run: echo "IN_ARC_RUNNER=$([ -f /.inarc ] && echo true || echo false)" >> $GITHUB_OUTPUT
- name: Start docker if docker deamon is not running
shell: bash
if: ${{ steps.check_container_runner.outputs.IN_CONTAINER_RUNNER == 'false' }}
if: ${{ steps.check_arc_runner.outputs.IN_ARC_RUNNER == 'false' }}
run: |
if systemctl is-active --quiet docker; then
echo "Docker daemon is running...";
@ -73,7 +73,7 @@ runs:
env | grep '^CI' >> "/tmp/github_env_${GITHUB_RUN_ID}"
- name: Kill any existing containers, clean up images
if: ${{ steps.check_container_runner.outputs.IN_CONTAINER_RUNNER == 'false' }}
if: ${{ steps.check_arc_runner.outputs.IN_ARC_RUNNER == 'false' }}
shell: bash
run: |
# ignore expansion of "docker ps -q" since it could be empty
@ -116,7 +116,7 @@ runs:
- name: Check that the docker daemon is running
shell: bash
continue-on-error: true
if: ${{ steps.check_container_runner.outputs.IN_CONTAINER_RUNNER == 'true' }}
if: ${{ steps.check_arc_runner.outputs.IN_ARC_RUNNER == 'true' }}
run: |
set +x

View File

@ -1 +1 @@
3f0569939c4369bec943fc27d1c9d8dfbc828c26
ba696ea3dfec4cbe693bf06a84c75dc196077f5b

View File

@ -16,7 +16,6 @@ ciflow_push_tags:
- ciflow/nightly
- ciflow/periodic
- ciflow/rocm
- ciflow/s390
- ciflow/slow
- ciflow/trunk
- ciflow/unstable

View File

@ -1,4 +1,4 @@
# iOS simulator requirements
coremltools==5.0b5
protobuf==3.20.2
optree==0.13.0
optree==0.12.1

View File

@ -27,7 +27,7 @@ pytest-cpp==2.3.0
rockset==1.0.3
z3-solver==4.12.2.0
tensorboard==2.13.0
optree==0.13.0
optree==0.12.1
# NB: test_hparams_* from test_tensorboard is failing with protobuf 5.26.0 in
# which the stringify metadata is wrong when escaping double quote
protobuf==3.20.2

View File

@ -333,7 +333,7 @@ def generate_wheels_matrix(
package_type = "manywheel"
if python_versions is None:
python_versions = FULL_PYTHON_VERSIONS + ["3.13", "3.13t"]
python_versions = FULL_PYTHON_VERSIONS + ["3.13"]
if arches is None:
# Define default compute archivectures
@ -369,13 +369,7 @@ def generate_wheels_matrix(
# TODO: Enable python 3.13 on rocm, aarch64, windows
if (
gpu_arch_type == "rocm" or (os != "linux" and os != "linux-s390x")
) and (python_version == "3.13" or python_version == "3.13t"):
continue
# TODO: Enable python 3.13t on xpu and cpu-s390x
if (
gpu_arch_type == "xpu" or gpu_arch_type == "cpu-s390x"
) and python_version == "3.13t":
) and python_version == "3.13":
continue
if use_split_build and (

View File

@ -1,9 +1,5 @@
# flake8: noqa: G004
# Note: Copies of this script in runner_determinator.py and _runner-determinator.yml
# must be kept in sync. You can do it easily by running the following command:
# python .github/scripts/update_runner_determinator.py
"""
This runner determinator is used to determine which set of runners to run a
GitHub job on. It uses the first comment of a GitHub issue (by default
@ -83,9 +79,6 @@ class Experiment(NamedTuple):
rollout_perc: float = (
0 # Percentage of workflows to experiment on when user is not opted-in.
)
all_branches: bool = (
False # If True, the experiment is also enabled on the exception branches
)
# Add more fields as needed
@ -219,7 +212,7 @@ def get_potential_pr_author(
def is_exception_branch(branch: str) -> bool:
"""
Branches that get opted out of experiments by default, until they're explicitly enabled.
Branches that get opted out of all experiments and should always use Meta runners
"""
return branch.split("/")[0] in {"main", "nightly", "release", "landchecks"}
@ -345,10 +338,7 @@ def is_user_opted_in(user: str, user_optins: UserOptins, experiment_name: str) -
def get_runner_prefix(
rollout_state: str,
workflow_requestors: Iterable[str],
branch: str,
is_canary: bool = False,
rollout_state: str, workflow_requestors: Iterable[str], is_canary: bool = False
) -> str:
settings = parse_settings(rollout_state)
user_optins = parse_users(rollout_state)
@ -358,12 +348,6 @@ def get_runner_prefix(
for experiment_name, experiment_settings in settings.experiments.items():
enabled = False
if not experiment_settings.all_branches and is_exception_branch(branch):
log.info(
f"Branch {branch} is an exception branch. Not enabling experiment {experiment_name}."
)
continue
# Is any workflow_requestor opted in to this experiment?
opted_in_users = [
requestor
@ -423,34 +407,35 @@ def get_rollout_state_from_issue(github_token: str, repo: str, issue_num: int) -
def main() -> None:
args = parse_args()
runner_label_prefix = DEFAULT_LABEL_PREFIX
try:
rollout_state = get_rollout_state_from_issue(
args.github_token, args.github_issue_repo, args.github_issue
if args.github_ref_type == "branch" and is_exception_branch(args.github_branch):
log.info(
f"Exception branch: '{args.github_branch}', using Meta runners and no experiments."
)
runner_label_prefix = DEFAULT_LABEL_PREFIX
else:
try:
rollout_state = get_rollout_state_from_issue(
args.github_token, args.github_issue_repo, args.github_issue
)
username = get_potential_pr_author(
args.github_token,
args.github_repo,
args.github_actor,
args.github_ref_type,
args.github_branch,
)
username = get_potential_pr_author(
args.github_token,
args.github_repo,
args.github_actor,
args.github_ref_type,
args.github_branch,
)
is_canary = args.github_repo == "pytorch/pytorch-canary"
is_canary = args.github_repo == "pytorch/pytorch-canary"
runner_label_prefix = get_runner_prefix(
rollout_state,
(args.github_issue_owner, username),
args.github_branch,
is_canary,
)
runner_label_prefix = get_runner_prefix(
rollout_state, (args.github_issue_owner, username), is_canary
)
except Exception as e:
log.error(
f"Failed to get issue. Defaulting to Meta runners and no experiments. Exception: {e}"
)
except Exception as e:
log.error(
f"Failed to get issue. Defaulting to Meta runners and no experiments. Exception: {e}"
)
set_github_output(GH_OUTPUT_KEY_LABEL_TYPE, runner_label_prefix)

View File

@ -4,10 +4,6 @@ from unittest.mock import Mock, patch
import runner_determinator as rd
USER_BRANCH = "somebranch"
EXCEPTION_BRANCH = "main"
class TestRunnerDeterminatorIssueParser(TestCase):
def test_parse_settings(self) -> None:
settings_text = """
@ -70,40 +66,6 @@ class TestRunnerDeterminatorIssueParser(TestCase):
"otherExp settings not parsed correctly",
)
def test_parse_all_branches_setting(self) -> None:
settings_text = """
```
experiments:
lf:
rollout_perc: 25
all_branches: true
otherExp:
all_branches: True
rollout_perc: 0
```
---
Users:
@User1,lf
@User2,lf,otherExp
"""
settings = rd.parse_settings(settings_text)
self.assertTupleEqual(
rd.Experiment(rollout_perc=25, all_branches=True),
settings.experiments["lf"],
"lf settings not parsed correctly",
)
self.assertTrue(settings.experiments["otherExp"].all_branches)
self.assertTupleEqual(
rd.Experiment(rollout_perc=0, all_branches=True),
settings.experiments["otherExp"],
"otherExp settings not parsed correctly",
)
def test_parse_users(self) -> None:
settings_text = """
experiments:
@ -157,7 +119,7 @@ class TestRunnerDeterminatorGetRunnerPrefix(TestCase):
@User2,lf,otherExp
"""
prefix = rd.get_runner_prefix(settings_text, ["User1"], USER_BRANCH)
prefix = rd.get_runner_prefix(settings_text, ["User1"])
self.assertEqual("lf.", prefix, "Runner prefix not correct for User1")
def test_opted_in_user_two_experiments(self) -> None:
@ -174,7 +136,7 @@ class TestRunnerDeterminatorGetRunnerPrefix(TestCase):
@User2,lf,otherExp
"""
prefix = rd.get_runner_prefix(settings_text, ["User2"], USER_BRANCH)
prefix = rd.get_runner_prefix(settings_text, ["User2"])
self.assertEqual("lf.otherExp.", prefix, "Runner prefix not correct for User2")
@patch("random.uniform", return_value=50)
@ -192,7 +154,7 @@ class TestRunnerDeterminatorGetRunnerPrefix(TestCase):
@User2,lf,otherExp
"""
prefix = rd.get_runner_prefix(settings_text, ["User3"], USER_BRANCH)
prefix = rd.get_runner_prefix(settings_text, ["User3"])
self.assertEqual("", prefix, "Runner prefix not correct for user")
@patch("random.uniform", return_value=10)
@ -212,7 +174,7 @@ class TestRunnerDeterminatorGetRunnerPrefix(TestCase):
"""
# User3 is opted out, but is pulled into both experiments by the 10% rollout
prefix = rd.get_runner_prefix(settings_text, ["User3"], USER_BRANCH)
prefix = rd.get_runner_prefix(settings_text, ["User3"])
self.assertEqual("lf.otherExp.", prefix, "Runner prefix not correct for user")
def test_lf_prefix_always_comes_first(self) -> None:
@ -230,7 +192,7 @@ class TestRunnerDeterminatorGetRunnerPrefix(TestCase):
"""
prefix = rd.get_runner_prefix(settings_text, ["User2"], USER_BRANCH)
prefix = rd.get_runner_prefix(settings_text, ["User2"])
self.assertEqual("lf.otherExp.", prefix, "Runner prefix not correct for user")
def test_ignores_commented_users(self) -> None:
@ -248,7 +210,7 @@ class TestRunnerDeterminatorGetRunnerPrefix(TestCase):
"""
prefix = rd.get_runner_prefix(settings_text, ["User1"], USER_BRANCH)
prefix = rd.get_runner_prefix(settings_text, ["User1"])
self.assertEqual("", prefix, "Runner prefix not correct for user")
def test_ignores_extra_experiments(self) -> None:
@ -267,44 +229,9 @@ class TestRunnerDeterminatorGetRunnerPrefix(TestCase):
"""
prefix = rd.get_runner_prefix(settings_text, ["User1"], USER_BRANCH)
prefix = rd.get_runner_prefix(settings_text, ["User1"])
self.assertEqual("lf.otherExp.", prefix, "Runner prefix not correct for user")
def test_disables_experiment_on_exception_branches_when_not_explicitly_opted_in(
self,
) -> None:
settings_text = """
experiments:
lf:
rollout_perc: 100
---
Users:
@User,lf,otherExp
"""
prefix = rd.get_runner_prefix(settings_text, ["User1"], EXCEPTION_BRANCH)
self.assertEqual("", prefix, "Runner prefix not correct for user")
def test_allows_experiment_on_exception_branches_when_explicitly_opted_in(
self,
) -> None:
settings_text = """
experiments:
lf:
rollout_perc: 100
all_branches: true
---
Users:
@User,lf,otherExp
"""
prefix = rd.get_runner_prefix(settings_text, ["User1"], EXCEPTION_BRANCH)
self.assertEqual("lf.", prefix, "Runner prefix not correct for user")
if __name__ == "__main__":
main()

View File

@ -12,7 +12,7 @@ import json
import os
import warnings
from hashlib import sha256
from typing import Any, List, Optional
from typing import Any, Dict, List, Optional
from unittest import main, mock, skip, TestCase
from urllib.error import HTTPError
@ -24,6 +24,7 @@ from trymerge import (
find_matching_merge_rule,
get_classifications,
get_drci_classifications,
get_rockset_results,
gh_get_team_members,
GitHubPR,
JobCheckState,
@ -41,6 +42,7 @@ if "GIT_REMOTE_URL" not in os.environ:
os.environ["GIT_REMOTE_URL"] = "https://github.com/pytorch/pytorch"
GQL_MOCKS = "gql_mocks.json.gz"
ROCKSET_MOCKS = "rockset_mocks.json.gz"
DRCI_MOCKS = "drci_mocks.json.gz"
@ -75,11 +77,16 @@ def mock_query(
if err.code == 401 or err.code == 403:
err_msg = f"If you are seeing this message during workflow run, please make sure to update {file_name}"
err_msg += f" locally, by deleting it and running {os.path.basename(__file__)} with"
err_msg += " GitHub Personal Access Token passed via GITHUB_TOKEN"
err_msg += " GitHub Personal Access Token passed via GITHUB_TOKEN,"
err_msg += " the rockset api key passed via ROCKSET_API_KEY,"
err_msg += " and drci api key passed via DRCI_BOT_KEY environment variables"
if os.getenv("GITHUB_TOKEN") is None or os.getenv("DRCI_BOT_KEY") is None:
if (
os.getenv("GITHUB_TOKEN") is None
or os.getenv("ROCKSET_API_KEY") is None
or os.getenv("DRCI_BOT_KEY") is None
):
err_msg = (
"Failed to update cached queries as GITHUB_TOKEN or DRCI_BOT_KEY "
"Failed to update cached queries as GITHUB_TOKEN or ROCKSET_API_KEY or DRCI_BOT_KEY "
+ "is not defined. "
+ err_msg
)
@ -103,6 +110,16 @@ def mocked_gh_graphql(query: str, **kwargs: Any) -> Any:
return mock_query(gh_graphql_wrapper, GQL_MOCKS, key_function, query, kwargs)
def mocked_rockset_results(head_sha: str, merge_base: str, num_retries: int = 3) -> Any:
return mock_query(
get_rockset_results,
ROCKSET_MOCKS,
lambda x, y: f"{x} {y}",
head_sha,
merge_base,
)
def mocked_drci_classifications(pr_num: int, project: str, num_retries: int = 3) -> Any:
return mock_query(
get_drci_classifications,
@ -256,6 +273,10 @@ def xla_merge_rules(repo: Any, org: str, project: str) -> List[MergeRule]:
]
def empty_rockset_results(head_sha: str, merge_base: str) -> List[Dict[str, Any]]:
return []
class DummyGitRepo(GitRepo):
def __init__(self) -> None:
super().__init__(get_git_repo_dir(), get_git_remote_name())
@ -267,6 +288,7 @@ class DummyGitRepo(GitRepo):
return "super awsome commit message"
@mock.patch("trymerge.get_rockset_results", side_effect=empty_rockset_results)
@mock.patch("trymerge.gh_graphql", side_effect=mocked_gh_graphql)
@mock.patch(
"trymerge.get_drci_classifications", side_effect=mocked_drci_classifications
@ -582,6 +604,7 @@ class TestTryMerge(TestCase):
mocked_gh_fetch_merge_base.assert_called_once()
@mock.patch("trymerge.get_rockset_results", side_effect=mocked_rockset_results)
@mock.patch("trymerge.gh_graphql", side_effect=mocked_gh_graphql)
@mock.patch("trymerge.gh_fetch_merge_base", return_value="")
@mock.patch(
@ -820,7 +843,7 @@ class TestBypassFailures(TestCase):
checks = pr.get_checkrun_conclusions()
# Known flaky failure takes precedence over ignore current (need to set the
# merge base here to get the results from Dr. CI, and that categorize the
# merge base here to get the results from Rockset, and that categorize the
# broken trunk failure too
checks = get_classifications(
pr.pr_num,
@ -906,6 +929,7 @@ class TestBypassFailures(TestCase):
)
@mock.patch("trymerge.get_rockset_results", side_effect=mocked_rockset_results)
@mock.patch("trymerge.gh_graphql", side_effect=mocked_gh_graphql)
@mock.patch("trymerge.gh_fetch_merge_base", return_value="")
@mock.patch("trymerge.get_drci_classifications", return_value={})
@ -984,6 +1008,7 @@ class TestBypassFailuresOnSandCastle(TestCase):
self.assertTrue(len(failed) == 2)
@mock.patch("trymerge.get_rockset_results", side_effect=mocked_rockset_results)
@mock.patch("trymerge.gh_graphql", side_effect=mocked_gh_graphql)
@mock.patch("trymerge.gh_fetch_merge_base", return_value="")
@mock.patch(

View File

@ -452,6 +452,8 @@ RE_DIFF_REV = re.compile(r"^Differential Revision:.+?(D[0-9]+)", re.MULTILINE)
CIFLOW_LABEL = re.compile(r"^ciflow/.+")
CIFLOW_TRUNK_LABEL = re.compile(r"^ciflow/trunk")
MERGE_RULE_PATH = Path(".github") / "merge_rules.yaml"
ROCKSET_MERGES_COLLECTION = "merges"
ROCKSET_MERGES_WORKSPACE = "commons"
REMOTE_MAIN_BRANCH = "origin/main"
DRCI_CHECKRUN_NAME = "Dr.CI"
INTERNAL_CHANGES_CHECKRUN_NAME = "Meta Internal-Only Changes Check"
@ -1178,7 +1180,7 @@ class GitHubPR:
merge_commit_sha = repo.rev_parse(name=self.default_branch())
if comment_id and self.pr_num:
# Finally, upload the record to s3. The list of pending and failed
# Finally, upload the record to Rockset. The list of pending and failed
# checks are at the time of the merge
save_merge_record(
comment_id=comment_id,
@ -1200,7 +1202,7 @@ class GitHubPR:
ignore_current=bool(ignore_current_checks),
)
else:
print("Missing comment ID or PR number, couldn't upload to s3")
print("Missing comment ID or PR number, couldn't upload to Rockset")
# Usually Github will see that the commit has "resolves <pr_num>" in the
# commit message and close the PR, but sometimes it doesn't, leading to
@ -1479,7 +1481,7 @@ def find_matching_merge_rule(
# Categorize all checks when skip_mandatory_checks (force merge) is set. Do it here
# where the list of checks is readily available. These records will be saved into
# s3 merge records
# Rockset merge records
(
pending_mandatory_checks,
failed_mandatory_checks,
@ -1566,7 +1568,7 @@ def save_merge_record(
This saves the merge records as a json, which can later be uploaded to s3
"""
# Prepare the record to be written into s3
# Prepare the record to be written into Rockset
data = [
{
"comment_id": comment_id,
@ -1588,8 +1590,7 @@ def save_merge_record(
"ignore_current": ignore_current,
"error": error,
# This is a unique identifier for the record for deduping purposes
# in Rockset. Any unique string would work. This will not be used
# after we migrate off Rockset
# in rockset. Any unique string would work
"_id": f"{project}-{pr_num}-{comment_id}-{os.environ.get('GITHUB_RUN_ID')}",
}
]
@ -1599,6 +1600,36 @@ def save_merge_record(
json.dump(data, f)
@retries_decorator(rc=[])
def get_rockset_results(head_sha: str, merge_base: str) -> List[Dict[str, Any]]:
query = f"""
SELECT
w.name as workflow_name,
j.id,
j.name,
j.conclusion,
j.completed_at,
j.html_url,
j.head_sha,
j.torchci_classification.captures as failure_captures,
LENGTH(j.steps) as steps,
FROM
commons.workflow_job j join commons.workflow_run w on w.id = j.run_id
where
j.head_sha in ('{head_sha}','{merge_base}')
"""
try:
import rockset # type: ignore[import]
res = rockset.RocksetClient(
host="api.usw2a1.rockset.com", api_key=os.environ["ROCKSET_API_KEY"]
).sql(query)
return cast(List[Dict[str, Any]], res.results)
except ModuleNotFoundError:
print("Could not use RockSet as rocket dependency is missing")
return []
@retries_decorator()
def get_drci_classifications(pr_num: int, project: str = "pytorch") -> Any:
"""
@ -2036,7 +2067,7 @@ def categorize_checks(
pending_checks: List[Tuple[str, Optional[str], Optional[int]]] = []
failed_checks: List[Tuple[str, Optional[str], Optional[int]]] = []
# failed_checks_categorization is used to keep track of all ignorable failures when saving the merge record on s3
# failed_checks_categorization is used to keep track of all ignorable failures when saving the merge record on Rockset
failed_checks_categorization: Dict[str, List[Any]] = defaultdict(list)
# If required_checks is not set or empty, consider all names are relevant
@ -2095,7 +2126,7 @@ def categorize_checks(
):
failed_checks = failed_checks + flaky_or_broken_trunk
# The list of failed_checks_categorization is returned so that it can be saved into the s3 merge record
# The list of failed_checks_categorization is returned so that it can be saved into the Rockset merge record
return (pending_checks, failed_checks, failed_checks_categorization)
@ -2379,7 +2410,7 @@ def main() -> None:
handle_exception(e)
if args.comment_id and args.pr_num:
# Finally, upload the record to s3, we don't have access to the
# Finally, upload the record to Rockset, we don't have access to the
# list of pending and failed checks here, but they are not really
# needed at the moment
save_merge_record(
@ -2402,7 +2433,7 @@ def main() -> None:
error=str(e),
)
else:
print("Missing comment ID or PR number, couldn't upload to s3")
print("Missing comment ID or PR number, couldn't upload to Rockset")
finally:
if not args.check_mergeability:
gh_remove_label(

View File

@ -1,31 +0,0 @@
#!/usr/bin/env python3
import re
# Read the contents of runner_determinator.py
with open(".github/scripts/runner_determinator.py") as script_file:
script_content = script_file.read()
# Indent the script content by 10 spaces to match destination indentation
indented_script_content = "\n".join(
[" " * 10 + line if line else line for line in script_content.splitlines()]
)
# Read the contents of _runner-determinator.yml
with open(".github/workflows/_runner-determinator.yml") as yml_file:
yml_content = yml_file.read()
# Replace the content between the markers
new_yml_content = re.sub(
r"(cat <<EOF > runner_determinator.py\n)(.*?)(\n\s+EOF)",
lambda match: match.group(1) + indented_script_content + match.group(3),
yml_content,
flags=re.DOTALL,
)
# Save the modified content back to _runner-determinator.yml
with open(".github/workflows/_runner-determinator.yml", "w") as yml_file:
yml_file.write(new_yml_content)
print("Updated _runner-determinator.yml with the contents of runner_determinator.py")

View File

@ -68,7 +68,6 @@ jobs:
needs: get-label-type
with:!{{ upload.binary_env_as_input(config) }}
{%- if "aarch64" in build_environment %}
runner_prefix: "${{ needs.get-label-type.outputs.label-type }}"
runs_on: linux.arm64.m7g.4xlarge.ephemeral
ALPINE_IMAGE: "arm64v8/alpine"
{%- elif "s390x" in build_environment %}
@ -103,7 +102,6 @@ jobs:
build_name: !{{ config["build_name"] }}
build_environment: !{{ build_environment }}
{%- if "aarch64" in build_environment %}
runner_prefix: "${{ needs.get-label-type.outputs.label-type }}"
runs_on: linux.arm64.2xlarge
ALPINE_IMAGE: "arm64v8/alpine"
{%- elif "s390x" in build_environment %}

View File

@ -91,14 +91,14 @@ jobs:
with:
docker-image: ${{ steps.calculate-docker-image.outputs.docker-image }}
- name: Check if in a container runner
- name: Check if in a ARC runner
shell: bash
id: check_container_runner
run: echo "IN_CONTAINER_RUNNER=$(if [ -f /.inarc ] || [ -f /.incontainer ]; then echo true ; else echo false; fi)" >> "$GITHUB_OUTPUT"
id: check_arc_runner
run: echo "IN_ARC_RUNNER=$([ -f /.inarc ] && echo true || echo false)" >> "$GITHUB_OUTPUT"
- name: Install nvidia driver, nvidia-docker runtime, set GPU_FLAG
uses: pytorch/test-infra/.github/actions/setup-nvidia@main
if: ${{ inputs.cuda-version != 'cpu' && steps.check_container_runner.outputs.IN_CONTAINER_RUNNER == 'false' }}
if: ${{ inputs.cuda-version != 'cpu' && steps.check_arc_runner.outputs.IN_ARC_RUNNER == 'false' }}
- name: Output disk space left
run: |

View File

@ -114,32 +114,22 @@ jobs:
with:
docker-image: ${{ steps.calculate-docker-image.outputs.docker-image }}
- name: Check if in a container runner
- name: Check if in a ARC runner
shell: bash
id: check_container_runner
run: echo "IN_CONTAINER_RUNNER=$(if [ -f /.inarc ] || [ -f /.incontainer ]; then echo true ; else echo false; fi)" >> "$GITHUB_OUTPUT"
id: check_arc_runner
run: echo "IN_ARC_RUNNER=$([ -f /.inarc ] && echo true || echo false)" >> "$GITHUB_OUTPUT"
- name: Install nvidia driver, nvidia-docker runtime, set GPU_FLAG
id: install-nvidia-driver
uses: pytorch/test-infra/.github/actions/setup-nvidia@main
if: ${{ contains(inputs.build-environment, 'cuda') && !contains(matrix.config, 'nogpu') && steps.check_container_runner.outputs.IN_CONTAINER_RUNNER == 'false' }}
- name: Setup GPU_FLAG for docker run
id: setup-gpu-flag
run: echo "GPU_FLAG=--gpus all -e NVIDIA_DRIVER_CAPABILITIES=all" >> "${GITHUB_ENV}"
if: ${{ contains(inputs.build-environment, 'cuda') && !contains(matrix.config, 'nogpu') && steps.check_container_runner.outputs.IN_CONTAINER_RUNNER == 'true' }}
- name: Setup SCCACHE_SERVER_PORT environment for docker run when on container
id: setup-sscache-port-flag
run: echo "SCCACHE_SERVER_PORT_DOCKER_FLAG=-e SCCACHE_SERVER_PORT=$((RUNNER_UID + 4226))" >> "${GITHUB_ENV}"
if: ${{ steps.check_container_runner.outputs.IN_CONTAINER_RUNNER == 'true' }}
if: ${{ contains(inputs.build-environment, 'cuda') && !contains(matrix.config, 'nogpu') && steps.check_arc_runner.outputs.IN_ARC_RUNNER == 'false' }}
- name: Lock NVIDIA A100 40GB Frequency
run: |
sudo nvidia-smi -pm 1
sudo nvidia-smi -ac 1215,1410
nvidia-smi
if: ${{ contains(matrix.runner, 'a100') && steps.check_container_runner.outputs.IN_CONTAINER_RUNNER == 'false' }}
if: contains(matrix.runner, 'a100')
- name: Start monitoring script
id: monitor-script
@ -218,7 +208,6 @@ jobs:
NO_TD: ${{ steps.keep-going.outputs.ci-no-td }}
TD_DISTRIBUTED: ${{ steps.keep-going.outputs.ci-td-distributed }}
SCCACHE_BUCKET: ossci-compiler-cache-circleci-v2
SCCACHE_REGION: us-east-1
SCCACHE_S3_KEY_PREFIX: ${{ github.workflow }}
SHM_SIZE: ${{ contains(inputs.build-environment, 'cuda') && '2g' || '1g' }}
DOCKER_IMAGE: ${{ inputs.docker-image }}
@ -229,7 +218,6 @@ jobs:
DASHBOARD_TAG: ${{ inputs.dashboard-tag }}
HUGGING_FACE_HUB_TOKEN: ${{ secrets.HUGGING_FACE_HUB_TOKEN }}
SCRIBE_GRAPHQL_ACCESS_TOKEN: ${{ secrets.SCRIBE_GRAPHQL_ACCESS_TOKEN }}
IS_A100_RUNNER: ${{ contains(matrix.runner, 'a100') && '1' || '0' }}
run: |
set -x
@ -248,7 +236,6 @@ jobs:
# shellcheck disable=SC2086,SC2090
container_name=$(docker run \
${GPU_FLAG:-} \
${SCCACHE_SERVER_PORT_DOCKER_FLAG:-} \
-e BUILD_ENVIRONMENT \
-e PR_NUMBER \
-e GITHUB_ACTIONS \
@ -278,7 +265,6 @@ jobs:
-e PR_LABELS \
-e MAX_JOBS="$(nproc --ignore=2)" \
-e SCCACHE_BUCKET \
-e SCCACHE_REGION \
-e SCCACHE_S3_KEY_PREFIX \
-e XLA_CUDA \
-e XLA_CLANG_CACHE_S3_BUCKET_NAME \
@ -288,7 +274,6 @@ jobs:
-e HUGGING_FACE_HUB_TOKEN \
-e SCRIBE_GRAPHQL_ACCESS_TOKEN \
-e DASHBOARD_TAG \
-e IS_A100_RUNNER \
--env-file="/tmp/github_env_${GITHUB_RUN_ID}" \
--security-opt seccomp=unconfined \
--cap-add=SYS_PTRACE \
@ -358,7 +343,7 @@ jobs:
- name: Teardown Linux
uses: pytorch/test-infra/.github/actions/teardown-linux@main
if: always() && steps.check_container_runner.outputs.IN_CONTAINER_RUNNER == 'false'
if: always()
# NB: We are currently having an intermittent GPU-related issue on G5 runners with
# A10G GPU. Once this happens, trying to reset the GPU as done in setup-nvidia does

View File

@ -59,10 +59,6 @@ jobs:
cat <<EOF > runner_determinator.py
# flake8: noqa: G004
# Note: Copies of this script in runner_determinator.py and _runner-determinator.yml
# must be kept in sync. You can do it easily by running the following command:
# python .github/scripts/update_runner_determinator.py
"""
This runner determinator is used to determine which set of runners to run a
GitHub job on. It uses the first comment of a GitHub issue (by default
@ -142,9 +138,6 @@ jobs:
rollout_perc: float = (
0 # Percentage of workflows to experiment on when user is not opted-in.
)
all_branches: bool = (
False # If True, the experiment is also enabled on the exception branches
)
# Add more fields as needed
@ -278,7 +271,7 @@ jobs:
def is_exception_branch(branch: str) -> bool:
"""
Branches that get opted out of experiments by default, until they're explicitly enabled.
Branches that get opted out of all experiments and should always use Meta runners
"""
return branch.split("/")[0] in {"main", "nightly", "release", "landchecks"}
@ -404,10 +397,7 @@ jobs:
def get_runner_prefix(
rollout_state: str,
workflow_requestors: Iterable[str],
branch: str,
is_canary: bool = False,
rollout_state: str, workflow_requestors: Iterable[str], is_canary: bool = False
) -> str:
settings = parse_settings(rollout_state)
user_optins = parse_users(rollout_state)
@ -417,12 +407,6 @@ jobs:
for experiment_name, experiment_settings in settings.experiments.items():
enabled = False
if not experiment_settings.all_branches and is_exception_branch(branch):
log.info(
f"Branch {branch} is an exception branch. Not enabling experiment {experiment_name}."
)
continue
# Is any workflow_requestor opted in to this experiment?
opted_in_users = [
requestor
@ -482,34 +466,35 @@ jobs:
def main() -> None:
args = parse_args()
runner_label_prefix = DEFAULT_LABEL_PREFIX
try:
rollout_state = get_rollout_state_from_issue(
args.github_token, args.github_issue_repo, args.github_issue
if args.github_ref_type == "branch" and is_exception_branch(args.github_branch):
log.info(
f"Exception branch: '{args.github_branch}', using Meta runners and no experiments."
)
runner_label_prefix = DEFAULT_LABEL_PREFIX
else:
try:
rollout_state = get_rollout_state_from_issue(
args.github_token, args.github_issue_repo, args.github_issue
)
username = get_potential_pr_author(
args.github_token,
args.github_repo,
args.github_actor,
args.github_ref_type,
args.github_branch,
)
username = get_potential_pr_author(
args.github_token,
args.github_repo,
args.github_actor,
args.github_ref_type,
args.github_branch,
)
is_canary = args.github_repo == "pytorch/pytorch-canary"
is_canary = args.github_repo == "pytorch/pytorch-canary"
runner_label_prefix = get_runner_prefix(
rollout_state,
(args.github_issue_owner, username),
args.github_branch,
is_canary,
)
runner_label_prefix = get_runner_prefix(
rollout_state, (args.github_issue_owner, username), is_canary
)
except Exception as e:
log.error(
f"Failed to get issue. Defaulting to Meta runners and no experiments. Exception: {e}"
)
except Exception as e:
log.error(
f"Failed to get issue. Defaulting to Meta runners and no experiments. Exception: {e}"
)
set_github_output(GH_OUTPUT_KEY_LABEL_TYPE, runner_label_prefix)

View File

@ -189,7 +189,7 @@ jobs:
run: |
pushd "${PYTORCH_FINAL_PACKAGE_DIR}"
# shellcheck disable=SC2046,SC2102
python3 -mpip install $(echo *.whl)[opt-einsum,optree] optree==0.13.0
python3 -mpip install $(echo *.whl)[opt-einsum,optree] optree==0.12.1
popd
.ci/pytorch/win-test.sh

View File

@ -43,7 +43,7 @@ jobs:
strategy:
fail-fast: false
matrix:
py_vers: [ "3.9", "3.10", "3.11", "3.12" ]
py_vers: [ "3.8", "3.9", "3.10", "3.11", "3.12" ]
device: ["cuda", "rocm", "xpu"]
include:
- device: "rocm"
@ -91,6 +91,9 @@ jobs:
# Determine python executable for given version
case $PY_VERS in
3.8)
PYTHON_EXECUTABLE=/opt/python/cp38-cp38/bin/python
;;
3.9)
PYTHON_EXECUTABLE=/opt/python/cp39-cp39/bin/python
;;
@ -211,7 +214,7 @@ jobs:
strategy:
fail-fast: false
matrix:
py_vers: [ "3.9", "3.10", "3.11", "3.12" ]
py_vers: [ "3.8", "3.9", "3.10", "3.11", "3.12" ]
timeout-minutes: 40
env:
DOCKER_IMAGE: pytorch/conda-builder:cpu

View File

@ -67,7 +67,6 @@ jobs:
pytorch-linux-jammy-py3.12-halide,
pytorch-linux-jammy-xpu-2024.0-py3,
pytorch-linux-jammy-py3-clang15-asan,
pytorch-linux-jammy-py3-clang18-asan,
pytorch-linux-focal-py3-clang10-onnx,
pytorch-linux-focal-linter,
pytorch-linux-jammy-cuda11.8-cudnn9-py3.9-linter,
@ -79,9 +78,7 @@ jobs:
- docker-image-name: pytorch-linux-jammy-aarch64-py3.10-gcc11-inductor-benchmarks
runner: linux.arm64.m7g.4xlarge
timeout-minutes: 600
# Docker uploads fail from LF runners, see https://github.com/pytorch/pytorch/pull/137358
# runs-on: "${{ needs.get-label-type.outputs.label-type }}${{ matrix.runner }}"
runs-on: "${{ matrix.runner }}"
runs-on: "${{ needs.get-label-type.outputs.label-type }}${{ matrix.runner }}"
env:
DOCKER_IMAGE_BASE: 308535385114.dkr.ecr.us-east-1.amazonaws.com/pytorch/${{ matrix.docker-image-name }}
steps:

View File

@ -60,7 +60,6 @@ jobs:
DOCKER_IMAGE: pytorch/manylinuxaarch64-builder:cpu-aarch64-main
use_split_build: False
DESIRED_PYTHON: "3.9"
runner_prefix: "${{ needs.get-label-type.outputs.label-type }}"
runs_on: linux.arm64.m7g.4xlarge.ephemeral
ALPINE_IMAGE: "arm64v8/alpine"
build_name: manywheel-py3_9-cpu-aarch64
@ -87,7 +86,6 @@ jobs:
DESIRED_PYTHON: "3.9"
build_name: manywheel-py3_9-cpu-aarch64
build_environment: linux-aarch64-binary-manywheel
runner_prefix: "${{ needs.get-label-type.outputs.label-type }}"
runs_on: linux.arm64.2xlarge
ALPINE_IMAGE: "arm64v8/alpine"
secrets:
@ -132,7 +130,6 @@ jobs:
DESIRED_DEVTOOLSET: cxx11-abi
use_split_build: False
DESIRED_PYTHON: "3.9"
runner_prefix: "${{ needs.get-label-type.outputs.label-type }}"
runs_on: linux.arm64.m7g.4xlarge.ephemeral
ALPINE_IMAGE: "arm64v8/alpine"
build_name: manywheel-py3_9-cuda-aarch64
@ -180,7 +177,6 @@ jobs:
DOCKER_IMAGE: pytorch/manylinuxaarch64-builder:cpu-aarch64-main
use_split_build: False
DESIRED_PYTHON: "3.10"
runner_prefix: "${{ needs.get-label-type.outputs.label-type }}"
runs_on: linux.arm64.m7g.4xlarge.ephemeral
ALPINE_IMAGE: "arm64v8/alpine"
build_name: manywheel-py3_10-cpu-aarch64
@ -207,7 +203,6 @@ jobs:
DESIRED_PYTHON: "3.10"
build_name: manywheel-py3_10-cpu-aarch64
build_environment: linux-aarch64-binary-manywheel
runner_prefix: "${{ needs.get-label-type.outputs.label-type }}"
runs_on: linux.arm64.2xlarge
ALPINE_IMAGE: "arm64v8/alpine"
secrets:
@ -252,7 +247,6 @@ jobs:
DESIRED_DEVTOOLSET: cxx11-abi
use_split_build: False
DESIRED_PYTHON: "3.10"
runner_prefix: "${{ needs.get-label-type.outputs.label-type }}"
runs_on: linux.arm64.m7g.4xlarge.ephemeral
ALPINE_IMAGE: "arm64v8/alpine"
build_name: manywheel-py3_10-cuda-aarch64
@ -300,7 +294,6 @@ jobs:
DOCKER_IMAGE: pytorch/manylinuxaarch64-builder:cpu-aarch64-main
use_split_build: False
DESIRED_PYTHON: "3.11"
runner_prefix: "${{ needs.get-label-type.outputs.label-type }}"
runs_on: linux.arm64.m7g.4xlarge.ephemeral
ALPINE_IMAGE: "arm64v8/alpine"
build_name: manywheel-py3_11-cpu-aarch64
@ -327,7 +320,6 @@ jobs:
DESIRED_PYTHON: "3.11"
build_name: manywheel-py3_11-cpu-aarch64
build_environment: linux-aarch64-binary-manywheel
runner_prefix: "${{ needs.get-label-type.outputs.label-type }}"
runs_on: linux.arm64.2xlarge
ALPINE_IMAGE: "arm64v8/alpine"
secrets:
@ -372,7 +364,6 @@ jobs:
DESIRED_DEVTOOLSET: cxx11-abi
use_split_build: False
DESIRED_PYTHON: "3.11"
runner_prefix: "${{ needs.get-label-type.outputs.label-type }}"
runs_on: linux.arm64.m7g.4xlarge.ephemeral
ALPINE_IMAGE: "arm64v8/alpine"
build_name: manywheel-py3_11-cuda-aarch64
@ -420,7 +411,6 @@ jobs:
DOCKER_IMAGE: pytorch/manylinuxaarch64-builder:cpu-aarch64-main
use_split_build: False
DESIRED_PYTHON: "3.12"
runner_prefix: "${{ needs.get-label-type.outputs.label-type }}"
runs_on: linux.arm64.m7g.4xlarge.ephemeral
ALPINE_IMAGE: "arm64v8/alpine"
build_name: manywheel-py3_12-cpu-aarch64
@ -447,7 +437,6 @@ jobs:
DESIRED_PYTHON: "3.12"
build_name: manywheel-py3_12-cpu-aarch64
build_environment: linux-aarch64-binary-manywheel
runner_prefix: "${{ needs.get-label-type.outputs.label-type }}"
runs_on: linux.arm64.2xlarge
ALPINE_IMAGE: "arm64v8/alpine"
secrets:
@ -492,7 +481,6 @@ jobs:
DESIRED_DEVTOOLSET: cxx11-abi
use_split_build: False
DESIRED_PYTHON: "3.12"
runner_prefix: "${{ needs.get-label-type.outputs.label-type }}"
runs_on: linux.arm64.m7g.4xlarge.ephemeral
ALPINE_IMAGE: "arm64v8/alpine"
build_name: manywheel-py3_12-cuda-aarch64

View File

@ -3324,353 +3324,3 @@ jobs:
conda-pytorchbot-token: ${{ secrets.CONDA_PYTORCHBOT_TOKEN }}
conda-pytorchbot-token-test: ${{ secrets.CONDA_PYTORCHBOT_TOKEN_TEST }}
uses: ./.github/workflows/_binary-upload.yml
manywheel-py3_13t-cpu-build:
if: ${{ github.repository_owner == 'pytorch' }}
uses: ./.github/workflows/_binary-build-linux.yml
needs: get-label-type
with:
PYTORCH_ROOT: /pytorch
BUILDER_ROOT: /builder
PACKAGE_TYPE: manywheel
# TODO: This is a legacy variable that we eventually want to get rid of in
# favor of GPU_ARCH_VERSION
DESIRED_CUDA: cpu
GPU_ARCH_TYPE: cpu
DOCKER_IMAGE: pytorch/manylinux-builder:cpu-main
use_split_build: False
DESIRED_PYTHON: "3.13t"
runner_prefix: "${{ needs.get-label-type.outputs.label-type }}"
build_name: manywheel-py3_13t-cpu
build_environment: linux-binary-manywheel
secrets:
github-token: ${{ secrets.GITHUB_TOKEN }}
manywheel-py3_13t-cpu-test: # Testing
if: ${{ github.repository_owner == 'pytorch' }}
needs:
- manywheel-py3_13t-cpu-build
- get-label-type
uses: ./.github/workflows/_binary-test-linux.yml
with:
PYTORCH_ROOT: /pytorch
BUILDER_ROOT: /builder
PACKAGE_TYPE: manywheel
# TODO: This is a legacy variable that we eventually want to get rid of in
# favor of GPU_ARCH_VERSION
DESIRED_CUDA: cpu
GPU_ARCH_TYPE: cpu
DOCKER_IMAGE: pytorch/manylinux-builder:cpu-main
use_split_build: False
DESIRED_PYTHON: "3.13t"
build_name: manywheel-py3_13t-cpu
build_environment: linux-binary-manywheel
runner_prefix: "${{ needs.get-label-type.outputs.label-type }}"
runs_on: linux.4xlarge
secrets:
github-token: ${{ secrets.GITHUB_TOKEN }}
manywheel-py3_13t-cpu-upload: # Uploading
if: ${{ github.repository_owner == 'pytorch' }}
permissions:
id-token: write
contents: read
needs: manywheel-py3_13t-cpu-test
with:
PYTORCH_ROOT: /pytorch
BUILDER_ROOT: /builder
PACKAGE_TYPE: manywheel
# TODO: This is a legacy variable that we eventually want to get rid of in
# favor of GPU_ARCH_VERSION
DESIRED_CUDA: cpu
GPU_ARCH_TYPE: cpu
DOCKER_IMAGE: pytorch/manylinux-builder:cpu-main
use_split_build: False
DESIRED_PYTHON: "3.13t"
build_name: manywheel-py3_13t-cpu
secrets:
github-token: ${{ secrets.GITHUB_TOKEN }}
conda-pytorchbot-token: ${{ secrets.CONDA_PYTORCHBOT_TOKEN }}
conda-pytorchbot-token-test: ${{ secrets.CONDA_PYTORCHBOT_TOKEN_TEST }}
uses: ./.github/workflows/_binary-upload.yml
manywheel-py3_13t-cpu-cxx11-abi-build:
if: ${{ github.repository_owner == 'pytorch' }}
uses: ./.github/workflows/_binary-build-linux.yml
needs: get-label-type
with:
PYTORCH_ROOT: /pytorch
BUILDER_ROOT: /builder
PACKAGE_TYPE: manywheel
# TODO: This is a legacy variable that we eventually want to get rid of in
# favor of GPU_ARCH_VERSION
DESIRED_CUDA: cpu-cxx11-abi
GPU_ARCH_TYPE: cpu-cxx11-abi
DOCKER_IMAGE: pytorch/manylinuxcxx11-abi-builder:cpu-cxx11-abi-main
DESIRED_DEVTOOLSET: cxx11-abi
use_split_build: False
DESIRED_PYTHON: "3.13t"
runner_prefix: "${{ needs.get-label-type.outputs.label-type }}"
build_name: manywheel-py3_13t-cpu-cxx11-abi
build_environment: linux-binary-manywheel
secrets:
github-token: ${{ secrets.GITHUB_TOKEN }}
manywheel-py3_13t-cpu-cxx11-abi-test: # Testing
if: ${{ github.repository_owner == 'pytorch' }}
needs:
- manywheel-py3_13t-cpu-cxx11-abi-build
- get-label-type
uses: ./.github/workflows/_binary-test-linux.yml
with:
PYTORCH_ROOT: /pytorch
BUILDER_ROOT: /builder
PACKAGE_TYPE: manywheel
# TODO: This is a legacy variable that we eventually want to get rid of in
# favor of GPU_ARCH_VERSION
DESIRED_CUDA: cpu-cxx11-abi
GPU_ARCH_TYPE: cpu-cxx11-abi
DOCKER_IMAGE: pytorch/manylinuxcxx11-abi-builder:cpu-cxx11-abi-main
DESIRED_DEVTOOLSET: cxx11-abi
use_split_build: False
DESIRED_PYTHON: "3.13t"
build_name: manywheel-py3_13t-cpu-cxx11-abi
build_environment: linux-binary-manywheel
runner_prefix: "${{ needs.get-label-type.outputs.label-type }}"
runs_on: linux.4xlarge
secrets:
github-token: ${{ secrets.GITHUB_TOKEN }}
manywheel-py3_13t-cpu-cxx11-abi-upload: # Uploading
if: ${{ github.repository_owner == 'pytorch' }}
permissions:
id-token: write
contents: read
needs: manywheel-py3_13t-cpu-cxx11-abi-test
with:
PYTORCH_ROOT: /pytorch
BUILDER_ROOT: /builder
PACKAGE_TYPE: manywheel
# TODO: This is a legacy variable that we eventually want to get rid of in
# favor of GPU_ARCH_VERSION
DESIRED_CUDA: cpu-cxx11-abi
GPU_ARCH_TYPE: cpu-cxx11-abi
DOCKER_IMAGE: pytorch/manylinuxcxx11-abi-builder:cpu-cxx11-abi-main
DESIRED_DEVTOOLSET: cxx11-abi
use_split_build: False
DESIRED_PYTHON: "3.13t"
build_name: manywheel-py3_13t-cpu-cxx11-abi
secrets:
github-token: ${{ secrets.GITHUB_TOKEN }}
conda-pytorchbot-token: ${{ secrets.CONDA_PYTORCHBOT_TOKEN }}
conda-pytorchbot-token-test: ${{ secrets.CONDA_PYTORCHBOT_TOKEN_TEST }}
uses: ./.github/workflows/_binary-upload.yml
manywheel-py3_13t-cuda11_8-build:
if: ${{ github.repository_owner == 'pytorch' }}
uses: ./.github/workflows/_binary-build-linux.yml
needs: get-label-type
with:
PYTORCH_ROOT: /pytorch
BUILDER_ROOT: /builder
PACKAGE_TYPE: manywheel
# TODO: This is a legacy variable that we eventually want to get rid of in
# favor of GPU_ARCH_VERSION
DESIRED_CUDA: cu118
GPU_ARCH_VERSION: 11.8
GPU_ARCH_TYPE: cuda
DOCKER_IMAGE: pytorch/manylinux-builder:cuda11.8-main
use_split_build: False
DESIRED_PYTHON: "3.13t"
runner_prefix: "${{ needs.get-label-type.outputs.label-type }}"
build_name: manywheel-py3_13t-cuda11_8
build_environment: linux-binary-manywheel
PYTORCH_EXTRA_INSTALL_REQUIREMENTS: nvidia-cuda-nvrtc-cu11==11.8.89; platform_system == 'Linux' and platform_machine == 'x86_64' | nvidia-cuda-runtime-cu11==11.8.89; platform_system == 'Linux' and platform_machine == 'x86_64' | nvidia-cuda-cupti-cu11==11.8.87; platform_system == 'Linux' and platform_machine == 'x86_64' | nvidia-cudnn-cu11==9.1.0.70; platform_system == 'Linux' and platform_machine == 'x86_64' | nvidia-cublas-cu11==11.11.3.6; platform_system == 'Linux' and platform_machine == 'x86_64' | nvidia-cufft-cu11==10.9.0.58; platform_system == 'Linux' and platform_machine == 'x86_64' | nvidia-curand-cu11==10.3.0.86; platform_system == 'Linux' and platform_machine == 'x86_64' | nvidia-cusolver-cu11==11.4.1.48; platform_system == 'Linux' and platform_machine == 'x86_64' | nvidia-cusparse-cu11==11.7.5.86; platform_system == 'Linux' and platform_machine == 'x86_64' | nvidia-nccl-cu11==2.21.5; platform_system == 'Linux' and platform_machine == 'x86_64' | nvidia-nvtx-cu11==11.8.86; platform_system == 'Linux' and platform_machine == 'x86_64'
secrets:
github-token: ${{ secrets.GITHUB_TOKEN }}
manywheel-py3_13t-cuda11_8-test: # Testing
if: ${{ github.repository_owner == 'pytorch' }}
needs:
- manywheel-py3_13t-cuda11_8-build
- get-label-type
uses: ./.github/workflows/_binary-test-linux.yml
with:
PYTORCH_ROOT: /pytorch
BUILDER_ROOT: /builder
PACKAGE_TYPE: manywheel
# TODO: This is a legacy variable that we eventually want to get rid of in
# favor of GPU_ARCH_VERSION
DESIRED_CUDA: cu118
GPU_ARCH_VERSION: 11.8
GPU_ARCH_TYPE: cuda
DOCKER_IMAGE: pytorch/manylinux-builder:cuda11.8-main
use_split_build: False
DESIRED_PYTHON: "3.13t"
build_name: manywheel-py3_13t-cuda11_8
build_environment: linux-binary-manywheel
runner_prefix: "${{ needs.get-label-type.outputs.label-type }}"
runs_on: linux.4xlarge.nvidia.gpu
secrets:
github-token: ${{ secrets.GITHUB_TOKEN }}
manywheel-py3_13t-cuda11_8-upload: # Uploading
if: ${{ github.repository_owner == 'pytorch' }}
permissions:
id-token: write
contents: read
needs: manywheel-py3_13t-cuda11_8-test
with:
PYTORCH_ROOT: /pytorch
BUILDER_ROOT: /builder
PACKAGE_TYPE: manywheel
# TODO: This is a legacy variable that we eventually want to get rid of in
# favor of GPU_ARCH_VERSION
DESIRED_CUDA: cu118
GPU_ARCH_VERSION: 11.8
GPU_ARCH_TYPE: cuda
DOCKER_IMAGE: pytorch/manylinux-builder:cuda11.8-main
use_split_build: False
DESIRED_PYTHON: "3.13t"
build_name: manywheel-py3_13t-cuda11_8
secrets:
github-token: ${{ secrets.GITHUB_TOKEN }}
conda-pytorchbot-token: ${{ secrets.CONDA_PYTORCHBOT_TOKEN }}
conda-pytorchbot-token-test: ${{ secrets.CONDA_PYTORCHBOT_TOKEN_TEST }}
uses: ./.github/workflows/_binary-upload.yml
manywheel-py3_13t-cuda12_1-build:
if: ${{ github.repository_owner == 'pytorch' }}
uses: ./.github/workflows/_binary-build-linux.yml
needs: get-label-type
with:
PYTORCH_ROOT: /pytorch
BUILDER_ROOT: /builder
PACKAGE_TYPE: manywheel
# TODO: This is a legacy variable that we eventually want to get rid of in
# favor of GPU_ARCH_VERSION
DESIRED_CUDA: cu121
GPU_ARCH_VERSION: 12.1
GPU_ARCH_TYPE: cuda
DOCKER_IMAGE: pytorch/manylinux-builder:cuda12.1-main
use_split_build: False
DESIRED_PYTHON: "3.13t"
runner_prefix: "${{ needs.get-label-type.outputs.label-type }}"
build_name: manywheel-py3_13t-cuda12_1
build_environment: linux-binary-manywheel
PYTORCH_EXTRA_INSTALL_REQUIREMENTS: nvidia-cuda-nvrtc-cu12==12.1.105; platform_system == 'Linux' and platform_machine == 'x86_64' | nvidia-cuda-runtime-cu12==12.1.105; platform_system == 'Linux' and platform_machine == 'x86_64' | nvidia-cuda-cupti-cu12==12.1.105; platform_system == 'Linux' and platform_machine == 'x86_64' | nvidia-cudnn-cu12==9.1.0.70; platform_system == 'Linux' and platform_machine == 'x86_64' | nvidia-cublas-cu12==12.1.3.1; platform_system == 'Linux' and platform_machine == 'x86_64' | nvidia-cufft-cu12==11.0.2.54; platform_system == 'Linux' and platform_machine == 'x86_64' | nvidia-curand-cu12==10.3.2.106; platform_system == 'Linux' and platform_machine == 'x86_64' | nvidia-cusolver-cu12==11.4.5.107; platform_system == 'Linux' and platform_machine == 'x86_64' | nvidia-cusparse-cu12==12.1.0.106; platform_system == 'Linux' and platform_machine == 'x86_64' | nvidia-nccl-cu12==2.21.5; platform_system == 'Linux' and platform_machine == 'x86_64' | nvidia-nvtx-cu12==12.1.105; platform_system == 'Linux' and platform_machine == 'x86_64'
secrets:
github-token: ${{ secrets.GITHUB_TOKEN }}
manywheel-py3_13t-cuda12_1-test: # Testing
if: ${{ github.repository_owner == 'pytorch' }}
needs:
- manywheel-py3_13t-cuda12_1-build
- get-label-type
uses: ./.github/workflows/_binary-test-linux.yml
with:
PYTORCH_ROOT: /pytorch
BUILDER_ROOT: /builder
PACKAGE_TYPE: manywheel
# TODO: This is a legacy variable that we eventually want to get rid of in
# favor of GPU_ARCH_VERSION
DESIRED_CUDA: cu121
GPU_ARCH_VERSION: 12.1
GPU_ARCH_TYPE: cuda
DOCKER_IMAGE: pytorch/manylinux-builder:cuda12.1-main
use_split_build: False
DESIRED_PYTHON: "3.13t"
build_name: manywheel-py3_13t-cuda12_1
build_environment: linux-binary-manywheel
runner_prefix: "${{ needs.get-label-type.outputs.label-type }}"
runs_on: linux.4xlarge.nvidia.gpu
secrets:
github-token: ${{ secrets.GITHUB_TOKEN }}
manywheel-py3_13t-cuda12_1-upload: # Uploading
if: ${{ github.repository_owner == 'pytorch' }}
permissions:
id-token: write
contents: read
needs: manywheel-py3_13t-cuda12_1-test
with:
PYTORCH_ROOT: /pytorch
BUILDER_ROOT: /builder
PACKAGE_TYPE: manywheel
# TODO: This is a legacy variable that we eventually want to get rid of in
# favor of GPU_ARCH_VERSION
DESIRED_CUDA: cu121
GPU_ARCH_VERSION: 12.1
GPU_ARCH_TYPE: cuda
DOCKER_IMAGE: pytorch/manylinux-builder:cuda12.1-main
use_split_build: False
DESIRED_PYTHON: "3.13t"
build_name: manywheel-py3_13t-cuda12_1
secrets:
github-token: ${{ secrets.GITHUB_TOKEN }}
conda-pytorchbot-token: ${{ secrets.CONDA_PYTORCHBOT_TOKEN }}
conda-pytorchbot-token-test: ${{ secrets.CONDA_PYTORCHBOT_TOKEN_TEST }}
uses: ./.github/workflows/_binary-upload.yml
manywheel-py3_13t-cuda12_4-build:
if: ${{ github.repository_owner == 'pytorch' }}
uses: ./.github/workflows/_binary-build-linux.yml
needs: get-label-type
with:
PYTORCH_ROOT: /pytorch
BUILDER_ROOT: /builder
PACKAGE_TYPE: manywheel
# TODO: This is a legacy variable that we eventually want to get rid of in
# favor of GPU_ARCH_VERSION
DESIRED_CUDA: cu124
GPU_ARCH_VERSION: 12.4
GPU_ARCH_TYPE: cuda
DOCKER_IMAGE: pytorch/manylinux-builder:cuda12.4-main
use_split_build: False
DESIRED_PYTHON: "3.13t"
runner_prefix: "${{ needs.get-label-type.outputs.label-type }}"
build_name: manywheel-py3_13t-cuda12_4
build_environment: linux-binary-manywheel
PYTORCH_EXTRA_INSTALL_REQUIREMENTS: nvidia-cuda-nvrtc-cu12==12.4.127; platform_system == 'Linux' and platform_machine == 'x86_64' | nvidia-cuda-runtime-cu12==12.4.127; platform_system == 'Linux' and platform_machine == 'x86_64' | nvidia-cuda-cupti-cu12==12.4.127; platform_system == 'Linux' and platform_machine == 'x86_64' | nvidia-cudnn-cu12==9.1.0.70; platform_system == 'Linux' and platform_machine == 'x86_64' | nvidia-cublas-cu12==12.4.5.8; platform_system == 'Linux' and platform_machine == 'x86_64' | nvidia-cufft-cu12==11.2.1.3; platform_system == 'Linux' and platform_machine == 'x86_64' | nvidia-curand-cu12==10.3.5.147; platform_system == 'Linux' and platform_machine == 'x86_64' | nvidia-cusolver-cu12==11.6.1.9; platform_system == 'Linux' and platform_machine == 'x86_64' | nvidia-cusparse-cu12==12.3.1.170; platform_system == 'Linux' and platform_machine == 'x86_64' | nvidia-nccl-cu12==2.21.5; platform_system == 'Linux' and platform_machine == 'x86_64' | nvidia-nvtx-cu12==12.4.127; platform_system == 'Linux' and platform_machine == 'x86_64' | nvidia-nvjitlink-cu12==12.4.127; platform_system == 'Linux' and platform_machine == 'x86_64'
secrets:
github-token: ${{ secrets.GITHUB_TOKEN }}
manywheel-py3_13t-cuda12_4-test: # Testing
if: ${{ github.repository_owner == 'pytorch' }}
needs:
- manywheel-py3_13t-cuda12_4-build
- get-label-type
uses: ./.github/workflows/_binary-test-linux.yml
with:
PYTORCH_ROOT: /pytorch
BUILDER_ROOT: /builder
PACKAGE_TYPE: manywheel
# TODO: This is a legacy variable that we eventually want to get rid of in
# favor of GPU_ARCH_VERSION
DESIRED_CUDA: cu124
GPU_ARCH_VERSION: 12.4
GPU_ARCH_TYPE: cuda
DOCKER_IMAGE: pytorch/manylinux-builder:cuda12.4-main
use_split_build: False
DESIRED_PYTHON: "3.13t"
build_name: manywheel-py3_13t-cuda12_4
build_environment: linux-binary-manywheel
runner_prefix: "${{ needs.get-label-type.outputs.label-type }}"
runs_on: linux.4xlarge.nvidia.gpu
secrets:
github-token: ${{ secrets.GITHUB_TOKEN }}
manywheel-py3_13t-cuda12_4-upload: # Uploading
if: ${{ github.repository_owner == 'pytorch' }}
permissions:
id-token: write
contents: read
needs: manywheel-py3_13t-cuda12_4-test
with:
PYTORCH_ROOT: /pytorch
BUILDER_ROOT: /builder
PACKAGE_TYPE: manywheel
# TODO: This is a legacy variable that we eventually want to get rid of in
# favor of GPU_ARCH_VERSION
DESIRED_CUDA: cu124
GPU_ARCH_VERSION: 12.4
GPU_ARCH_TYPE: cuda
DOCKER_IMAGE: pytorch/manylinux-builder:cuda12.4-main
use_split_build: False
DESIRED_PYTHON: "3.13t"
build_name: manywheel-py3_13t-cuda12_4
secrets:
github-token: ${{ secrets.GITHUB_TOKEN }}
conda-pytorchbot-token: ${{ secrets.CONDA_PYTORCHBOT_TOKEN }}
conda-pytorchbot-token-test: ${{ secrets.CONDA_PYTORCHBOT_TOKEN_TEST }}
uses: ./.github/workflows/_binary-upload.yml

View File

@ -1514,283 +1514,3 @@ jobs:
conda-pytorchbot-token: ${{ secrets.CONDA_PYTORCHBOT_TOKEN }}
conda-pytorchbot-token-test: ${{ secrets.CONDA_PYTORCHBOT_TOKEN_TEST }}
uses: ./.github/workflows/_binary-upload.yml
manywheel-py3_13t-cuda11_8-build:
if: ${{ github.repository_owner == 'pytorch' }}
uses: ./.github/workflows/_binary-build-linux.yml
needs: get-label-type
with:
PYTORCH_ROOT: /pytorch
BUILDER_ROOT: /builder
PACKAGE_TYPE: manywheel
# TODO: This is a legacy variable that we eventually want to get rid of in
# favor of GPU_ARCH_VERSION
DESIRED_CUDA: cu118
GPU_ARCH_VERSION: 11.8
GPU_ARCH_TYPE: cuda
DOCKER_IMAGE: pytorch/manylinux-builder:cuda11.8-main
use_split_build: True
DESIRED_PYTHON: "3.13t"
runner_prefix: "${{ needs.get-label-type.outputs.label-type }}"
build_name: manywheel-py3_13t-cuda11_8
build_environment: linux-binary-manywheel-split
PYTORCH_EXTRA_INSTALL_REQUIREMENTS: nvidia-cuda-nvrtc-cu11==11.8.89; platform_system == 'Linux' and platform_machine == 'x86_64' | nvidia-cuda-runtime-cu11==11.8.89; platform_system == 'Linux' and platform_machine == 'x86_64' | nvidia-cuda-cupti-cu11==11.8.87; platform_system == 'Linux' and platform_machine == 'x86_64' | nvidia-cudnn-cu11==9.1.0.70; platform_system == 'Linux' and platform_machine == 'x86_64' | nvidia-cublas-cu11==11.11.3.6; platform_system == 'Linux' and platform_machine == 'x86_64' | nvidia-cufft-cu11==10.9.0.58; platform_system == 'Linux' and platform_machine == 'x86_64' | nvidia-curand-cu11==10.3.0.86; platform_system == 'Linux' and platform_machine == 'x86_64' | nvidia-cusolver-cu11==11.4.1.48; platform_system == 'Linux' and platform_machine == 'x86_64' | nvidia-cusparse-cu11==11.7.5.86; platform_system == 'Linux' and platform_machine == 'x86_64' | nvidia-nccl-cu11==2.21.5; platform_system == 'Linux' and platform_machine == 'x86_64' | nvidia-nvtx-cu11==11.8.86; platform_system == 'Linux' and platform_machine == 'x86_64'
secrets:
github-token: ${{ secrets.GITHUB_TOKEN }}
manywheel-py3_13t-cuda11_8-test: # Testing
if: ${{ github.repository_owner == 'pytorch' }}
needs:
- manywheel-py3_13t-cuda11_8-build
- get-label-type
uses: ./.github/workflows/_binary-test-linux.yml
with:
PYTORCH_ROOT: /pytorch
BUILDER_ROOT: /builder
PACKAGE_TYPE: manywheel
# TODO: This is a legacy variable that we eventually want to get rid of in
# favor of GPU_ARCH_VERSION
DESIRED_CUDA: cu118
GPU_ARCH_VERSION: 11.8
GPU_ARCH_TYPE: cuda
DOCKER_IMAGE: pytorch/manylinux-builder:cuda11.8-main
use_split_build: True
DESIRED_PYTHON: "3.13t"
build_name: manywheel-py3_13t-cuda11_8
build_environment: linux-binary-manywheel-split
runner_prefix: "${{ needs.get-label-type.outputs.label-type }}"
runs_on: linux.4xlarge.nvidia.gpu
secrets:
github-token: ${{ secrets.GITHUB_TOKEN }}
manywheel-py3_13t-cuda11_8-upload: # Uploading
if: ${{ github.repository_owner == 'pytorch' }}
permissions:
id-token: write
contents: read
needs: manywheel-py3_13t-cuda11_8-test
with:
PYTORCH_ROOT: /pytorch
BUILDER_ROOT: /builder
PACKAGE_TYPE: manywheel
# TODO: This is a legacy variable that we eventually want to get rid of in
# favor of GPU_ARCH_VERSION
DESIRED_CUDA: cu118
GPU_ARCH_VERSION: 11.8
GPU_ARCH_TYPE: cuda
DOCKER_IMAGE: pytorch/manylinux-builder:cuda11.8-main
use_split_build: True
DESIRED_PYTHON: "3.13t"
build_name: manywheel-py3_13t-cuda11_8
secrets:
github-token: ${{ secrets.GITHUB_TOKEN }}
conda-pytorchbot-token: ${{ secrets.CONDA_PYTORCHBOT_TOKEN }}
conda-pytorchbot-token-test: ${{ secrets.CONDA_PYTORCHBOT_TOKEN_TEST }}
uses: ./.github/workflows/_binary-upload.yml
manywheel-py3_13t-cuda12_1-build:
if: ${{ github.repository_owner == 'pytorch' }}
uses: ./.github/workflows/_binary-build-linux.yml
needs: get-label-type
with:
PYTORCH_ROOT: /pytorch
BUILDER_ROOT: /builder
PACKAGE_TYPE: manywheel
# TODO: This is a legacy variable that we eventually want to get rid of in
# favor of GPU_ARCH_VERSION
DESIRED_CUDA: cu121
GPU_ARCH_VERSION: 12.1
GPU_ARCH_TYPE: cuda
DOCKER_IMAGE: pytorch/manylinux-builder:cuda12.1-main
use_split_build: True
DESIRED_PYTHON: "3.13t"
runner_prefix: "${{ needs.get-label-type.outputs.label-type }}"
build_name: manywheel-py3_13t-cuda12_1
build_environment: linux-binary-manywheel-split
PYTORCH_EXTRA_INSTALL_REQUIREMENTS: nvidia-cuda-nvrtc-cu12==12.1.105; platform_system == 'Linux' and platform_machine == 'x86_64' | nvidia-cuda-runtime-cu12==12.1.105; platform_system == 'Linux' and platform_machine == 'x86_64' | nvidia-cuda-cupti-cu12==12.1.105; platform_system == 'Linux' and platform_machine == 'x86_64' | nvidia-cudnn-cu12==9.1.0.70; platform_system == 'Linux' and platform_machine == 'x86_64' | nvidia-cublas-cu12==12.1.3.1; platform_system == 'Linux' and platform_machine == 'x86_64' | nvidia-cufft-cu12==11.0.2.54; platform_system == 'Linux' and platform_machine == 'x86_64' | nvidia-curand-cu12==10.3.2.106; platform_system == 'Linux' and platform_machine == 'x86_64' | nvidia-cusolver-cu12==11.4.5.107; platform_system == 'Linux' and platform_machine == 'x86_64' | nvidia-cusparse-cu12==12.1.0.106; platform_system == 'Linux' and platform_machine == 'x86_64' | nvidia-nccl-cu12==2.21.5; platform_system == 'Linux' and platform_machine == 'x86_64' | nvidia-nvtx-cu12==12.1.105; platform_system == 'Linux' and platform_machine == 'x86_64'
secrets:
github-token: ${{ secrets.GITHUB_TOKEN }}
manywheel-py3_13t-cuda12_1-test: # Testing
if: ${{ github.repository_owner == 'pytorch' }}
needs:
- manywheel-py3_13t-cuda12_1-build
- get-label-type
uses: ./.github/workflows/_binary-test-linux.yml
with:
PYTORCH_ROOT: /pytorch
BUILDER_ROOT: /builder
PACKAGE_TYPE: manywheel
# TODO: This is a legacy variable that we eventually want to get rid of in
# favor of GPU_ARCH_VERSION
DESIRED_CUDA: cu121
GPU_ARCH_VERSION: 12.1
GPU_ARCH_TYPE: cuda
DOCKER_IMAGE: pytorch/manylinux-builder:cuda12.1-main
use_split_build: True
DESIRED_PYTHON: "3.13t"
build_name: manywheel-py3_13t-cuda12_1
build_environment: linux-binary-manywheel-split
runner_prefix: "${{ needs.get-label-type.outputs.label-type }}"
runs_on: linux.4xlarge.nvidia.gpu
secrets:
github-token: ${{ secrets.GITHUB_TOKEN }}
manywheel-py3_13t-cuda12_1-upload: # Uploading
if: ${{ github.repository_owner == 'pytorch' }}
permissions:
id-token: write
contents: read
needs: manywheel-py3_13t-cuda12_1-test
with:
PYTORCH_ROOT: /pytorch
BUILDER_ROOT: /builder
PACKAGE_TYPE: manywheel
# TODO: This is a legacy variable that we eventually want to get rid of in
# favor of GPU_ARCH_VERSION
DESIRED_CUDA: cu121
GPU_ARCH_VERSION: 12.1
GPU_ARCH_TYPE: cuda
DOCKER_IMAGE: pytorch/manylinux-builder:cuda12.1-main
use_split_build: True
DESIRED_PYTHON: "3.13t"
build_name: manywheel-py3_13t-cuda12_1
secrets:
github-token: ${{ secrets.GITHUB_TOKEN }}
conda-pytorchbot-token: ${{ secrets.CONDA_PYTORCHBOT_TOKEN }}
conda-pytorchbot-token-test: ${{ secrets.CONDA_PYTORCHBOT_TOKEN_TEST }}
uses: ./.github/workflows/_binary-upload.yml
manywheel-py3_13t-cuda12_4-build:
if: ${{ github.repository_owner == 'pytorch' }}
uses: ./.github/workflows/_binary-build-linux.yml
needs: get-label-type
with:
PYTORCH_ROOT: /pytorch
BUILDER_ROOT: /builder
PACKAGE_TYPE: manywheel
# TODO: This is a legacy variable that we eventually want to get rid of in
# favor of GPU_ARCH_VERSION
DESIRED_CUDA: cu124
GPU_ARCH_VERSION: 12.4
GPU_ARCH_TYPE: cuda
DOCKER_IMAGE: pytorch/manylinux-builder:cuda12.4-main
use_split_build: True
DESIRED_PYTHON: "3.13t"
runner_prefix: "${{ needs.get-label-type.outputs.label-type }}"
build_name: manywheel-py3_13t-cuda12_4
build_environment: linux-binary-manywheel-split
PYTORCH_EXTRA_INSTALL_REQUIREMENTS: nvidia-cuda-nvrtc-cu12==12.4.127; platform_system == 'Linux' and platform_machine == 'x86_64' | nvidia-cuda-runtime-cu12==12.4.127; platform_system == 'Linux' and platform_machine == 'x86_64' | nvidia-cuda-cupti-cu12==12.4.127; platform_system == 'Linux' and platform_machine == 'x86_64' | nvidia-cudnn-cu12==9.1.0.70; platform_system == 'Linux' and platform_machine == 'x86_64' | nvidia-cublas-cu12==12.4.5.8; platform_system == 'Linux' and platform_machine == 'x86_64' | nvidia-cufft-cu12==11.2.1.3; platform_system == 'Linux' and platform_machine == 'x86_64' | nvidia-curand-cu12==10.3.5.147; platform_system == 'Linux' and platform_machine == 'x86_64' | nvidia-cusolver-cu12==11.6.1.9; platform_system == 'Linux' and platform_machine == 'x86_64' | nvidia-cusparse-cu12==12.3.1.170; platform_system == 'Linux' and platform_machine == 'x86_64' | nvidia-nccl-cu12==2.21.5; platform_system == 'Linux' and platform_machine == 'x86_64' | nvidia-nvtx-cu12==12.4.127; platform_system == 'Linux' and platform_machine == 'x86_64' | nvidia-nvjitlink-cu12==12.4.127; platform_system == 'Linux' and platform_machine == 'x86_64'
secrets:
github-token: ${{ secrets.GITHUB_TOKEN }}
manywheel-py3_13t-cuda12_4-test: # Testing
if: ${{ github.repository_owner == 'pytorch' }}
needs:
- manywheel-py3_13t-cuda12_4-build
- get-label-type
uses: ./.github/workflows/_binary-test-linux.yml
with:
PYTORCH_ROOT: /pytorch
BUILDER_ROOT: /builder
PACKAGE_TYPE: manywheel
# TODO: This is a legacy variable that we eventually want to get rid of in
# favor of GPU_ARCH_VERSION
DESIRED_CUDA: cu124
GPU_ARCH_VERSION: 12.4
GPU_ARCH_TYPE: cuda
DOCKER_IMAGE: pytorch/manylinux-builder:cuda12.4-main
use_split_build: True
DESIRED_PYTHON: "3.13t"
build_name: manywheel-py3_13t-cuda12_4
build_environment: linux-binary-manywheel-split
runner_prefix: "${{ needs.get-label-type.outputs.label-type }}"
runs_on: linux.4xlarge.nvidia.gpu
secrets:
github-token: ${{ secrets.GITHUB_TOKEN }}
manywheel-py3_13t-cuda12_4-upload: # Uploading
if: ${{ github.repository_owner == 'pytorch' }}
permissions:
id-token: write
contents: read
needs: manywheel-py3_13t-cuda12_4-test
with:
PYTORCH_ROOT: /pytorch
BUILDER_ROOT: /builder
PACKAGE_TYPE: manywheel
# TODO: This is a legacy variable that we eventually want to get rid of in
# favor of GPU_ARCH_VERSION
DESIRED_CUDA: cu124
GPU_ARCH_VERSION: 12.4
GPU_ARCH_TYPE: cuda
DOCKER_IMAGE: pytorch/manylinux-builder:cuda12.4-main
use_split_build: True
DESIRED_PYTHON: "3.13t"
build_name: manywheel-py3_13t-cuda12_4
secrets:
github-token: ${{ secrets.GITHUB_TOKEN }}
conda-pytorchbot-token: ${{ secrets.CONDA_PYTORCHBOT_TOKEN }}
conda-pytorchbot-token-test: ${{ secrets.CONDA_PYTORCHBOT_TOKEN_TEST }}
uses: ./.github/workflows/_binary-upload.yml
manywheel-py3_13t-cpu-build:
if: ${{ github.repository_owner == 'pytorch' }}
uses: ./.github/workflows/_binary-build-linux.yml
needs: get-label-type
with:
PYTORCH_ROOT: /pytorch
BUILDER_ROOT: /builder
PACKAGE_TYPE: manywheel
# TODO: This is a legacy variable that we eventually want to get rid of in
# favor of GPU_ARCH_VERSION
DESIRED_CUDA: cpu
GPU_ARCH_TYPE: cpu
DOCKER_IMAGE: pytorch/manylinux-builder:cpu-main
use_split_build: True
DESIRED_PYTHON: "3.13t"
runner_prefix: "${{ needs.get-label-type.outputs.label-type }}"
build_name: manywheel-py3_13t-cpu
build_environment: linux-binary-manywheel-split
secrets:
github-token: ${{ secrets.GITHUB_TOKEN }}
manywheel-py3_13t-cpu-test: # Testing
if: ${{ github.repository_owner == 'pytorch' }}
needs:
- manywheel-py3_13t-cpu-build
- get-label-type
uses: ./.github/workflows/_binary-test-linux.yml
with:
PYTORCH_ROOT: /pytorch
BUILDER_ROOT: /builder
PACKAGE_TYPE: manywheel
# TODO: This is a legacy variable that we eventually want to get rid of in
# favor of GPU_ARCH_VERSION
DESIRED_CUDA: cpu
GPU_ARCH_TYPE: cpu
DOCKER_IMAGE: pytorch/manylinux-builder:cpu-main
use_split_build: True
DESIRED_PYTHON: "3.13t"
build_name: manywheel-py3_13t-cpu
build_environment: linux-binary-manywheel-split
runner_prefix: "${{ needs.get-label-type.outputs.label-type }}"
runs_on: linux.4xlarge
secrets:
github-token: ${{ secrets.GITHUB_TOKEN }}
manywheel-py3_13t-cpu-upload: # Uploading
if: ${{ github.repository_owner == 'pytorch' }}
permissions:
id-token: write
contents: read
needs: manywheel-py3_13t-cpu-test
with:
PYTORCH_ROOT: /pytorch
BUILDER_ROOT: /builder
PACKAGE_TYPE: manywheel
# TODO: This is a legacy variable that we eventually want to get rid of in
# favor of GPU_ARCH_VERSION
DESIRED_CUDA: cpu
GPU_ARCH_TYPE: cpu
DOCKER_IMAGE: pytorch/manylinux-builder:cpu-main
use_split_build: True
DESIRED_PYTHON: "3.13t"
build_name: manywheel-py3_13t-cpu
secrets:
github-token: ${{ secrets.GITHUB_TOKEN }}
conda-pytorchbot-token: ${{ secrets.CONDA_PYTORCHBOT_TOKEN }}
conda-pytorchbot-token-test: ${{ secrets.CONDA_PYTORCHBOT_TOKEN_TEST }}
uses: ./.github/workflows/_binary-upload.yml

View File

@ -120,28 +120,6 @@ jobs:
test-matrix: ${{ needs.linux-jammy-cpu-py3_12-inductor-halide-build.outputs.test-matrix }}
secrets: inherit
linux-jammy-cpu-py3_12-inductor-triton-cpu-build:
name: linux-jammy-cpu-py3.12-gcc11-inductor-triton-cpu
uses: ./.github/workflows/_linux-build.yml
needs: get-label-type
with:
build-environment: linux-jammy-py3.12-gcc11
docker-image-name: pytorch-linux-jammy-py3.12-triton-cpu
runner_prefix: "${{ needs.get-label-type.outputs.label-type }}"
test-matrix: |
{ include: [
{ config: "inductor-triton-cpu", shard: 1, num_shards: 1, runner: "${{ needs.get-label-type.outputs.label-type }}linux.12xlarge" },
]}
linux-jammy-cpu-py3_12-inductor-triton-cpu-test:
name: linux-jammy-cpu-py3.12-gcc11-inductor-triton-cpu
uses: ./.github/workflows/_linux-test.yml
needs: linux-jammy-cpu-py3_12-inductor-triton-cpu-build
with:
build-environment: linux-jammy-py3.12-gcc11
docker-image: ${{ needs.linux-jammy-cpu-py3_12-inductor-triton-cpu-build.outputs.docker-image }}
test-matrix: ${{ needs.linux-jammy-cpu-py3_12-inductor-triton-cpu-build.outputs.test-matrix }}
linux-focal-cuda12_4-py3_10-gcc9-inductor-build:
# Should be synced with the one in inductor-periodic.yml but this only runs inductor_timm
name: cuda12.4-py3.10-gcc9-sm86

View File

@ -11,6 +11,7 @@ jobs:
contents: read
pull-requests: write
runs-on: lf.linux.2xlarge
continue-on-error: true
if: ${{ github.repository_owner == 'pytorch' }}
steps:
- name: Checkout pytorch
@ -30,12 +31,10 @@ jobs:
bash .github/scripts/lintrunner.sh
- name: Check for changes
id: git-check
continue-on-error: true
run: |
git diff --exit-code || echo "changes=true" >> "$GITHUB_OUTPUT"
- name: Suggest changes
if: steps.git-check.outputs.changes == 'true'
continue-on-error: true
uses: parkerbxyz/suggest-changes@v1
with:
comment: "Please commit the suggested changes from pytorch's linter."

View File

@ -223,7 +223,6 @@ jobs:
cache: pip
- name: Install dependencies
run: |
python3 -m pip install --upgrade pip
pip install pytest-rerunfailures==11.1.* pytest-flakefinder==1.1.* pytest-xdist==3.3.* expecttest==0.2.* fbscribelogger==0.1.* numpy==1.24.*
pip install torch --pre --index-url https://download.pytorch.org/whl/nightly/cpu/
- name: Run run_test.py (nonretryable)

View File

@ -57,10 +57,10 @@ jobs:
docker-image-name: pytorch-linux-focal-cuda12.1-cudnn9-py3-gcc9
test-matrix: |
{ include: [
{ config: "nogpu_AVX512", shard: 1, num_shards: 2, runner: "${{ needs.get-label-type.outputs.label-type }}linux.4xlarge" },
{ config: "nogpu_AVX512", shard: 2, num_shards: 2, runner: "${{ needs.get-label-type.outputs.label-type }}linux.4xlarge" },
{ config: "nogpu_NO_AVX2", shard: 1, num_shards: 2, runner: "${{ needs.get-label-type.outputs.label-type }}linux.4xlarge" },
{ config: "nogpu_NO_AVX2", shard: 2, num_shards: 2, runner: "${{ needs.get-label-type.outputs.label-type }}linux.4xlarge" },
{ config: "nogpu_AVX512", shard: 1, num_shards: 2, runner: "${{ needs.get-label-type.outputs.label-type }}linux.2xlarge" },
{ config: "nogpu_AVX512", shard: 2, num_shards: 2, runner: "${{ needs.get-label-type.outputs.label-type }}linux.2xlarge" },
{ config: "nogpu_NO_AVX2", shard: 1, num_shards: 2, runner: "${{ needs.get-label-type.outputs.label-type }}linux.2xlarge" },
{ config: "nogpu_NO_AVX2", shard: 2, num_shards: 2, runner: "${{ needs.get-label-type.outputs.label-type }}linux.2xlarge" },
{ config: "jit_legacy", shard: 1, num_shards: 1, runner: "${{ needs.get-label-type.outputs.label-type }}linux.4xlarge.nvidia.gpu" },
]}
linux-focal-cuda12_1-py3_10-gcc9-test:
@ -89,10 +89,10 @@ jobs:
{ config: "default", shard: 3, num_shards: 5, runner: "${{ needs.get-label-type.outputs.label-type }}linux.4xlarge.nvidia.gpu" },
{ config: "default", shard: 4, num_shards: 5, runner: "${{ needs.get-label-type.outputs.label-type }}linux.4xlarge.nvidia.gpu" },
{ config: "default", shard: 5, num_shards: 5, runner: "${{ needs.get-label-type.outputs.label-type }}linux.4xlarge.nvidia.gpu" },
{ config: "nogpu_AVX512", shard: 1, num_shards: 2, runner: "${{ needs.get-label-type.outputs.label-type }}linux.4xlarge" },
{ config: "nogpu_AVX512", shard: 2, num_shards: 2, runner: "${{ needs.get-label-type.outputs.label-type }}linux.4xlarge" },
{ config: "nogpu_NO_AVX2", shard: 1, num_shards: 2, runner: "${{ needs.get-label-type.outputs.label-type }}linux.4xlarge" },
{ config: "nogpu_NO_AVX2", shard: 2, num_shards: 2, runner: "${{ needs.get-label-type.outputs.label-type }}linux.4xlarge" },
{ config: "nogpu_AVX512", shard: 1, num_shards: 2, runner: "${{ needs.get-label-type.outputs.label-type }}linux.2xlarge" },
{ config: "nogpu_AVX512", shard: 2, num_shards: 2, runner: "${{ needs.get-label-type.outputs.label-type }}linux.2xlarge" },
{ config: "nogpu_NO_AVX2", shard: 1, num_shards: 2, runner: "${{ needs.get-label-type.outputs.label-type }}linux.2xlarge" },
{ config: "nogpu_NO_AVX2", shard: 2, num_shards: 2, runner: "${{ needs.get-label-type.outputs.label-type }}linux.2xlarge" },
{ config: "jit_legacy", shard: 1, num_shards: 1, runner: "${{ needs.get-label-type.outputs.label-type }}linux.4xlarge.nvidia.gpu" },
]}
@ -118,10 +118,9 @@ jobs:
docker-image-name: pytorch-linux-jammy-py3.9-gcc11
test-matrix: |
{ include: [
{ config: "default", shard: 1, num_shards: 4, runner: "${{ needs.get-label-type.outputs.label-type }}linux.2xlarge" },
{ config: "default", shard: 2, num_shards: 4, runner: "${{ needs.get-label-type.outputs.label-type }}linux.2xlarge" },
{ config: "default", shard: 3, num_shards: 4, runner: "${{ needs.get-label-type.outputs.label-type }}linux.2xlarge" },
{ config: "default", shard: 4, num_shards: 4, runner: "${{ needs.get-label-type.outputs.label-type }}linux.2xlarge" },
{ config: "default", shard: 1, num_shards: 3, runner: "${{ needs.get-label-type.outputs.label-type }}linux.2xlarge" },
{ config: "default", shard: 2, num_shards: 3, runner: "${{ needs.get-label-type.outputs.label-type }}linux.2xlarge" },
{ config: "default", shard: 3, num_shards: 3, runner: "${{ needs.get-label-type.outputs.label-type }}linux.2xlarge" },
]}
parallelnative-linux-jammy-py3_9-gcc11-test:
@ -340,10 +339,10 @@ jobs:
docker-image-name: pytorch-linux-focal-cuda12.1-cudnn9-py3-gcc9
test-matrix: |
{ include: [
{ config: "nogpu_AVX512", shard: 1, num_shards: 2, runner: "${{ needs.get-label-type.outputs.label-type }}linux.4xlarge" },
{ config: "nogpu_AVX512", shard: 2, num_shards: 2, runner: "${{ needs.get-label-type.outputs.label-type }}linux.4xlarge" },
{ config: "nogpu_NO_AVX2", shard: 1, num_shards: 2, runner: "${{ needs.get-label-type.outputs.label-type }}linux.4xlarge" },
{ config: "nogpu_NO_AVX2", shard: 2, num_shards: 2, runner: "${{ needs.get-label-type.outputs.label-type }}linux.4xlarge" },
{ config: "nogpu_AVX512", shard: 1, num_shards: 2, runner: "${{ needs.get-label-type.outputs.label-type }}linux.2xlarge" },
{ config: "nogpu_AVX512", shard: 2, num_shards: 2, runner: "${{ needs.get-label-type.outputs.label-type }}linux.2xlarge" },
{ config: "nogpu_NO_AVX2", shard: 1, num_shards: 2, runner: "${{ needs.get-label-type.outputs.label-type }}linux.2xlarge" },
{ config: "nogpu_NO_AVX2", shard: 2, num_shards: 2, runner: "${{ needs.get-label-type.outputs.label-type }}linux.2xlarge" },
{ config: "jit_legacy", shard: 1, num_shards: 1, runner: "${{ needs.get-label-type.outputs.label-type }}linux.4xlarge.nvidia.gpu" },
]}

View File

@ -185,10 +185,10 @@ jobs:
docker-image-name: pytorch-linux-focal-py3.9-clang10
test-matrix: |
{ include: [
{ config: "default", shard: 1, num_shards: 4, runner: "${{ needs.get-label-type.outputs.label-type }}linux.4xlarge" },
{ config: "default", shard: 2, num_shards: 4, runner: "${{ needs.get-label-type.outputs.label-type }}linux.4xlarge" },
{ config: "default", shard: 3, num_shards: 4, runner: "${{ needs.get-label-type.outputs.label-type }}linux.4xlarge" },
{ config: "default", shard: 4, num_shards: 4, runner: "${{ needs.get-label-type.outputs.label-type }}linux.4xlarge" },
{ config: "default", shard: 1, num_shards: 4, runner: "${{ needs.get-label-type.outputs.label-type }}linux.2xlarge" },
{ config: "default", shard: 2, num_shards: 4, runner: "${{ needs.get-label-type.outputs.label-type }}linux.2xlarge" },
{ config: "default", shard: 3, num_shards: 4, runner: "${{ needs.get-label-type.outputs.label-type }}linux.2xlarge" },
{ config: "default", shard: 4, num_shards: 4, runner: "${{ needs.get-label-type.outputs.label-type }}linux.2xlarge" },
{ config: "crossref", shard: 1, num_shards: 2, runner: "${{ needs.get-label-type.outputs.label-type }}linux.2xlarge" },
{ config: "crossref", shard: 2, num_shards: 2, runner: "${{ needs.get-label-type.outputs.label-type }}linux.2xlarge" },
{ config: "dynamo", shard: 1, num_shards: 3, runner: "${{ needs.get-label-type.outputs.label-type }}linux.2xlarge" },
@ -217,10 +217,10 @@ jobs:
docker-image-name: pytorch-linux-focal-py3.11-clang10
test-matrix: |
{ include: [
{ config: "default", shard: 1, num_shards: 4, runner: "${{ needs.get-label-type.outputs.label-type }}linux.4xlarge" },
{ config: "default", shard: 2, num_shards: 4, runner: "${{ needs.get-label-type.outputs.label-type }}linux.4xlarge" },
{ config: "default", shard: 3, num_shards: 4, runner: "${{ needs.get-label-type.outputs.label-type }}linux.4xlarge" },
{ config: "default", shard: 4, num_shards: 4, runner: "${{ needs.get-label-type.outputs.label-type }}linux.4xlarge" },
{ config: "default", shard: 1, num_shards: 4, runner: "${{ needs.get-label-type.outputs.label-type }}linux.2xlarge" },
{ config: "default", shard: 2, num_shards: 4, runner: "${{ needs.get-label-type.outputs.label-type }}linux.2xlarge" },
{ config: "default", shard: 3, num_shards: 4, runner: "${{ needs.get-label-type.outputs.label-type }}linux.2xlarge" },
{ config: "default", shard: 4, num_shards: 4, runner: "${{ needs.get-label-type.outputs.label-type }}linux.2xlarge" },
{ config: "crossref", shard: 1, num_shards: 2, runner: "${{ needs.get-label-type.outputs.label-type }}linux.2xlarge" },
{ config: "crossref", shard: 2, num_shards: 2, runner: "${{ needs.get-label-type.outputs.label-type }}linux.2xlarge" },
{ config: "dynamo", shard: 1, num_shards: 3, runner: "${{ needs.get-label-type.outputs.label-type }}linux.2xlarge" },
@ -251,10 +251,10 @@ jobs:
docker-image-name: pytorch-linux-focal-py3.12-clang10
test-matrix: |
{ include: [
{ config: "default", shard: 1, num_shards: 4, runner: "${{ needs.get-label-type.outputs.label-type }}linux.4xlarge" },
{ config: "default", shard: 2, num_shards: 4, runner: "${{ needs.get-label-type.outputs.label-type }}linux.4xlarge" },
{ config: "default", shard: 3, num_shards: 4, runner: "${{ needs.get-label-type.outputs.label-type }}linux.4xlarge" },
{ config: "default", shard: 4, num_shards: 4, runner: "${{ needs.get-label-type.outputs.label-type }}linux.4xlarge" },
{ config: "default", shard: 1, num_shards: 4, runner: "${{ needs.get-label-type.outputs.label-type }}linux.2xlarge" },
{ config: "default", shard: 2, num_shards: 4, runner: "${{ needs.get-label-type.outputs.label-type }}linux.2xlarge" },
{ config: "default", shard: 3, num_shards: 4, runner: "${{ needs.get-label-type.outputs.label-type }}linux.2xlarge" },
{ config: "default", shard: 4, num_shards: 4, runner: "${{ needs.get-label-type.outputs.label-type }}linux.2xlarge" },
{ config: "dynamo", shard: 1, num_shards: 3, runner: "${{ needs.get-label-type.outputs.label-type }}linux.2xlarge" },
{ config: "dynamo", shard: 2, num_shards: 3, runner: "${{ needs.get-label-type.outputs.label-type }}linux.2xlarge" },
{ config: "dynamo", shard: 3, num_shards: 3, runner: "${{ needs.get-label-type.outputs.label-type }}linux.2xlarge" },
@ -588,9 +588,9 @@ jobs:
docker-image-name: pytorch-linux-focal-py3.12-clang10
test-matrix: |
{ include: [
{ config: "default", shard: 1, num_shards: 3, runner: "linux.4xlarge" },
{ config: "default", shard: 2, num_shards: 3, runner: "linux.4xlarge" },
{ config: "default", shard: 3, num_shards: 3, runner: "linux.4xlarge" },
{ config: "default", shard: 1, num_shards: 3, runner: "linux.2xlarge" },
{ config: "default", shard: 2, num_shards: 3, runner: "linux.2xlarge" },
{ config: "default", shard: 3, num_shards: 3, runner: "linux.2xlarge" },
{ config: "dynamo", shard: 1, num_shards: 3, runner: "linux.2xlarge" },
{ config: "dynamo", shard: 2, num_shards: 3, runner: "linux.2xlarge" },
{ config: "dynamo", shard: 3, num_shards: 3, runner: "linux.2xlarge" },

View File

@ -1,24 +0,0 @@
name: s390
on:
push:
branches:
- main
tags:
- ciflow/s390/*
workflow_dispatch:
concurrency:
group: ${{ github.workflow }}-${{ github.event.pull_request.number || github.ref_name }}-${{ github.ref_type == 'branch' && github.sha }}-${{ github.event_name == 'workflow_dispatch' }}-${{ github.event_name == 'schedule' }}
cancel-in-progress: true
permissions: read-all
jobs:
linux-manylinux-2_28-py3-cpu-s390x-build:
name: linux-manylinux-2_28-py3-cpu-s390x
uses: ./.github/workflows/_linux-build.yml
with:
build-environment: linux-s390x-binary-manywheel
docker-image-name: pytorch/manylinuxs390x-builder:cpu-s390x-main
runner: linux.s390x

View File

@ -266,10 +266,10 @@ jobs:
docker-image-name: pytorch-linux-focal-cuda12.4-cudnn9-py3-gcc9
test-matrix: |
{ include: [
{ config: "nogpu_AVX512", shard: 1, num_shards: 2, runner: "${{ needs.get-label-type.outputs.label-type }}linux.4xlarge" },
{ config: "nogpu_AVX512", shard: 2, num_shards: 2, runner: "${{ needs.get-label-type.outputs.label-type }}linux.4xlarge" },
{ config: "nogpu_NO_AVX2", shard: 1, num_shards: 2, runner: "${{ needs.get-label-type.outputs.label-type }}linux.4xlarge" },
{ config: "nogpu_NO_AVX2", shard: 2, num_shards: 2, runner: "${{ needs.get-label-type.outputs.label-type }}linux.4xlarge" },
{ config: "nogpu_AVX512", shard: 1, num_shards: 2, runner: "${{ needs.get-label-type.outputs.label-type }}linux.2xlarge" },
{ config: "nogpu_AVX512", shard: 2, num_shards: 2, runner: "${{ needs.get-label-type.outputs.label-type }}linux.2xlarge" },
{ config: "nogpu_NO_AVX2", shard: 1, num_shards: 2, runner: "${{ needs.get-label-type.outputs.label-type }}linux.2xlarge" },
{ config: "nogpu_NO_AVX2", shard: 2, num_shards: 2, runner: "${{ needs.get-label-type.outputs.label-type }}linux.2xlarge" },
{ config: "jit_legacy", shard: 1, num_shards: 1, runner: "${{ needs.get-label-type.outputs.label-type }}linux.4xlarge.nvidia.gpu" },
{ config: "default", shard: 1, num_shards: 5, runner: "${{ needs.get-label-type.outputs.label-type }}linux.4xlarge.nvidia.gpu" },
{ config: "default", shard: 2, num_shards: 5, runner: "${{ needs.get-label-type.outputs.label-type }}linux.4xlarge.nvidia.gpu" },
@ -316,3 +316,11 @@ jobs:
build-environment: linux-focal-cuda11.8-py3.10-gcc9-experimental-split-build
docker-image: ${{ needs.linux-focal-cuda11_8-py3_10-gcc9-experimental-split-build.outputs.docker-image }}
test-matrix: ${{ needs.linux-focal-cuda11_8-py3_10-gcc9-experimental-split-build.outputs.test-matrix }}
linux-manylinux-2_28-py3-cpu-s390x-build:
name: linux-manylinux-2_28-py3-cpu-s390x
uses: ./.github/workflows/_linux-build.yml
with:
build-environment: linux-s390x-binary-manywheel
docker-image-name: pytorch/manylinuxs390x-builder:cpu-s390x-main
runner: linux.s390x

View File

@ -28,7 +28,7 @@ jobs:
check-latest: false
cache: pip
architecture: x64
- run: pip install pyyaml==6.0
- run: pip install pyyaml==6.0 rockset==1.0.3
- name: Setup committer id
run: |
@ -43,6 +43,7 @@ jobs:
COMMENT_ID: ${{ github.event.client_payload.comment_id }}
REBASE: ${{ github.event.client_payload.rebase }}
IGNORE_CURRENT: ${{ github.event.client_payload.ignore_current }}
ROCKSET_API_KEY: ${{ secrets.ROCKSET_API_KEY }}
DRCI_BOT_KEY: ${{ secrets.DRCI_BOT_KEY }}
GITHUB_RUN_ID: ${{ github.run_id }}
run: |

View File

@ -153,7 +153,7 @@ init_command = [
'junitparser==2.1.1',
'rich==10.9.0',
'pyyaml==6.0.1',
'optree==0.13.0',
'optree==0.12.1',
]
[[linter]]
@ -216,10 +216,6 @@ include_patterns = [
'torch/csrc/*.cpp',
'torch/csrc/**/*.h',
'torch/csrc/**/*.cpp',
'torch/csrc/distributed/autograd/**/*.cpp',
'torch/csrc/distributed/autograd/**/*.h',
'torch/csrc/distributed/rpc/**/*.cpp',
'torch/csrc/distributed/rpc/**/*.h',
'torch/csrc/jit/serialization/*.h',
'torch/csrc/jit/serialization/*.cpp',
]

View File

@ -1083,16 +1083,8 @@ if(NOT MSVC)
append_cxx_flag_if_supported("-Wno-unused-but-set-variable" CMAKE_CXX_FLAGS)
append_cxx_flag_if_supported("-Wno-maybe-uninitialized" CMAKE_CXX_FLAGS)
append_cxx_flag_if_supported("-fstandalone-debug" CMAKE_CXX_FLAGS_DEBUG)
if(CMAKE_SYSTEM_PROCESSOR MATCHES "aarch64" AND CMAKE_CXX_COMPILER_ID MATCHES "GNU")
if(CMAKE_BUILD_TYPE MATCHES Debug)
message(Warning "Applying -Og optimization for aarch64 GCC debug build to workaround ICE")
endif()
string(APPEND CMAKE_CXX_FLAGS_DEBUG " -fno-omit-frame-pointer -Og")
string(APPEND CMAKE_LINKER_FLAGS_DEBUG " -fno-omit-frame-pointer -Og")
else()
string(APPEND CMAKE_CXX_FLAGS_DEBUG " -fno-omit-frame-pointer -O0")
string(APPEND CMAKE_LINKER_FLAGS_DEBUG " -fno-omit-frame-pointer -O0")
endif()
string(APPEND CMAKE_CXX_FLAGS_DEBUG " -fno-omit-frame-pointer -O0")
string(APPEND CMAKE_LINKER_FLAGS_DEBUG " -fno-omit-frame-pointer -O0")
append_cxx_flag_if_supported("-fno-math-errno" CMAKE_CXX_FLAGS)
append_cxx_flag_if_supported("-fno-trapping-math" CMAKE_CXX_FLAGS)
append_cxx_flag_if_supported("-Werror=format" CMAKE_CXX_FLAGS)

View File

@ -121,7 +121,7 @@ torch/profiler/ @aaronenyeshi @sraikund16
test/functorch/test_aotdispatch.py @ezyang @Chillee
# Dataloader
torch/utils/data/ @andrewkho @divyanshk
torch/utils/data/ @andrewkho @gokulavasan
# hipify
torch/utils/hipify/ @jeffdaily @jithunnair-amd

View File

@ -39,16 +39,25 @@ class TORCH_API Context {
const Generator& defaultGenerator(Device device) {
c10::DeviceType device_type = device.type();
lazyInitDevice(device_type);
initCUDAIfNeeded(device_type);
initHIPIfNeeded(device_type);
if (device_type == at::kCPU) {
return at::detail::getDefaultCPUGenerator();
} else if (device_type == at::kCUDA) {
return at::detail::getCUDAHooks().getDefaultCUDAGenerator(device.index());
} else if (device_type == at::kMPS) {
return at::detail::getMPSHooks().getDefaultMPSGenerator();
} else if (device_type == at::kXPU) {
return at::detail::getXPUHooks().getDefaultXPUGenerator(device.index());
} else if (device_type == at::kIPU) {
return at::detail::getIPUHooks().getDefaultIPUGenerator(device.index());
} else if (device_type == at::kPrivateUse1) {
return at::detail::getPrivateUse1Hooks().getDefaultGenerator(
device.index());
} else {
return getAcceleratorHooksInterface(device_type)
.getDefaultGenerator(device.index());
AT_ERROR(c10::DeviceTypeName(device_type), " device type not enabled.");
}
}
const AcceleratorHooksInterface& getAcceleratorHooksInterface(
std::optional<c10::DeviceType> opt_device_type = std::nullopt) {
c10::DeviceType device_type = opt_device_type.has_value()
@ -71,10 +80,10 @@ class TORCH_API Context {
c10::DeviceTypeName(device_type), " device type not an accelerator.");
}
}
Device getDeviceFromPtr(void* data, c10::DeviceType device_type) {
lazyInitDevice(device_type);
initCUDAIfNeeded(device_type);
initHIPIfNeeded(device_type);
initXPUIfNeeded(device_type);
if (device_type == at::kCPU) {
return c10::DeviceType::CPU;
} else if (device_type == at::kCUDA) {
@ -87,7 +96,6 @@ class TORCH_API Context {
AT_ERROR(c10::DeviceTypeName(device_type), " device type not enabled.");
}
}
bool isPinnedPtr(
const void* data,
std::optional<c10::DeviceType> device_type = std::nullopt) {
@ -98,22 +106,13 @@ class TORCH_API Context {
opt_device_type.value())) { // passed device not an accelerator
return false;
}
return getAcceleratorHooksInterface(opt_device_type).isPinnedPtr(data);
return getAcceleratorHooksInterface(opt_device_type.value())
.isPinnedPtr(data);
}
Allocator* getPinnedMemoryAllocator(
std::optional<c10::DeviceType> device_type = std::nullopt) {
return getAcceleratorHooksInterface(device_type).getPinnedMemoryAllocator();
}
void lazyInitDevice(c10::DeviceType device_type) {
if (device_type != at::kCPU) {
c10::call_once(init_[static_cast<int8_t>(device_type)], [&] {
getAcceleratorHooksInterface(device_type).init();
});
}
}
static bool hasOpenMP();
static bool hasMKL();
static bool hasLAPACK();
@ -166,6 +165,27 @@ class TORCH_API Context {
static bool hasMAIA() {
return c10::impl::hasDeviceGuardImpl(c10::DeviceType::MAIA);
}
// defined in header so that getNonVariableType has ability to inline
// call_once check. getNonVariableType is called fairly frequently
void lazyInitCUDA() {
c10::call_once(thc_init, [&] { detail::getCUDAHooks().initCUDA(); });
}
void lazyInitHIP() {
c10::call_once(thh_init, [&] { detail::getHIPHooks().initHIP(); });
}
void lazyInitXPU() {
c10::call_once(thx_init, [&] { detail::getXPUHooks().initXPU(); });
}
void lazyInitMTIA() {
c10::call_once(th_mtia_init, [&] { detail::getMTIAHooks().initMTIA(); });
}
void lazyInitPrivateUse1() {
c10::call_once(thp_init, [&] {
if (isPrivateUse1HooksRegistered()) {
at::detail::getPrivateUse1Hooks().initPrivateUse1();
}
});
}
static const at::cuda::NVRTC& getNVRTC() {
return detail::getCUDAHooks().nvrtc();
}
@ -341,8 +361,27 @@ class TORCH_API Context {
void setAllowFP16ReductionCPU(bool);
private:
void initCUDAIfNeeded(c10::DeviceType p) {
if (p == c10::DeviceType::CUDA) {
lazyInitCUDA();
}
}
void initHIPIfNeeded(c10::DeviceType p) {
if (p == c10::DeviceType::HIP) {
lazyInitHIP();
}
}
void initXPUIfNeeded(c10::DeviceType p) {
if (p == c10::DeviceType::XPU) {
lazyInitXPU();
}
}
static bool checkCuBLASConfigDeterministic();
std::array<c10::once_flag, at::COMPILE_TIME_MAX_DEVICE_TYPES> init_;
c10::once_flag thc_init;
c10::once_flag thh_init;
c10::once_flag thx_init;
c10::once_flag th_mtia_init;
c10::once_flag thp_init;
bool enabled_cudnn = true;
bool deterministic_cudnn = false;
bool deterministic_mkldnn = false;
@ -474,7 +513,7 @@ inline size_t getNumGPUs() {
"to be CUDA (e.g., when you say CUDA, on a HIP build of ATen, this actually "
"means HIP. Rebuild PyTorch with one or the other disabled.");
} else if (hasCUDA()) {
return detail::getCUDAHooks().deviceCount();
return detail::getCUDAHooks().getNumGPUs();
} else if (hasHIP()) {
return detail::getHIPHooks().getNumGPUs();
} else {
@ -511,7 +550,7 @@ inline void manual_seed(uint64_t seed) {
}
// NB: Sometimes we build with CUDA, but we don't have any GPUs
// available. In that case, we must not seed CUDA; it will fail!
const auto cuda_num_gpus = detail::getCUDAHooks().deviceCount();
const auto cuda_num_gpus = detail::getCUDAHooks().getNumGPUs();
if (hasCUDA() && cuda_num_gpus > 0) {
for (const auto i : c10::irange(cuda_num_gpus)) {
auto cuda_gen = globalContext().defaultGenerator(
@ -524,7 +563,7 @@ inline void manual_seed(uint64_t seed) {
}
}
const auto xpu_num_gpus = detail::getXPUHooks().deviceCount();
const auto xpu_num_gpus = detail::getXPUHooks().getNumGPUs();
if (hasXPU() && xpu_num_gpus) {
for (const auto i : c10::irange(xpu_num_gpus)) {
auto xpu_gen = globalContext().defaultGenerator(
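For orientation, the manual_seed hunk above is the C++ side of torch.manual_seed: it seeds the CPU generator and, when CUDA (and XPU) devices are available, each per-device generator as well. A minimal Python illustration of that behavior, assuming a standard torch install (the variable names below are ours, not from the diff):

import torch

# torch.manual_seed seeds the CPU generator and, when GPUs are visible,
# every CUDA device generator too (the per-device loop shown in the hunk above).
torch.manual_seed(0)
cpu_sample = torch.rand(1)
if torch.cuda.is_available():
    gpu_sample = torch.rand(1, device="cuda")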

View File

@ -22,6 +22,13 @@ DLDataType getDLDataType(const Tensor& t) {
case ScalarType::UInt64:
dtype.code = DLDataTypeCode::kDLUInt;
break;
case ScalarType::Int1:
case ScalarType::Int2:
case ScalarType::Int3:
case ScalarType::Int4:
case ScalarType::Int5:
case ScalarType::Int6:
case ScalarType::Int7:
case ScalarType::Char:
dtype.code = DLDataTypeCode::kDLInt;
break;
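The getDLDataType hunk above maps ATen scalar types onto DLPack type codes; the signed-int branch shown also covers the sub-byte Int1..Int7 types in one of the two versions. As a hedged orientation only, a DLPack round-trip from Python through the existing int8 (ScalarType::Char) path looks roughly like this (tensor names are illustrative):

import torch
from torch.utils.dlpack import to_dlpack, from_dlpack

t = torch.arange(4, dtype=torch.int8)   # ScalarType::Char -> kDLInt, 8 bits
capsule = to_dlpack(t)                  # conversion goes through getDLDataType
roundtrip = from_dlpack(capsule)
assert roundtrip.dtype == torch.int8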

View File

@ -112,12 +112,12 @@
// Ensure we never have too many scalar types for the expansion here to
// support. To bump this, you must regenerate the macros below.
static_assert(static_cast<int>(c10::ScalarType::NumOptions) < 45);
static_assert(static_cast<int>(c10::ScalarType::NumOptions) < 60);
// Python code to regenerate generate code below:
#if 0
num_args = 45
num_args = 60
nums = ', '.join(str(i) for i in reversed(range(num_args+1)))
args = ', '.join(f'_{i}' for i in range(1, num_args+1))
@ -135,8 +135,8 @@ for i in range(1, num_args+1):
// Begin generated code
// clang-format off
#define AT_NUM_ARGS(...) AT_EXPAND(AT_NUM_ARGS_AUX(__VA_ARGS__, 45, 44, 43, 42, 41, 40, 39, 38, 37, 36, 35, 34, 33, 32, 31, 30, 29, 28, 27, 26, 25, 24, 23, 22, 21, 20, 19, 18, 17, 16, 15, 14, 13, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1, 0))
#define AT_NUM_ARGS_AUX(_1, _2, _3, _4, _5, _6, _7, _8, _9, _10, _11, _12, _13, _14, _15, _16, _17, _18, _19, _20, _21, _22, _23, _24, _25, _26, _27, _28, _29, _30, _31, _32, _33, _34, _35, _36, _37, _38, _39, _40, _41, _42, _43, _44, _45, N, ...) N
#define AT_NUM_ARGS(...) AT_EXPAND(AT_NUM_ARGS_AUX(__VA_ARGS__, 60, 59, 58, 57, 56, 55, 54, 53, 52, 51, 50, 49, 48, 47, 46, 45, 44, 43, 42, 41, 40, 39, 38, 37, 36, 35, 34, 33, 32, 31, 30, 29, 28, 27, 26, 25, 24, 23, 22, 21, 20, 19, 18, 17, 16, 15, 14, 13, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1, 0))
#define AT_NUM_ARGS_AUX(_1, _2, _3, _4, _5, _6, _7, _8, _9, _10, _11, _12, _13, _14, _15, _16, _17, _18, _19, _20, _21, _22, _23, _24, _25, _26, _27, _28, _29, _30, _31, _32, _33, _34, _35, _36, _37, _38, _39, _40, _41, _42, _43, _44, _45, _46, _47, _48, _49, _50, _51, _52, _53, _54, _55, _56, _57, _58, _59, _60, N, ...) N
#define AT_AP1(N, _1) AT_DISPATCH_CASE(_1, N)
#define AT_AP2(N, _1, _2) AT_DISPATCH_CASE(_1, N) AT_DISPATCH_CASE(_2, N)
#define AT_AP3(N, _1, _2, _3) AT_DISPATCH_CASE(_1, N) AT_DISPATCH_CASE(_2, N) AT_DISPATCH_CASE(_3, N)
@ -182,5 +182,21 @@ for i in range(1, num_args+1):
#define AT_AP43(N, _1, _2, _3, _4, _5, _6, _7, _8, _9, _10, _11, _12, _13, _14, _15, _16, _17, _18, _19, _20, _21, _22, _23, _24, _25, _26, _27, _28, _29, _30, _31, _32, _33, _34, _35, _36, _37, _38, _39, _40, _41, _42, _43) AT_DISPATCH_CASE(_1, N) AT_DISPATCH_CASE(_2, N) AT_DISPATCH_CASE(_3, N) AT_DISPATCH_CASE(_4, N) AT_DISPATCH_CASE(_5, N) AT_DISPATCH_CASE(_6, N) AT_DISPATCH_CASE(_7, N) AT_DISPATCH_CASE(_8, N) AT_DISPATCH_CASE(_9, N) AT_DISPATCH_CASE(_10, N) AT_DISPATCH_CASE(_11, N) AT_DISPATCH_CASE(_12, N) AT_DISPATCH_CASE(_13, N) AT_DISPATCH_CASE(_14, N) AT_DISPATCH_CASE(_15, N) AT_DISPATCH_CASE(_16, N) AT_DISPATCH_CASE(_17, N) AT_DISPATCH_CASE(_18, N) AT_DISPATCH_CASE(_19, N) AT_DISPATCH_CASE(_20, N) AT_DISPATCH_CASE(_21, N) AT_DISPATCH_CASE(_22, N) AT_DISPATCH_CASE(_23, N) AT_DISPATCH_CASE(_24, N) AT_DISPATCH_CASE(_25, N) AT_DISPATCH_CASE(_26, N) AT_DISPATCH_CASE(_27, N) AT_DISPATCH_CASE(_28, N) AT_DISPATCH_CASE(_29, N) AT_DISPATCH_CASE(_30, N) AT_DISPATCH_CASE(_31, N) AT_DISPATCH_CASE(_32, N) AT_DISPATCH_CASE(_33, N) AT_DISPATCH_CASE(_34, N) AT_DISPATCH_CASE(_35, N) AT_DISPATCH_CASE(_36, N) AT_DISPATCH_CASE(_37, N) AT_DISPATCH_CASE(_38, N) AT_DISPATCH_CASE(_39, N) AT_DISPATCH_CASE(_40, N) AT_DISPATCH_CASE(_41, N) AT_DISPATCH_CASE(_42, N) AT_DISPATCH_CASE(_43, N)
#define AT_AP44(N, _1, _2, _3, _4, _5, _6, _7, _8, _9, _10, _11, _12, _13, _14, _15, _16, _17, _18, _19, _20, _21, _22, _23, _24, _25, _26, _27, _28, _29, _30, _31, _32, _33, _34, _35, _36, _37, _38, _39, _40, _41, _42, _43, _44) AT_DISPATCH_CASE(_1, N) AT_DISPATCH_CASE(_2, N) AT_DISPATCH_CASE(_3, N) AT_DISPATCH_CASE(_4, N) AT_DISPATCH_CASE(_5, N) AT_DISPATCH_CASE(_6, N) AT_DISPATCH_CASE(_7, N) AT_DISPATCH_CASE(_8, N) AT_DISPATCH_CASE(_9, N) AT_DISPATCH_CASE(_10, N) AT_DISPATCH_CASE(_11, N) AT_DISPATCH_CASE(_12, N) AT_DISPATCH_CASE(_13, N) AT_DISPATCH_CASE(_14, N) AT_DISPATCH_CASE(_15, N) AT_DISPATCH_CASE(_16, N) AT_DISPATCH_CASE(_17, N) AT_DISPATCH_CASE(_18, N) AT_DISPATCH_CASE(_19, N) AT_DISPATCH_CASE(_20, N) AT_DISPATCH_CASE(_21, N) AT_DISPATCH_CASE(_22, N) AT_DISPATCH_CASE(_23, N) AT_DISPATCH_CASE(_24, N) AT_DISPATCH_CASE(_25, N) AT_DISPATCH_CASE(_26, N) AT_DISPATCH_CASE(_27, N) AT_DISPATCH_CASE(_28, N) AT_DISPATCH_CASE(_29, N) AT_DISPATCH_CASE(_30, N) AT_DISPATCH_CASE(_31, N) AT_DISPATCH_CASE(_32, N) AT_DISPATCH_CASE(_33, N) AT_DISPATCH_CASE(_34, N) AT_DISPATCH_CASE(_35, N) AT_DISPATCH_CASE(_36, N) AT_DISPATCH_CASE(_37, N) AT_DISPATCH_CASE(_38, N) AT_DISPATCH_CASE(_39, N) AT_DISPATCH_CASE(_40, N) AT_DISPATCH_CASE(_41, N) AT_DISPATCH_CASE(_42, N) AT_DISPATCH_CASE(_43, N) AT_DISPATCH_CASE(_44, N)
#define AT_AP45(N, _1, _2, _3, _4, _5, _6, _7, _8, _9, _10, _11, _12, _13, _14, _15, _16, _17, _18, _19, _20, _21, _22, _23, _24, _25, _26, _27, _28, _29, _30, _31, _32, _33, _34, _35, _36, _37, _38, _39, _40, _41, _42, _43, _44, _45) AT_DISPATCH_CASE(_1, N) AT_DISPATCH_CASE(_2, N) AT_DISPATCH_CASE(_3, N) AT_DISPATCH_CASE(_4, N) AT_DISPATCH_CASE(_5, N) AT_DISPATCH_CASE(_6, N) AT_DISPATCH_CASE(_7, N) AT_DISPATCH_CASE(_8, N) AT_DISPATCH_CASE(_9, N) AT_DISPATCH_CASE(_10, N) AT_DISPATCH_CASE(_11, N) AT_DISPATCH_CASE(_12, N) AT_DISPATCH_CASE(_13, N) AT_DISPATCH_CASE(_14, N) AT_DISPATCH_CASE(_15, N) AT_DISPATCH_CASE(_16, N) AT_DISPATCH_CASE(_17, N) AT_DISPATCH_CASE(_18, N) AT_DISPATCH_CASE(_19, N) AT_DISPATCH_CASE(_20, N) AT_DISPATCH_CASE(_21, N) AT_DISPATCH_CASE(_22, N) AT_DISPATCH_CASE(_23, N) AT_DISPATCH_CASE(_24, N) AT_DISPATCH_CASE(_25, N) AT_DISPATCH_CASE(_26, N) AT_DISPATCH_CASE(_27, N) AT_DISPATCH_CASE(_28, N) AT_DISPATCH_CASE(_29, N) AT_DISPATCH_CASE(_30, N) AT_DISPATCH_CASE(_31, N) AT_DISPATCH_CASE(_32, N) AT_DISPATCH_CASE(_33, N) AT_DISPATCH_CASE(_34, N) AT_DISPATCH_CASE(_35, N) AT_DISPATCH_CASE(_36, N) AT_DISPATCH_CASE(_37, N) AT_DISPATCH_CASE(_38, N) AT_DISPATCH_CASE(_39, N) AT_DISPATCH_CASE(_40, N) AT_DISPATCH_CASE(_41, N) AT_DISPATCH_CASE(_42, N) AT_DISPATCH_CASE(_43, N) AT_DISPATCH_CASE(_44, N) AT_DISPATCH_CASE(_45, N)
#define AT_AP46(N, _1, _2, _3, _4, _5, _6, _7, _8, _9, _10, _11, _12, _13, _14, _15, _16, _17, _18, _19, _20, _21, _22, _23, _24, _25, _26, _27, _28, _29, _30, _31, _32, _33, _34, _35, _36, _37, _38, _39, _40, _41, _42, _43, _44, _45, _46) AT_DISPATCH_CASE(_1, N) AT_DISPATCH_CASE(_2, N) AT_DISPATCH_CASE(_3, N) AT_DISPATCH_CASE(_4, N) AT_DISPATCH_CASE(_5, N) AT_DISPATCH_CASE(_6, N) AT_DISPATCH_CASE(_7, N) AT_DISPATCH_CASE(_8, N) AT_DISPATCH_CASE(_9, N) AT_DISPATCH_CASE(_10, N) AT_DISPATCH_CASE(_11, N) AT_DISPATCH_CASE(_12, N) AT_DISPATCH_CASE(_13, N) AT_DISPATCH_CASE(_14, N) AT_DISPATCH_CASE(_15, N) AT_DISPATCH_CASE(_16, N) AT_DISPATCH_CASE(_17, N) AT_DISPATCH_CASE(_18, N) AT_DISPATCH_CASE(_19, N) AT_DISPATCH_CASE(_20, N) AT_DISPATCH_CASE(_21, N) AT_DISPATCH_CASE(_22, N) AT_DISPATCH_CASE(_23, N) AT_DISPATCH_CASE(_24, N) AT_DISPATCH_CASE(_25, N) AT_DISPATCH_CASE(_26, N) AT_DISPATCH_CASE(_27, N) AT_DISPATCH_CASE(_28, N) AT_DISPATCH_CASE(_29, N) AT_DISPATCH_CASE(_30, N) AT_DISPATCH_CASE(_31, N) AT_DISPATCH_CASE(_32, N) AT_DISPATCH_CASE(_33, N) AT_DISPATCH_CASE(_34, N) AT_DISPATCH_CASE(_35, N) AT_DISPATCH_CASE(_36, N) AT_DISPATCH_CASE(_37, N) AT_DISPATCH_CASE(_38, N) AT_DISPATCH_CASE(_39, N) AT_DISPATCH_CASE(_40, N) AT_DISPATCH_CASE(_41, N) AT_DISPATCH_CASE(_42, N) AT_DISPATCH_CASE(_43, N) AT_DISPATCH_CASE(_44, N) AT_DISPATCH_CASE(_45, N) AT_DISPATCH_CASE(_46, N)
#define AT_AP47(N, _1, _2, _3, _4, _5, _6, _7, _8, _9, _10, _11, _12, _13, _14, _15, _16, _17, _18, _19, _20, _21, _22, _23, _24, _25, _26, _27, _28, _29, _30, _31, _32, _33, _34, _35, _36, _37, _38, _39, _40, _41, _42, _43, _44, _45, _46, _47) AT_DISPATCH_CASE(_1, N) AT_DISPATCH_CASE(_2, N) AT_DISPATCH_CASE(_3, N) AT_DISPATCH_CASE(_4, N) AT_DISPATCH_CASE(_5, N) AT_DISPATCH_CASE(_6, N) AT_DISPATCH_CASE(_7, N) AT_DISPATCH_CASE(_8, N) AT_DISPATCH_CASE(_9, N) AT_DISPATCH_CASE(_10, N) AT_DISPATCH_CASE(_11, N) AT_DISPATCH_CASE(_12, N) AT_DISPATCH_CASE(_13, N) AT_DISPATCH_CASE(_14, N) AT_DISPATCH_CASE(_15, N) AT_DISPATCH_CASE(_16, N) AT_DISPATCH_CASE(_17, N) AT_DISPATCH_CASE(_18, N) AT_DISPATCH_CASE(_19, N) AT_DISPATCH_CASE(_20, N) AT_DISPATCH_CASE(_21, N) AT_DISPATCH_CASE(_22, N) AT_DISPATCH_CASE(_23, N) AT_DISPATCH_CASE(_24, N) AT_DISPATCH_CASE(_25, N) AT_DISPATCH_CASE(_26, N) AT_DISPATCH_CASE(_27, N) AT_DISPATCH_CASE(_28, N) AT_DISPATCH_CASE(_29, N) AT_DISPATCH_CASE(_30, N) AT_DISPATCH_CASE(_31, N) AT_DISPATCH_CASE(_32, N) AT_DISPATCH_CASE(_33, N) AT_DISPATCH_CASE(_34, N) AT_DISPATCH_CASE(_35, N) AT_DISPATCH_CASE(_36, N) AT_DISPATCH_CASE(_37, N) AT_DISPATCH_CASE(_38, N) AT_DISPATCH_CASE(_39, N) AT_DISPATCH_CASE(_40, N) AT_DISPATCH_CASE(_41, N) AT_DISPATCH_CASE(_42, N) AT_DISPATCH_CASE(_43, N) AT_DISPATCH_CASE(_44, N) AT_DISPATCH_CASE(_45, N) AT_DISPATCH_CASE(_46, N) AT_DISPATCH_CASE(_47, N)
#define AT_AP48(N, _1, _2, _3, _4, _5, _6, _7, _8, _9, _10, _11, _12, _13, _14, _15, _16, _17, _18, _19, _20, _21, _22, _23, _24, _25, _26, _27, _28, _29, _30, _31, _32, _33, _34, _35, _36, _37, _38, _39, _40, _41, _42, _43, _44, _45, _46, _47, _48) AT_DISPATCH_CASE(_1, N) AT_DISPATCH_CASE(_2, N) AT_DISPATCH_CASE(_3, N) AT_DISPATCH_CASE(_4, N) AT_DISPATCH_CASE(_5, N) AT_DISPATCH_CASE(_6, N) AT_DISPATCH_CASE(_7, N) AT_DISPATCH_CASE(_8, N) AT_DISPATCH_CASE(_9, N) AT_DISPATCH_CASE(_10, N) AT_DISPATCH_CASE(_11, N) AT_DISPATCH_CASE(_12, N) AT_DISPATCH_CASE(_13, N) AT_DISPATCH_CASE(_14, N) AT_DISPATCH_CASE(_15, N) AT_DISPATCH_CASE(_16, N) AT_DISPATCH_CASE(_17, N) AT_DISPATCH_CASE(_18, N) AT_DISPATCH_CASE(_19, N) AT_DISPATCH_CASE(_20, N) AT_DISPATCH_CASE(_21, N) AT_DISPATCH_CASE(_22, N) AT_DISPATCH_CASE(_23, N) AT_DISPATCH_CASE(_24, N) AT_DISPATCH_CASE(_25, N) AT_DISPATCH_CASE(_26, N) AT_DISPATCH_CASE(_27, N) AT_DISPATCH_CASE(_28, N) AT_DISPATCH_CASE(_29, N) AT_DISPATCH_CASE(_30, N) AT_DISPATCH_CASE(_31, N) AT_DISPATCH_CASE(_32, N) AT_DISPATCH_CASE(_33, N) AT_DISPATCH_CASE(_34, N) AT_DISPATCH_CASE(_35, N) AT_DISPATCH_CASE(_36, N) AT_DISPATCH_CASE(_37, N) AT_DISPATCH_CASE(_38, N) AT_DISPATCH_CASE(_39, N) AT_DISPATCH_CASE(_40, N) AT_DISPATCH_CASE(_41, N) AT_DISPATCH_CASE(_42, N) AT_DISPATCH_CASE(_43, N) AT_DISPATCH_CASE(_44, N) AT_DISPATCH_CASE(_45, N) AT_DISPATCH_CASE(_46, N) AT_DISPATCH_CASE(_47, N) AT_DISPATCH_CASE(_48, N)
#define AT_AP49(N, _1, _2, _3, _4, _5, _6, _7, _8, _9, _10, _11, _12, _13, _14, _15, _16, _17, _18, _19, _20, _21, _22, _23, _24, _25, _26, _27, _28, _29, _30, _31, _32, _33, _34, _35, _36, _37, _38, _39, _40, _41, _42, _43, _44, _45, _46, _47, _48, _49) AT_DISPATCH_CASE(_1, N) AT_DISPATCH_CASE(_2, N) AT_DISPATCH_CASE(_3, N) AT_DISPATCH_CASE(_4, N) AT_DISPATCH_CASE(_5, N) AT_DISPATCH_CASE(_6, N) AT_DISPATCH_CASE(_7, N) AT_DISPATCH_CASE(_8, N) AT_DISPATCH_CASE(_9, N) AT_DISPATCH_CASE(_10, N) AT_DISPATCH_CASE(_11, N) AT_DISPATCH_CASE(_12, N) AT_DISPATCH_CASE(_13, N) AT_DISPATCH_CASE(_14, N) AT_DISPATCH_CASE(_15, N) AT_DISPATCH_CASE(_16, N) AT_DISPATCH_CASE(_17, N) AT_DISPATCH_CASE(_18, N) AT_DISPATCH_CASE(_19, N) AT_DISPATCH_CASE(_20, N) AT_DISPATCH_CASE(_21, N) AT_DISPATCH_CASE(_22, N) AT_DISPATCH_CASE(_23, N) AT_DISPATCH_CASE(_24, N) AT_DISPATCH_CASE(_25, N) AT_DISPATCH_CASE(_26, N) AT_DISPATCH_CASE(_27, N) AT_DISPATCH_CASE(_28, N) AT_DISPATCH_CASE(_29, N) AT_DISPATCH_CASE(_30, N) AT_DISPATCH_CASE(_31, N) AT_DISPATCH_CASE(_32, N) AT_DISPATCH_CASE(_33, N) AT_DISPATCH_CASE(_34, N) AT_DISPATCH_CASE(_35, N) AT_DISPATCH_CASE(_36, N) AT_DISPATCH_CASE(_37, N) AT_DISPATCH_CASE(_38, N) AT_DISPATCH_CASE(_39, N) AT_DISPATCH_CASE(_40, N) AT_DISPATCH_CASE(_41, N) AT_DISPATCH_CASE(_42, N) AT_DISPATCH_CASE(_43, N) AT_DISPATCH_CASE(_44, N) AT_DISPATCH_CASE(_45, N) AT_DISPATCH_CASE(_46, N) AT_DISPATCH_CASE(_47, N) AT_DISPATCH_CASE(_48, N) AT_DISPATCH_CASE(_49, N)
#define AT_AP50(N, _1, _2, _3, _4, _5, _6, _7, _8, _9, _10, _11, _12, _13, _14, _15, _16, _17, _18, _19, _20, _21, _22, _23, _24, _25, _26, _27, _28, _29, _30, _31, _32, _33, _34, _35, _36, _37, _38, _39, _40, _41, _42, _43, _44, _45, _46, _47, _48, _49, _50) AT_DISPATCH_CASE(_1, N) AT_DISPATCH_CASE(_2, N) AT_DISPATCH_CASE(_3, N) AT_DISPATCH_CASE(_4, N) AT_DISPATCH_CASE(_5, N) AT_DISPATCH_CASE(_6, N) AT_DISPATCH_CASE(_7, N) AT_DISPATCH_CASE(_8, N) AT_DISPATCH_CASE(_9, N) AT_DISPATCH_CASE(_10, N) AT_DISPATCH_CASE(_11, N) AT_DISPATCH_CASE(_12, N) AT_DISPATCH_CASE(_13, N) AT_DISPATCH_CASE(_14, N) AT_DISPATCH_CASE(_15, N) AT_DISPATCH_CASE(_16, N) AT_DISPATCH_CASE(_17, N) AT_DISPATCH_CASE(_18, N) AT_DISPATCH_CASE(_19, N) AT_DISPATCH_CASE(_20, N) AT_DISPATCH_CASE(_21, N) AT_DISPATCH_CASE(_22, N) AT_DISPATCH_CASE(_23, N) AT_DISPATCH_CASE(_24, N) AT_DISPATCH_CASE(_25, N) AT_DISPATCH_CASE(_26, N) AT_DISPATCH_CASE(_27, N) AT_DISPATCH_CASE(_28, N) AT_DISPATCH_CASE(_29, N) AT_DISPATCH_CASE(_30, N) AT_DISPATCH_CASE(_31, N) AT_DISPATCH_CASE(_32, N) AT_DISPATCH_CASE(_33, N) AT_DISPATCH_CASE(_34, N) AT_DISPATCH_CASE(_35, N) AT_DISPATCH_CASE(_36, N) AT_DISPATCH_CASE(_37, N) AT_DISPATCH_CASE(_38, N) AT_DISPATCH_CASE(_39, N) AT_DISPATCH_CASE(_40, N) AT_DISPATCH_CASE(_41, N) AT_DISPATCH_CASE(_42, N) AT_DISPATCH_CASE(_43, N) AT_DISPATCH_CASE(_44, N) AT_DISPATCH_CASE(_45, N) AT_DISPATCH_CASE(_46, N) AT_DISPATCH_CASE(_47, N) AT_DISPATCH_CASE(_48, N) AT_DISPATCH_CASE(_49, N) AT_DISPATCH_CASE(_50, N)
#define AT_AP51(N, _1, _2, _3, _4, _5, _6, _7, _8, _9, _10, _11, _12, _13, _14, _15, _16, _17, _18, _19, _20, _21, _22, _23, _24, _25, _26, _27, _28, _29, _30, _31, _32, _33, _34, _35, _36, _37, _38, _39, _40, _41, _42, _43, _44, _45, _46, _47, _48, _49, _50, _51) AT_DISPATCH_CASE(_1, N) AT_DISPATCH_CASE(_2, N) AT_DISPATCH_CASE(_3, N) AT_DISPATCH_CASE(_4, N) AT_DISPATCH_CASE(_5, N) AT_DISPATCH_CASE(_6, N) AT_DISPATCH_CASE(_7, N) AT_DISPATCH_CASE(_8, N) AT_DISPATCH_CASE(_9, N) AT_DISPATCH_CASE(_10, N) AT_DISPATCH_CASE(_11, N) AT_DISPATCH_CASE(_12, N) AT_DISPATCH_CASE(_13, N) AT_DISPATCH_CASE(_14, N) AT_DISPATCH_CASE(_15, N) AT_DISPATCH_CASE(_16, N) AT_DISPATCH_CASE(_17, N) AT_DISPATCH_CASE(_18, N) AT_DISPATCH_CASE(_19, N) AT_DISPATCH_CASE(_20, N) AT_DISPATCH_CASE(_21, N) AT_DISPATCH_CASE(_22, N) AT_DISPATCH_CASE(_23, N) AT_DISPATCH_CASE(_24, N) AT_DISPATCH_CASE(_25, N) AT_DISPATCH_CASE(_26, N) AT_DISPATCH_CASE(_27, N) AT_DISPATCH_CASE(_28, N) AT_DISPATCH_CASE(_29, N) AT_DISPATCH_CASE(_30, N) AT_DISPATCH_CASE(_31, N) AT_DISPATCH_CASE(_32, N) AT_DISPATCH_CASE(_33, N) AT_DISPATCH_CASE(_34, N) AT_DISPATCH_CASE(_35, N) AT_DISPATCH_CASE(_36, N) AT_DISPATCH_CASE(_37, N) AT_DISPATCH_CASE(_38, N) AT_DISPATCH_CASE(_39, N) AT_DISPATCH_CASE(_40, N) AT_DISPATCH_CASE(_41, N) AT_DISPATCH_CASE(_42, N) AT_DISPATCH_CASE(_43, N) AT_DISPATCH_CASE(_44, N) AT_DISPATCH_CASE(_45, N) AT_DISPATCH_CASE(_46, N) AT_DISPATCH_CASE(_47, N) AT_DISPATCH_CASE(_48, N) AT_DISPATCH_CASE(_49, N) AT_DISPATCH_CASE(_50, N) AT_DISPATCH_CASE(_51, N)
#define AT_AP52(N, _1, _2, _3, _4, _5, _6, _7, _8, _9, _10, _11, _12, _13, _14, _15, _16, _17, _18, _19, _20, _21, _22, _23, _24, _25, _26, _27, _28, _29, _30, _31, _32, _33, _34, _35, _36, _37, _38, _39, _40, _41, _42, _43, _44, _45, _46, _47, _48, _49, _50, _51, _52) AT_DISPATCH_CASE(_1, N) AT_DISPATCH_CASE(_2, N) AT_DISPATCH_CASE(_3, N) AT_DISPATCH_CASE(_4, N) AT_DISPATCH_CASE(_5, N) AT_DISPATCH_CASE(_6, N) AT_DISPATCH_CASE(_7, N) AT_DISPATCH_CASE(_8, N) AT_DISPATCH_CASE(_9, N) AT_DISPATCH_CASE(_10, N) AT_DISPATCH_CASE(_11, N) AT_DISPATCH_CASE(_12, N) AT_DISPATCH_CASE(_13, N) AT_DISPATCH_CASE(_14, N) AT_DISPATCH_CASE(_15, N) AT_DISPATCH_CASE(_16, N) AT_DISPATCH_CASE(_17, N) AT_DISPATCH_CASE(_18, N) AT_DISPATCH_CASE(_19, N) AT_DISPATCH_CASE(_20, N) AT_DISPATCH_CASE(_21, N) AT_DISPATCH_CASE(_22, N) AT_DISPATCH_CASE(_23, N) AT_DISPATCH_CASE(_24, N) AT_DISPATCH_CASE(_25, N) AT_DISPATCH_CASE(_26, N) AT_DISPATCH_CASE(_27, N) AT_DISPATCH_CASE(_28, N) AT_DISPATCH_CASE(_29, N) AT_DISPATCH_CASE(_30, N) AT_DISPATCH_CASE(_31, N) AT_DISPATCH_CASE(_32, N) AT_DISPATCH_CASE(_33, N) AT_DISPATCH_CASE(_34, N) AT_DISPATCH_CASE(_35, N) AT_DISPATCH_CASE(_36, N) AT_DISPATCH_CASE(_37, N) AT_DISPATCH_CASE(_38, N) AT_DISPATCH_CASE(_39, N) AT_DISPATCH_CASE(_40, N) AT_DISPATCH_CASE(_41, N) AT_DISPATCH_CASE(_42, N) AT_DISPATCH_CASE(_43, N) AT_DISPATCH_CASE(_44, N) AT_DISPATCH_CASE(_45, N) AT_DISPATCH_CASE(_46, N) AT_DISPATCH_CASE(_47, N) AT_DISPATCH_CASE(_48, N) AT_DISPATCH_CASE(_49, N) AT_DISPATCH_CASE(_50, N) AT_DISPATCH_CASE(_51, N) AT_DISPATCH_CASE(_52, N)
#define AT_AP53(N, _1, _2, _3, _4, _5, _6, _7, _8, _9, _10, _11, _12, _13, _14, _15, _16, _17, _18, _19, _20, _21, _22, _23, _24, _25, _26, _27, _28, _29, _30, _31, _32, _33, _34, _35, _36, _37, _38, _39, _40, _41, _42, _43, _44, _45, _46, _47, _48, _49, _50, _51, _52, _53) AT_DISPATCH_CASE(_1, N) AT_DISPATCH_CASE(_2, N) AT_DISPATCH_CASE(_3, N) AT_DISPATCH_CASE(_4, N) AT_DISPATCH_CASE(_5, N) AT_DISPATCH_CASE(_6, N) AT_DISPATCH_CASE(_7, N) AT_DISPATCH_CASE(_8, N) AT_DISPATCH_CASE(_9, N) AT_DISPATCH_CASE(_10, N) AT_DISPATCH_CASE(_11, N) AT_DISPATCH_CASE(_12, N) AT_DISPATCH_CASE(_13, N) AT_DISPATCH_CASE(_14, N) AT_DISPATCH_CASE(_15, N) AT_DISPATCH_CASE(_16, N) AT_DISPATCH_CASE(_17, N) AT_DISPATCH_CASE(_18, N) AT_DISPATCH_CASE(_19, N) AT_DISPATCH_CASE(_20, N) AT_DISPATCH_CASE(_21, N) AT_DISPATCH_CASE(_22, N) AT_DISPATCH_CASE(_23, N) AT_DISPATCH_CASE(_24, N) AT_DISPATCH_CASE(_25, N) AT_DISPATCH_CASE(_26, N) AT_DISPATCH_CASE(_27, N) AT_DISPATCH_CASE(_28, N) AT_DISPATCH_CASE(_29, N) AT_DISPATCH_CASE(_30, N) AT_DISPATCH_CASE(_31, N) AT_DISPATCH_CASE(_32, N) AT_DISPATCH_CASE(_33, N) AT_DISPATCH_CASE(_34, N) AT_DISPATCH_CASE(_35, N) AT_DISPATCH_CASE(_36, N) AT_DISPATCH_CASE(_37, N) AT_DISPATCH_CASE(_38, N) AT_DISPATCH_CASE(_39, N) AT_DISPATCH_CASE(_40, N) AT_DISPATCH_CASE(_41, N) AT_DISPATCH_CASE(_42, N) AT_DISPATCH_CASE(_43, N) AT_DISPATCH_CASE(_44, N) AT_DISPATCH_CASE(_45, N) AT_DISPATCH_CASE(_46, N) AT_DISPATCH_CASE(_47, N) AT_DISPATCH_CASE(_48, N) AT_DISPATCH_CASE(_49, N) AT_DISPATCH_CASE(_50, N) AT_DISPATCH_CASE(_51, N) AT_DISPATCH_CASE(_52, N) AT_DISPATCH_CASE(_53, N)
#define AT_AP54(N, _1, _2, _3, _4, _5, _6, _7, _8, _9, _10, _11, _12, _13, _14, _15, _16, _17, _18, _19, _20, _21, _22, _23, _24, _25, _26, _27, _28, _29, _30, _31, _32, _33, _34, _35, _36, _37, _38, _39, _40, _41, _42, _43, _44, _45, _46, _47, _48, _49, _50, _51, _52, _53, _54) AT_DISPATCH_CASE(_1, N) AT_DISPATCH_CASE(_2, N) AT_DISPATCH_CASE(_3, N) AT_DISPATCH_CASE(_4, N) AT_DISPATCH_CASE(_5, N) AT_DISPATCH_CASE(_6, N) AT_DISPATCH_CASE(_7, N) AT_DISPATCH_CASE(_8, N) AT_DISPATCH_CASE(_9, N) AT_DISPATCH_CASE(_10, N) AT_DISPATCH_CASE(_11, N) AT_DISPATCH_CASE(_12, N) AT_DISPATCH_CASE(_13, N) AT_DISPATCH_CASE(_14, N) AT_DISPATCH_CASE(_15, N) AT_DISPATCH_CASE(_16, N) AT_DISPATCH_CASE(_17, N) AT_DISPATCH_CASE(_18, N) AT_DISPATCH_CASE(_19, N) AT_DISPATCH_CASE(_20, N) AT_DISPATCH_CASE(_21, N) AT_DISPATCH_CASE(_22, N) AT_DISPATCH_CASE(_23, N) AT_DISPATCH_CASE(_24, N) AT_DISPATCH_CASE(_25, N) AT_DISPATCH_CASE(_26, N) AT_DISPATCH_CASE(_27, N) AT_DISPATCH_CASE(_28, N) AT_DISPATCH_CASE(_29, N) AT_DISPATCH_CASE(_30, N) AT_DISPATCH_CASE(_31, N) AT_DISPATCH_CASE(_32, N) AT_DISPATCH_CASE(_33, N) AT_DISPATCH_CASE(_34, N) AT_DISPATCH_CASE(_35, N) AT_DISPATCH_CASE(_36, N) AT_DISPATCH_CASE(_37, N) AT_DISPATCH_CASE(_38, N) AT_DISPATCH_CASE(_39, N) AT_DISPATCH_CASE(_40, N) AT_DISPATCH_CASE(_41, N) AT_DISPATCH_CASE(_42, N) AT_DISPATCH_CASE(_43, N) AT_DISPATCH_CASE(_44, N) AT_DISPATCH_CASE(_45, N) AT_DISPATCH_CASE(_46, N) AT_DISPATCH_CASE(_47, N) AT_DISPATCH_CASE(_48, N) AT_DISPATCH_CASE(_49, N) AT_DISPATCH_CASE(_50, N) AT_DISPATCH_CASE(_51, N) AT_DISPATCH_CASE(_52, N) AT_DISPATCH_CASE(_53, N) AT_DISPATCH_CASE(_54, N)
#define AT_AP55(N, _1, _2, _3, _4, _5, _6, _7, _8, _9, _10, _11, _12, _13, _14, _15, _16, _17, _18, _19, _20, _21, _22, _23, _24, _25, _26, _27, _28, _29, _30, _31, _32, _33, _34, _35, _36, _37, _38, _39, _40, _41, _42, _43, _44, _45, _46, _47, _48, _49, _50, _51, _52, _53, _54, _55) AT_DISPATCH_CASE(_1, N) AT_DISPATCH_CASE(_2, N) AT_DISPATCH_CASE(_3, N) AT_DISPATCH_CASE(_4, N) AT_DISPATCH_CASE(_5, N) AT_DISPATCH_CASE(_6, N) AT_DISPATCH_CASE(_7, N) AT_DISPATCH_CASE(_8, N) AT_DISPATCH_CASE(_9, N) AT_DISPATCH_CASE(_10, N) AT_DISPATCH_CASE(_11, N) AT_DISPATCH_CASE(_12, N) AT_DISPATCH_CASE(_13, N) AT_DISPATCH_CASE(_14, N) AT_DISPATCH_CASE(_15, N) AT_DISPATCH_CASE(_16, N) AT_DISPATCH_CASE(_17, N) AT_DISPATCH_CASE(_18, N) AT_DISPATCH_CASE(_19, N) AT_DISPATCH_CASE(_20, N) AT_DISPATCH_CASE(_21, N) AT_DISPATCH_CASE(_22, N) AT_DISPATCH_CASE(_23, N) AT_DISPATCH_CASE(_24, N) AT_DISPATCH_CASE(_25, N) AT_DISPATCH_CASE(_26, N) AT_DISPATCH_CASE(_27, N) AT_DISPATCH_CASE(_28, N) AT_DISPATCH_CASE(_29, N) AT_DISPATCH_CASE(_30, N) AT_DISPATCH_CASE(_31, N) AT_DISPATCH_CASE(_32, N) AT_DISPATCH_CASE(_33, N) AT_DISPATCH_CASE(_34, N) AT_DISPATCH_CASE(_35, N) AT_DISPATCH_CASE(_36, N) AT_DISPATCH_CASE(_37, N) AT_DISPATCH_CASE(_38, N) AT_DISPATCH_CASE(_39, N) AT_DISPATCH_CASE(_40, N) AT_DISPATCH_CASE(_41, N) AT_DISPATCH_CASE(_42, N) AT_DISPATCH_CASE(_43, N) AT_DISPATCH_CASE(_44, N) AT_DISPATCH_CASE(_45, N) AT_DISPATCH_CASE(_46, N) AT_DISPATCH_CASE(_47, N) AT_DISPATCH_CASE(_48, N) AT_DISPATCH_CASE(_49, N) AT_DISPATCH_CASE(_50, N) AT_DISPATCH_CASE(_51, N) AT_DISPATCH_CASE(_52, N) AT_DISPATCH_CASE(_53, N) AT_DISPATCH_CASE(_54, N) AT_DISPATCH_CASE(_55, N)
#define AT_AP56(N, _1, _2, _3, _4, _5, _6, _7, _8, _9, _10, _11, _12, _13, _14, _15, _16, _17, _18, _19, _20, _21, _22, _23, _24, _25, _26, _27, _28, _29, _30, _31, _32, _33, _34, _35, _36, _37, _38, _39, _40, _41, _42, _43, _44, _45, _46, _47, _48, _49, _50, _51, _52, _53, _54, _55, _56) AT_DISPATCH_CASE(_1, N) AT_DISPATCH_CASE(_2, N) AT_DISPATCH_CASE(_3, N) AT_DISPATCH_CASE(_4, N) AT_DISPATCH_CASE(_5, N) AT_DISPATCH_CASE(_6, N) AT_DISPATCH_CASE(_7, N) AT_DISPATCH_CASE(_8, N) AT_DISPATCH_CASE(_9, N) AT_DISPATCH_CASE(_10, N) AT_DISPATCH_CASE(_11, N) AT_DISPATCH_CASE(_12, N) AT_DISPATCH_CASE(_13, N) AT_DISPATCH_CASE(_14, N) AT_DISPATCH_CASE(_15, N) AT_DISPATCH_CASE(_16, N) AT_DISPATCH_CASE(_17, N) AT_DISPATCH_CASE(_18, N) AT_DISPATCH_CASE(_19, N) AT_DISPATCH_CASE(_20, N) AT_DISPATCH_CASE(_21, N) AT_DISPATCH_CASE(_22, N) AT_DISPATCH_CASE(_23, N) AT_DISPATCH_CASE(_24, N) AT_DISPATCH_CASE(_25, N) AT_DISPATCH_CASE(_26, N) AT_DISPATCH_CASE(_27, N) AT_DISPATCH_CASE(_28, N) AT_DISPATCH_CASE(_29, N) AT_DISPATCH_CASE(_30, N) AT_DISPATCH_CASE(_31, N) AT_DISPATCH_CASE(_32, N) AT_DISPATCH_CASE(_33, N) AT_DISPATCH_CASE(_34, N) AT_DISPATCH_CASE(_35, N) AT_DISPATCH_CASE(_36, N) AT_DISPATCH_CASE(_37, N) AT_DISPATCH_CASE(_38, N) AT_DISPATCH_CASE(_39, N) AT_DISPATCH_CASE(_40, N) AT_DISPATCH_CASE(_41, N) AT_DISPATCH_CASE(_42, N) AT_DISPATCH_CASE(_43, N) AT_DISPATCH_CASE(_44, N) AT_DISPATCH_CASE(_45, N) AT_DISPATCH_CASE(_46, N) AT_DISPATCH_CASE(_47, N) AT_DISPATCH_CASE(_48, N) AT_DISPATCH_CASE(_49, N) AT_DISPATCH_CASE(_50, N) AT_DISPATCH_CASE(_51, N) AT_DISPATCH_CASE(_52, N) AT_DISPATCH_CASE(_53, N) AT_DISPATCH_CASE(_54, N) AT_DISPATCH_CASE(_55, N) AT_DISPATCH_CASE(_56, N)
#define AT_AP57(N, _1, _2, _3, _4, _5, _6, _7, _8, _9, _10, _11, _12, _13, _14, _15, _16, _17, _18, _19, _20, _21, _22, _23, _24, _25, _26, _27, _28, _29, _30, _31, _32, _33, _34, _35, _36, _37, _38, _39, _40, _41, _42, _43, _44, _45, _46, _47, _48, _49, _50, _51, _52, _53, _54, _55, _56, _57) AT_DISPATCH_CASE(_1, N) AT_DISPATCH_CASE(_2, N) AT_DISPATCH_CASE(_3, N) AT_DISPATCH_CASE(_4, N) AT_DISPATCH_CASE(_5, N) AT_DISPATCH_CASE(_6, N) AT_DISPATCH_CASE(_7, N) AT_DISPATCH_CASE(_8, N) AT_DISPATCH_CASE(_9, N) AT_DISPATCH_CASE(_10, N) AT_DISPATCH_CASE(_11, N) AT_DISPATCH_CASE(_12, N) AT_DISPATCH_CASE(_13, N) AT_DISPATCH_CASE(_14, N) AT_DISPATCH_CASE(_15, N) AT_DISPATCH_CASE(_16, N) AT_DISPATCH_CASE(_17, N) AT_DISPATCH_CASE(_18, N) AT_DISPATCH_CASE(_19, N) AT_DISPATCH_CASE(_20, N) AT_DISPATCH_CASE(_21, N) AT_DISPATCH_CASE(_22, N) AT_DISPATCH_CASE(_23, N) AT_DISPATCH_CASE(_24, N) AT_DISPATCH_CASE(_25, N) AT_DISPATCH_CASE(_26, N) AT_DISPATCH_CASE(_27, N) AT_DISPATCH_CASE(_28, N) AT_DISPATCH_CASE(_29, N) AT_DISPATCH_CASE(_30, N) AT_DISPATCH_CASE(_31, N) AT_DISPATCH_CASE(_32, N) AT_DISPATCH_CASE(_33, N) AT_DISPATCH_CASE(_34, N) AT_DISPATCH_CASE(_35, N) AT_DISPATCH_CASE(_36, N) AT_DISPATCH_CASE(_37, N) AT_DISPATCH_CASE(_38, N) AT_DISPATCH_CASE(_39, N) AT_DISPATCH_CASE(_40, N) AT_DISPATCH_CASE(_41, N) AT_DISPATCH_CASE(_42, N) AT_DISPATCH_CASE(_43, N) AT_DISPATCH_CASE(_44, N) AT_DISPATCH_CASE(_45, N) AT_DISPATCH_CASE(_46, N) AT_DISPATCH_CASE(_47, N) AT_DISPATCH_CASE(_48, N) AT_DISPATCH_CASE(_49, N) AT_DISPATCH_CASE(_50, N) AT_DISPATCH_CASE(_51, N) AT_DISPATCH_CASE(_52, N) AT_DISPATCH_CASE(_53, N) AT_DISPATCH_CASE(_54, N) AT_DISPATCH_CASE(_55, N) AT_DISPATCH_CASE(_56, N) AT_DISPATCH_CASE(_57, N)
#define AT_AP58(N, _1, _2, _3, _4, _5, _6, _7, _8, _9, _10, _11, _12, _13, _14, _15, _16, _17, _18, _19, _20, _21, _22, _23, _24, _25, _26, _27, _28, _29, _30, _31, _32, _33, _34, _35, _36, _37, _38, _39, _40, _41, _42, _43, _44, _45, _46, _47, _48, _49, _50, _51, _52, _53, _54, _55, _56, _57, _58) AT_DISPATCH_CASE(_1, N) AT_DISPATCH_CASE(_2, N) AT_DISPATCH_CASE(_3, N) AT_DISPATCH_CASE(_4, N) AT_DISPATCH_CASE(_5, N) AT_DISPATCH_CASE(_6, N) AT_DISPATCH_CASE(_7, N) AT_DISPATCH_CASE(_8, N) AT_DISPATCH_CASE(_9, N) AT_DISPATCH_CASE(_10, N) AT_DISPATCH_CASE(_11, N) AT_DISPATCH_CASE(_12, N) AT_DISPATCH_CASE(_13, N) AT_DISPATCH_CASE(_14, N) AT_DISPATCH_CASE(_15, N) AT_DISPATCH_CASE(_16, N) AT_DISPATCH_CASE(_17, N) AT_DISPATCH_CASE(_18, N) AT_DISPATCH_CASE(_19, N) AT_DISPATCH_CASE(_20, N) AT_DISPATCH_CASE(_21, N) AT_DISPATCH_CASE(_22, N) AT_DISPATCH_CASE(_23, N) AT_DISPATCH_CASE(_24, N) AT_DISPATCH_CASE(_25, N) AT_DISPATCH_CASE(_26, N) AT_DISPATCH_CASE(_27, N) AT_DISPATCH_CASE(_28, N) AT_DISPATCH_CASE(_29, N) AT_DISPATCH_CASE(_30, N) AT_DISPATCH_CASE(_31, N) AT_DISPATCH_CASE(_32, N) AT_DISPATCH_CASE(_33, N) AT_DISPATCH_CASE(_34, N) AT_DISPATCH_CASE(_35, N) AT_DISPATCH_CASE(_36, N) AT_DISPATCH_CASE(_37, N) AT_DISPATCH_CASE(_38, N) AT_DISPATCH_CASE(_39, N) AT_DISPATCH_CASE(_40, N) AT_DISPATCH_CASE(_41, N) AT_DISPATCH_CASE(_42, N) AT_DISPATCH_CASE(_43, N) AT_DISPATCH_CASE(_44, N) AT_DISPATCH_CASE(_45, N) AT_DISPATCH_CASE(_46, N) AT_DISPATCH_CASE(_47, N) AT_DISPATCH_CASE(_48, N) AT_DISPATCH_CASE(_49, N) AT_DISPATCH_CASE(_50, N) AT_DISPATCH_CASE(_51, N) AT_DISPATCH_CASE(_52, N) AT_DISPATCH_CASE(_53, N) AT_DISPATCH_CASE(_54, N) AT_DISPATCH_CASE(_55, N) AT_DISPATCH_CASE(_56, N) AT_DISPATCH_CASE(_57, N) AT_DISPATCH_CASE(_58, N)
#define AT_AP59(N, _1, _2, _3, _4, _5, _6, _7, _8, _9, _10, _11, _12, _13, _14, _15, _16, _17, _18, _19, _20, _21, _22, _23, _24, _25, _26, _27, _28, _29, _30, _31, _32, _33, _34, _35, _36, _37, _38, _39, _40, _41, _42, _43, _44, _45, _46, _47, _48, _49, _50, _51, _52, _53, _54, _55, _56, _57, _58, _59) AT_DISPATCH_CASE(_1, N) AT_DISPATCH_CASE(_2, N) AT_DISPATCH_CASE(_3, N) AT_DISPATCH_CASE(_4, N) AT_DISPATCH_CASE(_5, N) AT_DISPATCH_CASE(_6, N) AT_DISPATCH_CASE(_7, N) AT_DISPATCH_CASE(_8, N) AT_DISPATCH_CASE(_9, N) AT_DISPATCH_CASE(_10, N) AT_DISPATCH_CASE(_11, N) AT_DISPATCH_CASE(_12, N) AT_DISPATCH_CASE(_13, N) AT_DISPATCH_CASE(_14, N) AT_DISPATCH_CASE(_15, N) AT_DISPATCH_CASE(_16, N) AT_DISPATCH_CASE(_17, N) AT_DISPATCH_CASE(_18, N) AT_DISPATCH_CASE(_19, N) AT_DISPATCH_CASE(_20, N) AT_DISPATCH_CASE(_21, N) AT_DISPATCH_CASE(_22, N) AT_DISPATCH_CASE(_23, N) AT_DISPATCH_CASE(_24, N) AT_DISPATCH_CASE(_25, N) AT_DISPATCH_CASE(_26, N) AT_DISPATCH_CASE(_27, N) AT_DISPATCH_CASE(_28, N) AT_DISPATCH_CASE(_29, N) AT_DISPATCH_CASE(_30, N) AT_DISPATCH_CASE(_31, N) AT_DISPATCH_CASE(_32, N) AT_DISPATCH_CASE(_33, N) AT_DISPATCH_CASE(_34, N) AT_DISPATCH_CASE(_35, N) AT_DISPATCH_CASE(_36, N) AT_DISPATCH_CASE(_37, N) AT_DISPATCH_CASE(_38, N) AT_DISPATCH_CASE(_39, N) AT_DISPATCH_CASE(_40, N) AT_DISPATCH_CASE(_41, N) AT_DISPATCH_CASE(_42, N) AT_DISPATCH_CASE(_43, N) AT_DISPATCH_CASE(_44, N) AT_DISPATCH_CASE(_45, N) AT_DISPATCH_CASE(_46, N) AT_DISPATCH_CASE(_47, N) AT_DISPATCH_CASE(_48, N) AT_DISPATCH_CASE(_49, N) AT_DISPATCH_CASE(_50, N) AT_DISPATCH_CASE(_51, N) AT_DISPATCH_CASE(_52, N) AT_DISPATCH_CASE(_53, N) AT_DISPATCH_CASE(_54, N) AT_DISPATCH_CASE(_55, N) AT_DISPATCH_CASE(_56, N) AT_DISPATCH_CASE(_57, N) AT_DISPATCH_CASE(_58, N) AT_DISPATCH_CASE(_59, N)
#define AT_AP60(N, _1, _2, _3, _4, _5, _6, _7, _8, _9, _10, _11, _12, _13, _14, _15, _16, _17, _18, _19, _20, _21, _22, _23, _24, _25, _26, _27, _28, _29, _30, _31, _32, _33, _34, _35, _36, _37, _38, _39, _40, _41, _42, _43, _44, _45, _46, _47, _48, _49, _50, _51, _52, _53, _54, _55, _56, _57, _58, _59, _60) AT_DISPATCH_CASE(_1, N) AT_DISPATCH_CASE(_2, N) AT_DISPATCH_CASE(_3, N) AT_DISPATCH_CASE(_4, N) AT_DISPATCH_CASE(_5, N) AT_DISPATCH_CASE(_6, N) AT_DISPATCH_CASE(_7, N) AT_DISPATCH_CASE(_8, N) AT_DISPATCH_CASE(_9, N) AT_DISPATCH_CASE(_10, N) AT_DISPATCH_CASE(_11, N) AT_DISPATCH_CASE(_12, N) AT_DISPATCH_CASE(_13, N) AT_DISPATCH_CASE(_14, N) AT_DISPATCH_CASE(_15, N) AT_DISPATCH_CASE(_16, N) AT_DISPATCH_CASE(_17, N) AT_DISPATCH_CASE(_18, N) AT_DISPATCH_CASE(_19, N) AT_DISPATCH_CASE(_20, N) AT_DISPATCH_CASE(_21, N) AT_DISPATCH_CASE(_22, N) AT_DISPATCH_CASE(_23, N) AT_DISPATCH_CASE(_24, N) AT_DISPATCH_CASE(_25, N) AT_DISPATCH_CASE(_26, N) AT_DISPATCH_CASE(_27, N) AT_DISPATCH_CASE(_28, N) AT_DISPATCH_CASE(_29, N) AT_DISPATCH_CASE(_30, N) AT_DISPATCH_CASE(_31, N) AT_DISPATCH_CASE(_32, N) AT_DISPATCH_CASE(_33, N) AT_DISPATCH_CASE(_34, N) AT_DISPATCH_CASE(_35, N) AT_DISPATCH_CASE(_36, N) AT_DISPATCH_CASE(_37, N) AT_DISPATCH_CASE(_38, N) AT_DISPATCH_CASE(_39, N) AT_DISPATCH_CASE(_40, N) AT_DISPATCH_CASE(_41, N) AT_DISPATCH_CASE(_42, N) AT_DISPATCH_CASE(_43, N) AT_DISPATCH_CASE(_44, N) AT_DISPATCH_CASE(_45, N) AT_DISPATCH_CASE(_46, N) AT_DISPATCH_CASE(_47, N) AT_DISPATCH_CASE(_48, N) AT_DISPATCH_CASE(_49, N) AT_DISPATCH_CASE(_50, N) AT_DISPATCH_CASE(_51, N) AT_DISPATCH_CASE(_52, N) AT_DISPATCH_CASE(_53, N) AT_DISPATCH_CASE(_54, N) AT_DISPATCH_CASE(_55, N) AT_DISPATCH_CASE(_56, N) AT_DISPATCH_CASE(_57, N) AT_DISPATCH_CASE(_58, N) AT_DISPATCH_CASE(_59, N) AT_DISPATCH_CASE(_60, N)
// End generated code
// clang-format on

View File

@ -18,8 +18,6 @@ c10::Allocator* GetCPUAllocatorMaybePinned(bool pin_memory) {
// To properly support this, see https://github.com/pytorch/pytorch/issues/14560
if (at::globalContext().hasCUDA()) {
return at::detail::getCUDAHooks().getPinnedMemoryAllocator();
} else if (at::globalContext().hasMTIA()) {
return at::detail::getMTIAHooks().getPinnedMemoryAllocator();
} else if (at::globalContext().hasXPU()) {
return at::detail::getXPUHooks().getPinnedMemoryAllocator();
} else if(at::isPrivateUse1HooksRegistered()) {

View File

@ -420,15 +420,15 @@ inline c10::MaybeOwned<Tensor> expand_size(
inline std::vector<Tensor> expand_outplace(TensorList to_expand) {
// expands a list of Tensors; ignores undefined (null) tensors
bool first = true;
SymDimVector sizes;
DimVector sizes;
for (const auto i : c10::irange(to_expand.size())) {
if (!to_expand[i].defined()) {
continue;
} else if (first) {
sizes = to_expand[i].sym_sizes();
sizes = to_expand[i].sizes();
first = false;
} else {
sizes = infer_size_symdimvector(sizes, to_expand[i].sym_sizes());
sizes = infer_size_dimvector(sizes, to_expand[i].sizes());
}
}
@ -436,10 +436,10 @@ inline std::vector<Tensor> expand_outplace(TensorList to_expand) {
for (const auto i : c10::irange(to_expand.size())) {
if (!to_expand[i].defined()) {
continue;
} else if (to_expand[i].sym_sizes().equals(sizes)) {
} else if (to_expand[i].sizes().equals(sizes)) {
result[i] = to_expand[i];
} else {
result[i] = to_expand[i].expand_symint(sizes);
result[i] = to_expand[i].expand(sizes);
}
}
return result;

View File

@ -209,8 +209,8 @@ void init_num_threads() {
}
void set_num_threads(int nthreads) {
#ifndef C10_MOBILE
TORCH_CHECK(nthreads > 0, "Expected positive number of threads");
#ifndef C10_MOBILE
int no_value = NOT_SET;
if (!num_intraop_threads.compare_exchange_strong(no_value, nthreads)) {
// num_intraop_threads either stores a positive integer or CONSUMED,
@ -229,9 +229,8 @@ void set_num_threads(int nthreads) {
}
}
#else
caffe2::PThreadPool* const pool = caffe2::pthreadpool();
caffe2::PThreadPool* const pool = caffe2::pthreadpool(nthreads);
TORCH_INTERNAL_ASSERT(pool, "Invalid thread pool!");
pool->set_thread_count(nthreads);
#endif // C10_MOBILE
}

View File

@ -19,7 +19,7 @@ Tensor& scalar_fill(Tensor& self, const Scalar& value) {
AT_DISPATCH_V2(
self.scalar_type(), "fill_out", AT_WRAP([&]() {
fill_inplace<scalar_t>(self, value);
}), kComplexHalf, kHalf, kBool, kBFloat16, AT_EXPAND(AT_ALL_TYPES_AND_COMPLEX), AT_EXPAND(AT_FLOAT8_TYPES), AT_EXPAND(AT_BAREBONES_UNSIGNED_TYPES));
}), kComplexHalf, kHalf, kBool, kBFloat16, AT_EXPAND(AT_ALL_TYPES_AND_COMPLEX), AT_EXPAND(AT_BAREBONES_UNSIGNED_TYPES));
return self;
}

View File

@ -144,8 +144,8 @@ class CheckSparseTensorInvariants {
bool old_state;
public:
CheckSparseTensorInvariants(bool state)
: old_state(at::globalContext().checkSparseTensorInvariants()) {
CheckSparseTensorInvariants(bool state) {
old_state = at::globalContext().checkSparseTensorInvariants();
at::globalContext().setCheckSparseTensorInvariants(state);
}

View File

@ -255,9 +255,7 @@ inline Tensor applySelect(
// the other hand, indexing wrapping is valid for all negative int64_t
// values, as x[INT64_MIN] is the same as x[INT64_MAX]
TORCH_CHECK_INDEX(
size.sym_gt(-1 - index)
.sym_and(size.sym_gt(index))
.expect_true(__FILE__, __LINE__),
size > -1 - index && size > index,
"index ",
index,
" is out of bounds for dimension ",

View File

@ -82,7 +82,7 @@ class TORCH_API ThreadLocalState {
!defined(BUILD_LITE_INTERPRETER)
// TLS for autocast dtypes
std::array<at::ScalarType, at::COMPILE_TIME_MAX_DEVICE_TYPES>
autocast_dtypes_{};
autocast_dtypes_;
#endif
friend class ThreadLocalStateGuard;

View File

@ -13,6 +13,8 @@
#include <ATen/core/Array.h>
#include <c10/macros/Macros.h>
#include <c10/util/Exception.h>
#include <c10/util/Half.h>
#include <cmath>
#include <cstdint>

View File

@ -306,10 +306,11 @@ struct VecConvert<float, 1, BFloat16, 1> {
const VectorizedN<BFloat16, 1>& src) {
VectorizedN<float, 1> result;
uint16x8_t u16_8 = vld1q_u16(reinterpret_cast<const uint16_t*>(&src[0]));
int32x4_t shift = vdupq_n_s32(16);
auto u16_low1 = vget_low_u16(u16_8);
auto u16_high1 = vget_high_u16(u16_8);
float32x4_t f32x4_0 = vreinterpretq_f32_u32(vshlq_n_u32(vmovl_u16(u16_low1), 16));
float32x4_t f32x4_1 = vreinterpretq_f32_u32(vshlq_n_u32(vmovl_u16(u16_high1), 16));
float32x4_t f32x4_0 = vreinterpretq_f32_u32(vshlq_u32(vmovl_u16(u16_low1), shift));
float32x4_t f32x4_1 = vreinterpretq_f32_u32(vshlq_u32(vmovl_u16(u16_high1), shift));
result[0] = {f32x4_0, f32x4_1};
return result;
}

View File

@ -75,7 +75,7 @@ inline __m256i pack_saturate_and_clamp<int32_t>(
int32_t /*min_val*/,
int32_t /*max_val*/) {
// This function is for linkage only, will not be used
TORCH_CHECK(false, "pack_saturate_and_clamp<int32_t> is not supported");
AT_ERROR("pack_saturate_and_clamp<int32_t> is not supported");
}
template <>

View File

@ -77,7 +77,7 @@ inline __m512i pack_saturate_and_clamp<int32_t>(
int32_t min_val [[maybe_unused]],
int32_t max_val [[maybe_unused]]) {
// This function is for linkage only, will not be used
TORCH_CHECK(false, "pack_saturate_and_clamp<int32_t> is not supported");
AT_ERROR("pack_saturate_and_clamp<int32_t> is not supported");
return __m512i{};
}

View File

@ -125,7 +125,7 @@ void CUDAGraph::capture_begin(MempoolId_t pool/*=0*/, cudaStreamCaptureMode capt
// due to the capture status being updated _after_ a capture had already started.
c10::cuda::CUDACachingAllocator::beginAllocateToPool(capture_dev_, mempool_id_, [this](cudaStream_t stream) {
cudaStreamCaptureStatus status;
CaptureId_t stream_capture_id = 0;
CaptureId_t stream_capture_id;
AT_CUDA_CHECK(cudaStreamGetCaptureInfo(stream, &status, &stream_capture_id));
return status == cudaStreamCaptureStatus::cudaStreamCaptureStatusActive && stream_capture_id == capture_id_;
});

View File

@ -10,7 +10,7 @@ TensorBase empty_cuda(
ScalarType dtype,
std::optional<Device> device_opt,
std::optional<c10::MemoryFormat> memory_format_opt) {
at::globalContext().lazyInitDevice(c10::DeviceType::CUDA);
at::globalContext().lazyInitCUDA();
const auto device = device_or_default(device_opt);
TORCH_INTERNAL_ASSERT(device.is_cuda());
const DeviceGuard device_guard(device);
@ -50,7 +50,7 @@ TensorBase empty_strided_cuda(
IntArrayRef stride,
ScalarType dtype,
std::optional<Device> device_opt) {
at::globalContext().lazyInitDevice(c10::DeviceType::CUDA);
at::globalContext().lazyInitCUDA();
const auto device = device_or_default(device_opt);
TORCH_INTERNAL_ASSERT(device.is_cuda());
const DeviceGuard device_guard(device);

View File

@ -34,7 +34,7 @@ void init_p2p_access_cache(int64_t num_devices) {
} // namespace detail
bool get_p2p_access(int dev, int dev_to_access) {
at::globalContext().lazyInitDevice(c10::DeviceType::CUDA);
at::globalContext().lazyInitCUDA();
TORCH_CHECK(dev >= 0 || dev < num_devices_,
dev, " is not a device");

View File

@ -14,7 +14,6 @@
#include <ATen/detail/CUDAHooksInterface.h>
#include <ATen/native/cuda/CuFFTPlanCache.h>
#include <c10/util/Exception.h>
#include <c10/util/env.h>
#include <c10/cuda/CUDACachingAllocator.h>
#include <c10/cuda/CUDAFunctions.h>
#include <c10/util/irange.h>
@ -80,19 +79,30 @@ struct _Initializer {
} initializer;
} // anonymous namespace
// Sets the CUDA_MODULE_LOADING environment variable
// if it's not set by the user.
void maybe_set_cuda_module_loading(const std::string &def_value) {
auto value = std::getenv("CUDA_MODULE_LOADING");
if (!value) {
#ifdef _WIN32
auto env_var = "CUDA_MODULE_LOADING=" + def_value;
_putenv(env_var.c_str());
#else
setenv("CUDA_MODULE_LOADING", def_value.c_str(), 1);
#endif
}
}
// NB: deleter is dynamic, because we need it to live in a separate
// compilation unit (alt is to have another method in hooks, but
// let's not if we don't need to!)
void CUDAHooks::init() const {
void CUDAHooks::initCUDA() const {
C10_LOG_API_USAGE_ONCE("aten.init.cuda");
// Force the update to enable unit testing. This code gets executed before unit tests
// have a chance to enable vitals.
at::vitals::VitalsAPI.setVital("CUDA", "used", "true", /* force = */ true);
// Sets the CUDA_MODULE_LOADING environment variable
// if it's not set by the user.
c10::utils::set_env("CUDA_MODULE_LOADING", "LAZY", false);
maybe_set_cuda_module_loading("LAZY");
const auto num_devices = c10::cuda::device_count_ensure_non_zero();
c10::cuda::CUDACachingAllocator::init(num_devices);
at::cuda::detail::init_p2p_access_cache(num_devices);
@ -103,7 +113,7 @@ void CUDAHooks::init() const {
#endif
}
const Generator& CUDAHooks::getDefaultGenerator(DeviceIndex device_index) const {
const Generator& CUDAHooks::getDefaultCUDAGenerator(DeviceIndex device_index) const {
return at::cuda::detail::getDefaultCUDAGenerator(device_index);
}
@ -231,9 +241,6 @@ DeviceIndex current_device() {
return -1;
}
/**
* DEPRECATED: use getCurrentDevice() instead
*/
DeviceIndex CUDAHooks::current_device() const {
return at::cuda::detail::current_device();
}
@ -429,21 +436,10 @@ void CUDAHooks::cuFFTClearPlanCache(DeviceIndex device_index) const {
at::native::detail::cufft_clear_plan_cache_impl(device_index);
}
/**
* DEPRECATED: use deviceCount() instead
*/
int CUDAHooks::getNumGPUs() const {
return at::cuda::device_count();
}
DeviceIndex CUDAHooks::deviceCount() const {
return at::cuda::device_count();
}
DeviceIndex CUDAHooks::getCurrentDevice() const {
return at::cuda::detail::current_device();
}
#ifdef USE_ROCM
bool CUDAHooks::isGPUArch(DeviceIndex device_index, const std::vector<std::string>& archs) const {
hipDeviceProp_t* prop = at::cuda::getDeviceProperties(device_index);

View File

@ -19,11 +19,10 @@ TORCH_CUDA_CPP_API void set_magma_init_fn(void (*magma_init_fn)());
// The real implementation of CUDAHooksInterface
struct CUDAHooks : public at::CUDAHooksInterface {
CUDAHooks(at::CUDAHooksArgs) {}
void init() const override;
void initCUDA() const override;
Device getDeviceFromPtr(void* data) const override;
bool isPinnedPtr(const void* data) const override;
const Generator& getDefaultGenerator(
DeviceIndex device_index = -1) const override;
const Generator& getDefaultCUDAGenerator(DeviceIndex device_index = -1) const override;
bool hasCUDA() const override;
bool hasMAGMA() const override;
bool hasCuDNN() const override;
@ -50,9 +49,6 @@ struct CUDAHooks : public at::CUDAHooksInterface {
int64_t cuFFTGetPlanCacheSize(DeviceIndex device_index) const override;
void cuFFTClearPlanCache(DeviceIndex device_index) const override;
int getNumGPUs() const override;
DeviceIndex deviceCount() const override;
DeviceIndex getCurrentDevice() const override;
#ifdef USE_ROCM
bool isGPUArch(DeviceIndex device_index, const std::vector<std::string>& archs) const override;
#endif

View File

@ -77,31 +77,6 @@ default, now called through TunableOp. Any call to at::cuda::blas::gemm() or ::b
when enabled. Calling gemm() for a given set of input arguments (transa, transb, m, n, k) will attempt to use the
fastest available implementation across both rocblas and hipblaslt.
## Offline Tuning
### Motivation
Offline tuning is intended for workloads with high memory utilization, where regular tuning might run out of memory.
### Workflow
There are two steps (a minimal end-to-end sketch of step 1 follows after these steps):
1) Set the environment variables to collect the untuned GEMMs. This generates `tunableop_untuned?.csv` ("?" is a placeholder for the GPU ID), like:
```
PYTORCH_TUNABLEOP_ENABLED=1
PYTORCH_TUNABLEOP_TUNING=0
PYTORCH_TUNABLEOP_RECORD_UNTUNED=1
...
```
2) Run a Python script that reads `tunableop_untuned?.csv` and generates `tunableop_results?.csv`, like:
```
import torch.cuda.tunable as tunable
import os
os.putenv('PYTORCH_TUNABLEOP_ENABLED', '1')
os.putenv('PYTORCH_TUNABLEOP_TUNING', '1')
os.putenv('PYTORCH_TUNABLEOP_RECORD_UNTUNED', '0')
tunable.tune_gemm_in_file("tunableop_results?.csv")
```
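As a minimal sketch of step 1 (the workload, matrix sizes, dtype, and the `tunableop_untuned0.csv` name for GPU 0 are illustrative assumptions, not defaults), the same environment variables can be set from Python before `torch` is imported:
```
import os

# Step 1: record untuned GEMMs without tuning them.
os.environ["PYTORCH_TUNABLEOP_ENABLED"] = "1"
os.environ["PYTORCH_TUNABLEOP_TUNING"] = "0"
os.environ["PYTORCH_TUNABLEOP_RECORD_UNTUNED"] = "1"

import torch

# Any GEMM executed by the workload is recorded; a plain matmul is enough here.
a = torch.randn(1024, 1024, device="cuda", dtype=torch.float16)
b = torch.randn(1024, 1024, device="cuda", dtype=torch.float16)
c = a @ b  # its signature is appended to tunableop_untuned0.csv on GPU 0
```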
## Tuning Context
The behavior of TunableOp is currently manipulated through environment variables, the C++ interface of
at::cuda::tunable::getTuningContext(), or the `torch.cuda.tunable` python interfaces. The environment variables take
@ -115,8 +90,6 @@ programmatically since the settings become fixed. Use the C++ or Python APIs ins
| -------------------- | ----------- |
| PYTORCH_TUNABLEOP_ENABLED | Default is 0. Set to 1 to enable. |
| PYTORCH_TUNABLEOP_TUNING | Default is 1. Set to 0 to disable. |
| PYTORCH_TUNABLEOP_RECORD_UNTUNED | Default is 0. Set to 1 to enable. |
| PYTORCH_TUNABLEOP_UNTUNED_FILENAME | Default is 'tunableop_untuned.csv'. |
| PYTORCH_TUNABLEOP_VERBOSE | Default is 0. Set to 1 to enable basic logging. 2 for basic tuning status. 3 for full trace. |
| PYTORCH_TUNABLEOP_VERBOSE_FILENAME | Default is "err" for stderr. Set to "out" for stdout or a filename for capturing verbose logging. |
| PYTORCH_TUNABLEOP_FILENAME | Default is 'tunableop_results.csv'. |
@ -139,8 +112,6 @@ All python APIs exist in the `torch.cuda.tunable` module.
| is_enabled() -> bool | |
| tuning_enable(val: bool = True) -> None | Default is True. |
| tuning_is_enabled() -> bool | |
| record_untuned_enable(val: bool = True) -> None | Default is True. |
| record_untuned_is_enabled() -> bool | |
| set_max_tuning_duration(duration: int) -> None | |
| get_max_tuning_duration() -> int | |
| set_max_tuning_iterations(iterations: int) -> None | |
@ -152,7 +123,6 @@ All python APIs exist in the `torch.cuda.tunable` module.
| write_file_on_exit(val: bool) -> None | Default is True. |
| write_file(filename: Optional[str] = None) -> None | If filename not given, it will call get_filename(). |
| read_file(filename: Optional[str] = None) -> None | If filename not given, it will call get_filename(). |
| tune_gemm_in_file(filename: str) -> None | Read an untuned file and tune the GEMMs in it. |
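As a rough sketch of how the Python APIs above compose (TunableOp itself must still be enabled, e.g. with `PYTORCH_TUNABLEOP_ENABLED=1`; the iteration count and the decision to write explicitly are arbitrary choices):
```
import torch.cuda.tunable as tunable

# Bound the per-solution tuning work and write results explicitly
# instead of relying on the write-on-exit behavior.
tunable.tuning_enable(True)
tunable.set_max_tuning_iterations(30)
tunable.write_file_on_exit(False)

# ... run the GEMM-heavy workload here ...

tunable.write_file()  # defaults to get_filename() when no name is given
```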
### C++ Interface
Example:

View File

@ -112,32 +112,6 @@ void TuningResultsManager::Add(const std::string& op_signature, const std::strin
AddImpl(op_signature, params_signature, best, it->second);
}
void TuningResultsManager::RecordUntuned( std::ofstream& untuned_file, const std::string& op_signature, const std::string& params_signature) {
std::scoped_lock l{lock_};
if (!untuned_file.good()) {
TORCH_WARN_ONCE("failed to open file for writing; untuned gemm will not be saved");
return;
} else {
bool isNew = false;
auto it = untuned_results_.find(op_signature);
if (it == untuned_results_.end()) {
it = untuned_results_.insert({op_signature, {}}).first;
isNew = true;
}
auto it_kernel_map = it->second.find(params_signature);
if (it_kernel_map == it->second.end()) {
it->second.insert(params_signature);
isNew = true;
}
if (isNew) {
untuned_file << op_signature << "," << params_signature << std::endl;
TUNABLE_LOG3("Untuned,", op_signature, ",", params_signature);
}
}
}
void TuningResultsManager::Delete(const std::string& op_signature, const std::string& params_signature) {
std::scoped_lock l{lock_};
@ -385,7 +359,6 @@ TuningStatus TuningResultsValidator::ValidatePyTorchVersion(const std::string& v
TuningContext::TuningContext() :
enable_{false},
tuning_enable_{true},
record_untuned_enable_{false},
manager_initialized_{false},
write_file_on_exit_{true},
numerics_check_enable_{false},
@ -396,7 +369,6 @@ TuningContext::TuningContext() :
icache_flush_{true},
rotating_buffer_size_{-1},
filename_{},
untuned_file_{},
results_count_from_input_file_{0}
{
}
@ -422,10 +394,6 @@ TuningContext::~TuningContext() {
}
}
}
if (untuned_file_.good()) {
untuned_file_.close();
}
}
void TuningContext::EnableTunableOp(bool value) {
@ -456,15 +424,6 @@ void TuningContext::EnableTuning(bool value) {
}
}
void TuningContext::EnableRecordUntuned(bool value) {
record_untuned_enable_ = value;
if (value) {
TUNABLE_LOG1("Enable Record Untuned for TunableOp");
} else {
TUNABLE_LOG1("Disable Record Untuned for TunableOp");
}
}
bool TuningContext::IsTuningEnabled() const {
static const char *env = std::getenv("PYTORCH_TUNABLEOP_TUNING");
if (env != nullptr && strcmp(env, "0") == 0) {
@ -473,33 +432,6 @@ bool TuningContext::IsTuningEnabled() const {
return tuning_enable_;
}
bool TuningContext::IsRecordUntunedEnabled() const {
static const char *env = std::getenv("PYTORCH_TUNABLEOP_RECORD_UNTUNED");
if (env != nullptr && strcmp(env, "1") == 0) {
return true;
}
return record_untuned_enable_;
}
std::ofstream& TuningContext::GetUntunedFile(){
if (!untuned_file_.is_open()) {
const char *env = std::getenv("PYTORCH_TUNABLEOP_UNTUNED_FILENAME");
std::string filename = (env == nullptr) ? "tunableop_untuned.csv" : env;
std::string device = c10::str(int(c10::cuda::current_device()));
std::size_t found = filename.rfind(".");
if (found != std::string::npos) {
filename.insert(found, device);
} else {
// if all else fails, just append
filename.append(device);
}
untuned_file_ = std::ofstream(filename, std::ios::out | std::ios::trunc);
}
return untuned_file_;
}
void TuningContext::WriteFileOnExit(bool value) {
write_file_on_exit_ = value;
}
@ -613,7 +545,7 @@ TuningResultsManager& TuningContext::GetTuningResultsManager() {
SetFilename(filename, true);
}
auto filename = GetFilename();
if (!filename.empty() && !IsRecordUntunedEnabled()) {
if (!filename.empty()) {
ReadFile(filename);
// attempt immediately to open file for writing to catch errors early
std::ofstream file(filename, std::ios::out | std::ios::app);

View File

@ -19,7 +19,6 @@
#include <string>
#include <type_traits>
#include <unordered_map>
#include <unordered_set>
#include <utility>
#include <vector>
@ -88,7 +87,6 @@ class TORCH_CUDA_CPP_API ResultEntry {
typedef std::unordered_map<std::string, ResultEntry> KernelMap;
typedef std::unordered_map<std::string, KernelMap> ResultsMap;
typedef std::unordered_map<std::string, std::unordered_set<std::string>> UntunedMap;
struct TORCH_CUDA_CPP_API TuningResults {
// Validates if these results are compatible with the libraries
@ -131,12 +129,9 @@ class TORCH_CUDA_CPP_API TuningResultsManager {
size_t GetSize();
void RecordUntuned( std::ofstream& untuned_file, const std::string& op_signature, const std::string& params_signature);
private:
std::mutex lock_;
ResultsMap results_;
UntunedMap untuned_results_;
};
class TORCH_CUDA_CPP_API TuningResultsValidator {
@ -178,10 +173,6 @@ class TORCH_CUDA_CPP_API TuningContext {
void EnableTuning(bool value);
bool IsTuningEnabled() const;
void EnableRecordUntuned(bool value);
bool IsRecordUntunedEnabled() const;
std::ofstream& GetUntunedFile();
void EnableNumericsCheck(bool value);
bool IsNumericsCheckEnabled() const;
@ -222,7 +213,6 @@ class TORCH_CUDA_CPP_API TuningContext {
private:
bool enable_;
bool tuning_enable_;
bool record_untuned_enable_;
bool manager_initialized_;
bool write_file_on_exit_;
bool numerics_check_enable_;
@ -236,7 +226,6 @@ class TORCH_CUDA_CPP_API TuningContext {
mutable c10::once_flag manager_init_once_;
TuningResultsValidator validator_;
std::string filename_;
std::ofstream untuned_file_;
size_t results_count_from_input_file_;
};

View File

@ -54,15 +54,9 @@ class TunableOp {
auto params_sig = params->Signature();
result = mgr.Lookup(op_sig, params_sig);
// If no previous tuning result was found, we do the tuning iff tuning is enabled
if (result == ResultEntry::Null()) {
if (ctx->IsTuningEnabled()) {
result = FindFastest(params);
mgr.Add(op_sig, params_sig, result);
}
else if (ctx->IsRecordUntunedEnabled()) {
// or record the gemm into file
mgr.RecordUntuned(ctx->GetUntunedFile(), op_sig, params_sig);
}
if (result == ResultEntry::Null() && ctx->IsTuningEnabled()) {
result = FindFastest(params);
mgr.Add(op_sig, params_sig, result);
}
}
else {

View File

@ -1,13 +1,9 @@
#pragma once
#include <ATen/core/Generator.h>
#include <c10/core/Allocator.h>
#include <c10/core/Device.h>
#include <c10/core/Stream.h>
#include <c10/core/Allocator.h>
C10_DIAGNOSTIC_PUSH_AND_IGNORED_IF_DEFINED("-Wunused-parameter")
namespace at {
// AcceleratorHooksInterface is a shared interface provided by all
@ -23,10 +19,6 @@ struct TORCH_API AcceleratorHooksInterface {
// Whether the device at device_index is fully initialized or not.
virtual bool hasPrimaryContext(DeviceIndex device_index) const = 0;
virtual void init() const {
TORCH_CHECK(false, "Backend doesn`t support init()");
}
virtual DeviceIndex deviceCount() const {
return 0;
}
@ -58,18 +50,7 @@ struct TORCH_API AcceleratorHooksInterface {
TORCH_CHECK(false, "Backend doesn't support getPinnedMemoryAllocator()");
return nullptr;
}
virtual const Generator& getDefaultGenerator(
C10_UNUSED DeviceIndex device_index = -1) const {
TORCH_CHECK(false, "Backend doesn`t support getDefaultGenerator()");
}
virtual Generator getNewGenerator(
C10_UNUSED DeviceIndex device_index = -1) const {
TORCH_CHECK(false, "Backend doesn`t support getNewGenerator()");
}
};
} // namespace at
C10_DIAGNOSTIC_POP()

View File

@ -6,13 +6,16 @@
#include <ATen/detail/AcceleratorHooksInterface.h>
// NB: Class must live in `at` due to limitations of Registry.h.
// Forward-declares at::Generator and at::cuda::NVRTC
namespace at {
// Forward-declares at::cuda::NVRTC
struct Generator;
namespace cuda {
struct NVRTC;
} // namespace cuda
} // namespace at
// NB: Class must live in `at` due to limitations of Registry.h.
namespace at {
#ifdef _MSC_VER
constexpr const char* CUDA_HELP =
@ -62,16 +65,12 @@ struct TORCH_API CUDAHooksInterface : AcceleratorHooksInterface {
~CUDAHooksInterface() override = default;
// Initialize THCState and, transitively, the CUDA state
void init() const override {
virtual void initCUDA() const {
TORCH_CHECK(false, "Cannot initialize CUDA without ATen_cuda library. ", CUDA_HELP);
}
const Generator& getDefaultGenerator(
C10_UNUSED DeviceIndex device_index = -1) const override {
TORCH_CHECK(
false,
"Cannot get default CUDA generator without ATen_cuda library. ",
CUDA_HELP);
virtual const Generator& getDefaultCUDAGenerator(C10_UNUSED DeviceIndex device_index = -1) const {
TORCH_CHECK(false, "Cannot get default CUDA generator without ATen_cuda library. ", CUDA_HELP);
}
virtual Device getDeviceFromPtr(void* /*data*/) const {

View File

@ -1,13 +1,19 @@
#pragma once
#include <c10/core/Allocator.h>
#include <c10/core/GeneratorImpl.h>
#include <c10/util/Exception.h>
#include <c10/util/Registry.h>
#include <ATen/detail/AcceleratorHooksInterface.h>
#include <memory>
namespace at {
class Context;
}
// NB: Class must live in `at` due to limitations of Registry.h.
namespace at {
@ -20,13 +26,13 @@ struct TORCH_API HIPHooksInterface : AcceleratorHooksInterface {
// squelch -Werror=non-virtual-dtor
~HIPHooksInterface() override = default;
void init() const override {
TORCH_CHECK(false, "Cannot initialize HIP without ATen_hip library.");
// Initialize the HIP library state
virtual void initHIP() const {
AT_ERROR("Cannot initialize HIP without ATen_hip library.");
}
const Generator& getDefaultGenerator(
C10_UNUSED DeviceIndex device_index = -1) const override {
TORCH_CHECK(false, "Cannot initialize HIP without ATen_hip library.");
virtual std::unique_ptr<c10::GeneratorImpl> initHIPGenerator(Context*) const {
AT_ERROR("Cannot initialize HIP generator without ATen_hip library.");
}
virtual bool hasHIP() const {
@ -45,6 +51,10 @@ struct TORCH_API HIPHooksInterface : AcceleratorHooksInterface {
AT_ERROR("Pinned memory requires HIP.");
}
virtual void registerHIPTypes(Context*) const {
AT_ERROR("Cannot registerHIPTypes() without ATen_hip library.");
}
virtual int getNumGPUs() const {
return 0;
}

View File

@ -1,33 +1,25 @@
#pragma once
#include <ATen/detail/AcceleratorHooksInterface.h>
#include <ATen/core/Generator.h>
#include <c10/core/Allocator.h>
#include <c10/util/Exception.h>
#include <c10/util/Registry.h>
namespace at {
struct TORCH_API IPUHooksInterface : AcceleratorHooksInterface {
~IPUHooksInterface() override = default;
struct TORCH_API IPUHooksInterface {
virtual ~IPUHooksInterface() = default;
void init() const override {
TORCH_CHECK(false, "Cannot initialize IPU without ATen_ipu library.");
virtual const Generator& getDefaultIPUGenerator(
DeviceIndex device_index [[maybe_unused]] = -1) const {
AT_ERROR(
"Cannot get the default IPU generator: the IPU backend is not "
"available.");
}
bool hasPrimaryContext(DeviceIndex device_index) const override {
TORCH_CHECK(false, "Cannot initialize IPU without ATen_ipu library.");
return false;
}
const Generator& getDefaultGenerator(
C10_UNUSED DeviceIndex device_index = -1) const override {
TORCH_CHECK(false, "Cannot initialize IPU without ATen_ipu library.");
}
Generator getNewGenerator(
DeviceIndex device_index [[maybe_unused]] = -1) const override {
TORCH_CHECK(false, "Cannot initialize IPU without ATen_ipu library.");
virtual Generator newIPUGenerator(DeviceIndex device_index [[maybe_unused]] = -1) const {
AT_ERROR(
"Cannot create a new IPU generator: the IPU backend is not available.");
}
};

View File

@ -3,24 +3,13 @@
#include <c10/util/Exception.h>
#include <c10/util/Registry.h>
#include <ATen/detail/AcceleratorHooksInterface.h>
// NB: Class must live in `at` due to limitations of Registry.h.
namespace at {
struct TORCH_API MAIAHooksInterface : AcceleratorHooksInterface {
struct TORCH_API MAIAHooksInterface {
// This should never actually be implemented, but it is used to
// squelch -Werror=non-virtual-dtor
~MAIAHooksInterface() override = default;
void init() const override {
TORCH_CHECK(false, "Cannot initialize MAIA without ATen_maia library.");
}
bool hasPrimaryContext(DeviceIndex device_index) const override {
TORCH_CHECK(false, "Cannot initialize MAIA without ATen_maia library.");
return false;
}
virtual ~MAIAHooksInterface() = default;
virtual std::string showConfig() const {
TORCH_CHECK(false, "Cannot query detailed MAIA version information.");

View File

@ -2,9 +2,9 @@
#pragma once
#include <ATen/detail/AcceleratorHooksInterface.h>
#include <c10/core/Allocator.h>
#include <ATen/core/Generator.h>
#include <ATen/detail/AcceleratorHooksInterface.h>
#include <c10/util/Exception.h>
#include <c10/util/Registry.h>
@ -22,7 +22,7 @@ struct TORCH_API MPSHooksInterface : AcceleratorHooksInterface {
~MPSHooksInterface() override = default;
// Initialize the MPS library state
void init() const override {
virtual void initMPS() const {
FAIL_MPSHOOKS_FUNC(__func__);
}
virtual bool hasMPS() const {
@ -31,8 +31,7 @@ struct TORCH_API MPSHooksInterface : AcceleratorHooksInterface {
virtual bool isOnMacOSorNewer(unsigned major = 13, unsigned minor = 0) const {
FAIL_MPSHOOKS_FUNC(__func__);
}
const Generator& getDefaultGenerator(
C10_UNUSED DeviceIndex device_index = -1) const override {
virtual const Generator& getDefaultMPSGenerator() const {
FAIL_MPSHOOKS_FUNC(__func__);
}
virtual Allocator* getMPSDeviceAllocator() const {

View File

@ -31,7 +31,7 @@ struct TORCH_API MTIAHooksInterface : AcceleratorHooksInterface {
~MTIAHooksInterface() override = default;
void init() const override {
virtual void initMTIA() const {
// Avoid logging here, since MTIA needs to init devices first before it knows
// how many devices are available. Make this a no-op if the MTIA extension is not
// dynamically loaded.

View File

@ -1,20 +1,18 @@
#pragma once
#include <ATen/core/Generator.h>
#include <ATen/detail/AcceleratorHooksInterface.h>
#include <c10/core/Allocator.h>
#include <c10/core/Device.h>
#include <c10/core/Storage.h>
#include <c10/util/Exception.h>
C10_DIAGNOSTIC_PUSH_AND_IGNORED_IF_DEFINED("-Wunused-parameter")
namespace at {
struct TORCH_API PrivateUse1HooksInterface : AcceleratorHooksInterface {
~PrivateUse1HooksInterface() override = default;
const at::Generator& getDefaultGenerator(
c10::DeviceIndex device_index) const override {
virtual const at::Generator& getDefaultGenerator(
c10::DeviceIndex device_index) const {
TORCH_CHECK_NOT_IMPLEMENTED(
false,
"You should register `PrivateUse1HooksInterface` for PrivateUse1 before call `getDefaultGenerator`.");
@ -26,23 +24,23 @@ struct TORCH_API PrivateUse1HooksInterface : AcceleratorHooksInterface {
"You should register `PrivateUse1HooksInterface` for PrivateUse1 before call `getDeviceFromPtr`.");
}
bool isPinnedPtr(const void* data) const override {
virtual bool isPinnedPtr(const void* data) const override {
return false;
}
Allocator* getPinnedMemoryAllocator() const override {
virtual Allocator* getPinnedMemoryAllocator() const override {
TORCH_CHECK(
false,
"You should register `PrivateUse1HooksInterface` for PrivateUse1 before call `getPinnedMemoryAllocator`.");
}
bool hasPrimaryContext(DeviceIndex device_index) const override {
virtual bool hasPrimaryContext(DeviceIndex device_index) const override {
TORCH_CHECK_NOT_IMPLEMENTED(
false,
"You should register `PrivateUse1HooksInterface` for PrivateUse1 before call `hasPrimaryContext`.");
}
void init() const override {}
virtual void initPrivateUse1() const {}
virtual void resizePrivateUse1Bytes(
const c10::Storage& storage,
size_t newsize) const {

View File

@ -4,6 +4,7 @@
#include <c10/util/Exception.h>
#include <c10/util/Registry.h>
#include <ATen/core/Generator.h>
#include <ATen/detail/AcceleratorHooksInterface.h>
C10_DIAGNOSTIC_PUSH_AND_IGNORED_IF_DEFINED("-Wunused-parameter")
@ -13,8 +14,10 @@ namespace at {
struct TORCH_API XPUHooksInterface : AcceleratorHooksInterface{
~XPUHooksInterface() override = default;
void init() const override {
TORCH_CHECK(false, "Cannot initialize XPU without ATen_xpu library.");
virtual void initXPU() const {
TORCH_CHECK(
false,
"Cannot initialize XPU without ATen_xpu library.");
}
virtual bool hasXPU() const {
@ -31,15 +34,12 @@ struct TORCH_API XPUHooksInterface : AcceleratorHooksInterface{
TORCH_CHECK(false, "Cannot get XPU global device index without ATen_xpu library.");
}
const Generator& getDefaultGenerator(
C10_UNUSED DeviceIndex device_index = -1) const override {
TORCH_CHECK(
false, "Cannot get default XPU generator without ATen_xpu library.");
virtual Generator getXPUGenerator(C10_UNUSED DeviceIndex device_index = -1) const {
TORCH_CHECK(false, "Cannot get XPU generator without ATen_xpu library.");
}
Generator getNewGenerator(
C10_UNUSED DeviceIndex device_index = -1) const override {
TORCH_CHECK(false, "Cannot get XPU generator without ATen_xpu library.");
virtual const Generator& getDefaultXPUGenerator(C10_UNUSED DeviceIndex device_index = -1) const {
TORCH_CHECK(false, "Cannot get default XPU generator without ATen_xpu library.");
}
virtual DeviceIndex getNumGPUs() const {

View File

View File

@ -362,7 +362,6 @@ static std::tuple<Tensor,Tensor,Tensor> convolution_backward_plumbing(
const Tensor& grad_output_, const Tensor& input_, const Tensor& weight_,
const c10::OptionalArrayRef<SymInt> bias_sizes_opt,
c10::SymIntArrayRef stride, c10::SymIntArrayRef padding, c10::SymIntArrayRef dilation, bool transposed,
// NOLINTNEXTLINE(performance-unnecessary-value-param)
c10::SymIntArrayRef output_padding, c10::SymInt groups, std::array<bool, 3> output_mask) {
const auto maybe_layer = maybeCurrentDynamicLayer();
vmap_check_escaped(maybe_layer, "convolution_backward_plumbing");

View File

@ -458,16 +458,6 @@ inline int64_t get_bdim_size2(
TORCH_INTERNAL_ASSERT(false);
}
inline c10::SymInt get_bdim_size2_symint(
const Tensor& a_value, std::optional<int64_t> a_bdim,
const Tensor& b_value, std::optional<int64_t> b_bdim) {
if (a_bdim)
return a_value.sym_size(*a_bdim);
if (b_bdim)
return b_value.sym_size(*b_bdim);
TORCH_INTERNAL_ASSERT(false);
}
// [start, start + 1, ..., stop - 1]
inline VmapDimVector range(int64_t start, int64_t stop) {
TORCH_INTERNAL_ASSERT(stop >= start);

View File

@ -8,7 +8,7 @@
#include <ATen/core/dispatch/Dispatcher.h>
#include <ATen/functorch/BatchRulesHelper.h>
namespace at::functorch {
namespace at { namespace functorch {
#define OP_DECOMPOSE(op) m.impl(#op, static_cast<decltype(&ATEN_FN(op))>(native::op));
#define OP_DECOMPOSE2(op, overload) m.impl(#op"."#overload, static_cast<decltype(&ATEN_FN2(op, overload))>(native::op));
@ -20,4 +20,4 @@ TORCH_LIBRARY_IMPL(aten, FuncTorchBatched, m) {
OP_DECOMPOSE(_unsafe_masked_index_put_accumulate);
}
}
}}

View File

@ -226,7 +226,7 @@ static Tensor one_hot_decomposition_hack(const Tensor &self, int64_t num_classes
if (num_classes <= 0) {
AT_ERROR("Can not infer total number of classes from empty tensor.");
} else {
shape.emplace_back(num_classes);
shape.push_back(num_classes);
return at::empty_symint(shape, self.options());
}
}
@ -246,7 +246,7 @@ static Tensor one_hot_decomposition_hack(const Tensor &self, int64_t num_classes
// TORCH_CHECK(num_classes > self.max().item().toLong(), "Class values must be smaller than num_classes.");
// }
shape.emplace_back(num_classes);
shape.push_back(num_classes);
Tensor ret = at::zeros_symint(shape, self.options());
return ret.scatter(-1, self.unsqueeze(-1), 1);
}

View File

@ -213,7 +213,7 @@ static std::tuple<Tensor,Tensor> native_dropout_batching_rule(const Tensor& tens
return std::make_tuple(output, mask);
}
static Tensor multinomial_batching_rule(const Tensor& self, const int64_t num_samples, const bool replacement, std::optional<Generator> generator) {
static Tensor multinomial_batching_rule(const Tensor& self, const int64_t num_samples, const bool replacement, const std::optional<Generator> generator) {
c10::impl::ExcludeDispatchKeyGuard guard(DispatchKey::FuncTorchVmapMode);
auto maybe_layer = maybeCurrentDynamicLayer();
const auto cur_level = maybe_layer->layerId();
@ -237,7 +237,7 @@ static Tensor multinomial_batching_rule(const Tensor& self, const int64_t num_sa
if (is_2D_case) {
self_value = reshape_dim_into(0, 0, self_value);
}
auto out = multinomial(self_value, num_samples, replacement, std::move(generator));
auto out = multinomial(self_value, num_samples, replacement, generator);
if (is_2D_case) {
out = reshape_dim_outof_symint(0, maybe_layer->batchSize(), out);
}
@ -249,7 +249,7 @@ static Tensor multinomial_batching_rule(const Tensor& self, const int64_t num_sa
// Must be same randomness with unbatched input
// 1D case: S -> multinomial(S) -> S
// 2D case: MS -> multinomial(MS) -> MS
return multinomial(self_value, num_samples, replacement, std::move(generator));
return multinomial(self_value, num_samples, replacement, generator);
}
template <typename A, A a, typename C>

View File

@ -58,7 +58,7 @@ static int64_t get_max_index_logical_dim(
static std::vector<std::optional<Tensor>> batchIndices(
ArrayRef<std::optional<Tensor>> indices,
ArrayRef<std::optional<int64_t>> indices_bdims,
const c10::SymInt& batch_size,
int64_t batch_size,
std::optional<int64_t> self_bdim,
std::optional<int64_t> values_bdim = std::nullopt) {
// There are 3 main cases:
@ -89,7 +89,7 @@ static std::vector<std::optional<Tensor>> batchIndices(
for (size_t i = 0; i < indices.size(); i++) {
auto index = indices[i];
if (index.has_value() && index->sym_numel() != 0) {
if (index.has_value() && index->numel() != 0) {
const auto idx_bdim = indices_bdims[i];
indices_.emplace_back(maybePadToLogicalRank(moveBatchDimToFront(index.value(), idx_bdim), idx_bdim, maxLogicalRank));
if (index.value().dtype() == kBool && indices_bdims[i].has_value()) {
@ -346,10 +346,10 @@ namespace {
// Code is mostly duplicated from
// https://github.com/pytorch/pytorch/blob/fb0e27d38a8fdab4e1c14d6378c9e41cb30fd6a3
// /aten/src/ATen/native/TensorAdvancedIndexing.cpp#L294-L312
VmapSymDimVector compute_indexed_shape(const Tensor &src, TensorList indices_list)
VmapDimVector compute_indexed_shape(const Tensor &src, TensorList indices_list)
{
int64_t dims_before = 0, dims_indexed = 0;
SymIntArrayRef replacement_shape;
IntArrayRef replacement_shape;
for (const auto dim : c10::irange(indices_list.size())) {
if (!indices_list[dim].defined()) {
if (dims_indexed == 0) {
@ -357,7 +357,7 @@ namespace {
}
} else {
dims_indexed++;
replacement_shape = indices_list[dim].sym_sizes();
replacement_shape = indices_list[dim].sizes();
}
}
@ -365,7 +365,7 @@ namespace {
// The offset in these dimensions is computed by the kernel using the index tensor's
// values and the stride of src. The new shape is not meaningful. It's used to make
// the shape compatible with the result tensor.
auto shape = VmapSymDimVector(src.sym_sizes());
auto shape = VmapDimVector(src.sizes());
int64_t end = dims_before + dims_indexed;
shape.erase(shape.begin() + dims_before, shape.begin() + end);
shape.insert(shape.begin() + dims_before, replacement_shape.begin(), replacement_shape.end());
@ -375,7 +375,7 @@ namespace {
// Code is mostly duplicated from
// https://github.com/pytorch/pytorch/blob/fb0e27d38a8fdab4e1c14d6378c9e41cb30fd6a3
// /aten/src/ATen/native/TensorAdvancedIndexing.cpp#L379-L405
VmapSymDimVector get_indexed_shape(Tensor self, const torch::List<std::optional<at::Tensor>> &orig)
VmapDimVector get_indexed_shape(Tensor self, const torch::List<std::optional<at::Tensor>> &orig)
{
at::native::checkIndexTensorTypes(orig, /*allow_int*/ true);
// first expand BoolTensor (masks) or ByteTensor (masks) into 1 or more LongTensors
@ -406,13 +406,13 @@ namespace {
ArrayRef<std::optional<int64_t>> indices_bdims,
const Tensor &values,
std::optional<int64_t> values_bdim,
std::optional<c10::SymInt> opt_batch_size = {}) {
std::optional<int64_t> opt_batch_size = {}) {
Tensor self_ = moveBatchDimToFront(self, self_bdim);
Tensor values_ = moveBatchDimToFront(values, values_bdim);
// for inplace variants `index_put_` and `_index_put_impl_` we find the batch_size
// here, while `index_put` does it outside of this function.
const auto batch_size = opt_batch_size ? opt_batch_size.value() : self_.sym_size(0);
const auto batch_size = opt_batch_size ? opt_batch_size.value() : self_.size(0);
self_ = ensure_has_bdim(self_, self_bdim.has_value(), batch_size);
values_ = ensure_has_bdim(values_, values_bdim.has_value(), batch_size);
TORCH_INTERNAL_ASSERT(indices.size() == indices_bdims.size());
@ -431,7 +431,7 @@ namespace {
// number of unit dims (for broadcasting value to indexed_shape)
auto n_unit_dims = indexed_shape.size() - values_sizes.size();
VmapSymDimVector new_values_shape(values_sizes.size() + n_unit_dims);
VmapDimVector new_values_shape(values_sizes.size() + n_unit_dims);
// add the batch-dim
new_values_shape[0] = batch_size;
@ -445,7 +445,7 @@ namespace {
// since batch and unit dims are already filled.
new_values_shape[idx + n_unit_dims] = values_sizes[idx];
}
values_ = values_.view_symint(new_values_shape);
values_ = values_.view(new_values_shape);
}
return std::make_tuple(self_, indices_, values_);
@ -613,14 +613,14 @@ std::tuple<Tensor, std::optional<int64_t>> index_put_batch_rule(
TORCH_INTERNAL_ASSERT(indices.size() == indices_bdims.size());
// find the batch_size
c10::SymInt batch_size = 0;
int64_t batch_size = 0;
if (self_bdim || values_bdim) {
batch_size = get_bdim_size2_symint(self, self_bdim, values, values_bdim);
batch_size = get_bdim_size2(self, self_bdim, values, values_bdim);
} else {
// one or more of the indices is batched.
for (size_t i = 0; i < indices.size(); i++) {
if (indices_bdims[i] && indices[i].has_value()) {
batch_size = indices[i].value().sym_size(*indices_bdims[i]);
batch_size = indices[i].value().size(*indices_bdims[i]);
break;
}
}
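
A standalone C++ sketch of the shape surgery performed by compute_indexed_shape above (the function name and the dims_before/dims_indexed/shape values below are made up for illustration): the dimensions covered by index tensors are erased from src's shape and replaced by the broadcast shape of the indices.

#include <cstdint>
#include <iostream>
#include <vector>

// Hypothetical, ATen-free version of the erase/insert step shown above.
std::vector<int64_t> compute_indexed_shape_sketch(
    std::vector<int64_t> shape,                     // src.sizes()
    int64_t dims_before,                            // leading dims not covered by indices
    int64_t dims_indexed,                           // number of dims covered by indices
    const std::vector<int64_t>& replacement_shape)  // broadcast shape of the indices
{
  const int64_t end = dims_before + dims_indexed;
  shape.erase(shape.begin() + dims_before, shape.begin() + end);
  shape.insert(shape.begin() + dims_before,
               replacement_shape.begin(), replacement_shape.end());
  return shape;
}

int main() {
  // src of shape [4, 5, 6] with dims 1 and 2 indexed by indices broadcasting to [3]
  for (auto d : compute_indexed_shape_sketch({4, 5, 6}, 1, 2, {3})) {
    std::cout << d << ' ';  // prints: 4 3
  }
  std::cout << '\n';
}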

View File

@ -102,7 +102,7 @@ static Tensor moveDimToFrontAndExpand(Tensor tensor, std::optional<int64_t> dim,
} else {
tensor = tensor.unsqueeze(0);
auto expanded_sizes = tensor.sym_sizes().vec();
expanded_sizes[0] = std::move(size);
expanded_sizes[0] = size;
tensor = tensor.expand_symint(expanded_sizes);
}
return tensor;
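
For context, a minimal libtorch sketch of the unsqueeze-then-expand pattern in this hunk, which gives a tensor a leading batch dimension without copying; the input shape and batch_size below are made up for illustration.

#include <torch/torch.h>
#include <iostream>

int main() {
  torch::Tensor t = torch::randn({3, 4});  // a tensor without a batch dim
  int64_t batch_size = 8;                  // hypothetical batch size

  // Add a size-1 leading dim, then expand it to batch_size (a view, no copy),
  // mirroring the unsqueeze(0) + expand pattern above.
  t = t.unsqueeze(0);
  auto expanded_sizes = t.sizes().vec();
  expanded_sizes[0] = batch_size;
  t = t.expand(expanded_sizes);

  std::cout << t.sizes() << '\n';  // [8, 3, 4]
}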

View File

@ -4,6 +4,7 @@
#include <ATen/WrapDimUtils.h>
#include <ATen/functorch/TensorWrapper.h>
#include <ATen/functorch/BatchedTensorImpl.h>
#include <ATen/ATen.h>
#include <ATen/Dispatch.h>
#include <c10/util/irange.h>
#include <ATen/NamedTensorUtils.h>

View File

@ -12,15 +12,14 @@ namespace at::mps {
// The real implementation of MPSHooksInterface
struct MPSHooks : public at::MPSHooksInterface {
MPSHooks(at::MPSHooksArgs) {}
void init() const override;
void initMPS() const override;
// MPSDevice interface
bool hasMPS() const override;
bool isOnMacOSorNewer(unsigned major, unsigned minor) const override;
// MPSGeneratorImpl interface
const Generator& getDefaultGenerator(
DeviceIndex device_index = -1) const override;
const Generator& getDefaultMPSGenerator() const override;
// MPSStream interface
void deviceSynchronize() const override;

View File

@ -10,7 +10,7 @@
namespace at::mps {
void MPSHooks::init() const {
void MPSHooks::initMPS() const {
C10_LOG_API_USAGE_ONCE("aten.init.mps");
// TODO: initialize MPS devices and streams here
}
@ -59,7 +59,7 @@ Allocator* MPSHooks::getMPSDeviceAllocator() const {
return at::mps::GetMPSAllocator();
}
const Generator& MPSHooks::getDefaultGenerator([[maybe_unused]] DeviceIndex device_index) const {
const Generator& MPSHooks::getDefaultMPSGenerator() const {
return at::mps::detail::getDefaultMPSGenerator();
}
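
The two MPS hunks above touch virtual members of the MPS hooks object (init vs. initMPS, getDefaultGenerator vs. getDefaultMPSGenerator). For readers unfamiliar with the pattern, here is a generic, self-contained sketch of the hooks idea; the names are invented and do not match the real MPSHooksInterface API.

#include <iostream>
#include <memory>

// Generic "hooks" pattern: core code talks to an abstract interface whose
// defaults are no-ops, and a backend-specific struct overrides the members.
struct BackendHooksInterface {
  virtual ~BackendHooksInterface() = default;
  virtual bool hasBackend() const { return false; }
  virtual void init() const {}  // no-op when the backend is unavailable
};

struct MyBackendHooks : BackendHooksInterface {
  bool hasBackend() const override { return true; }
  void init() const override { std::cout << "initializing backend\n"; }
};

int main() {
  std::unique_ptr<BackendHooksInterface> hooks = std::make_unique<MyBackendHooks>();
  if (hooks->hasBackend()) {
    hooks->init();
  }
}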

View File

@ -207,7 +207,6 @@ TORCH_META_FUNC(mm)(const Tensor & self, const Tensor & mat2) {
TORCH_META_FUNC(linalg_vector_norm)(const Tensor& self, const Scalar& scalar_ord, OptionalIntArrayRef opt_dim, bool keepdim, std::optional<ScalarType> opt_dtype) {
at::native::checkFloatingOrComplex(self, "linalg.vector_norm");
TORCH_CHECK(!at::isComplexType(scalar_ord.type()), "linalg.vector_norm: Expected a non-complex scalar as the order of norm.");
auto dim = opt_dim.value_or(IntArrayRef{});
// Casting a large integer to a double will just introduce an error for
@ -2893,7 +2892,6 @@ Tensor linalg_matrix_norm(
bool keepdim,
std::optional<ScalarType> opt_dtype) {
// Check ord first as it will be used in the dtype check of A
TORCH_CHECK(!at::isComplexType(scalar_ord.type()), "linalg.matrix_norm: Expected a non-complex scalar as the order of norm.");
auto ord = scalar_ord.toDouble();
auto abs_ord = std::abs(ord);
TORCH_CHECK(abs_ord == 2. || abs_ord == 1. || abs_ord == INFINITY, "linalg.matrix_norm: Order ", ord, " not supported.");
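
Both linalg norm hunks concern a TORCH_CHECK that rejects a complex scalar as the order of the norm before it is converted to double. A minimal sketch of that validation, reusing only the Scalar calls visible in the hunks (the helper name is invented):

#include <ATen/ATen.h>

// Hypothetical helper: reject a complex norm order, then convert it to double.
static double checked_norm_ord(const at::Scalar& scalar_ord, const char* op_name) {
  TORCH_CHECK(!at::isComplexType(scalar_ord.type()),
              op_name, ": Expected a non-complex scalar as the order of norm.");
  return scalar_ord.toDouble();
}

// Usage: checked_norm_ord(at::Scalar(2.0), "linalg.matrix_norm") returns 2.0,
// while passing a complex Scalar throws with the message above.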

View File

@ -2291,7 +2291,7 @@ bool cpu_equal(const Tensor& self, const Tensor& other) {
other_data += strides[1];
}
});
}), kBool, kBFloat16, kHalf, AT_EXPAND(AT_ALL_TYPES_AND_COMPLEX), AT_EXPAND(AT_FLOAT8_TYPES), AT_EXPAND(AT_BAREBONES_UNSIGNED_TYPES));
}), kBool, kBFloat16, kHalf, AT_EXPAND(AT_ALL_TYPES_AND_COMPLEX), AT_EXPAND(AT_BAREBONES_UNSIGNED_TYPES));
return result.load();
}

View File

@ -5,8 +5,6 @@
#include <ATen/Dispatch.h>
#include <c10/util/irange.h>
#include <limits>
#ifndef AT_PER_OPERATOR_HEADERS
#include <ATen/Functions.h>
#include <ATen/NativeFunctions.h>
@ -35,17 +33,6 @@ Tensor _bincount_cpu_template(
AT_ERROR("bincount only supports 1-d non-negative integral inputs.");
}
// Ensure max_val < 2 ^ 63 - 1 (9223372036854775807)
auto max_val = *self.max().data_ptr<input_t>();
if (max_val >= std::numeric_limits<int64_t>::max()) {
AT_ERROR(
"maximum value of input overflowed, it should be < ",
std::numeric_limits<int64_t>::max(),
" but got ",
max_val
);
}
bool has_weights = weights.defined();
if (has_weights && (weights.dim() != 1 || weights.size(0) != self.size(0))) {
AT_ERROR("weights should be 1-d and have the same length as input");
@ -53,7 +40,7 @@ Tensor _bincount_cpu_template(
Tensor output;
int64_t self_size = self.size(0);
int64_t nbins = static_cast<int64_t>(max_val) + 1L;
int64_t nbins = static_cast<int64_t>(*self.max().data_ptr<input_t>()) + 1L;
nbins = std::max(nbins, minlength); // at least minlength # of bins
const input_t* self_p = self.const_data_ptr<input_t>();
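
A standalone sketch of the overflow guard in the bincount hunk above (max_val is taken as a double here for simplicity; in the real code it has the input's integral type): the maximum input value must stay below INT64_MAX so that nbins = max_val + 1 cannot overflow.

#include <algorithm>
#include <cstdint>
#include <limits>
#include <stdexcept>

int64_t safe_nbins(double max_val, int64_t minlength) {
  // Ensure max_val < 2^63 - 1 before the +1 below.
  if (max_val >= static_cast<double>(std::numeric_limits<int64_t>::max())) {
    throw std::runtime_error("maximum value of input overflowed");
  }
  int64_t nbins = static_cast<int64_t>(max_val) + 1;
  return std::max<int64_t>(nbins, minlength);  // at least minlength bins
}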

View File

@ -1435,8 +1435,8 @@ Tensor & index_select_out_cpu_(const Tensor & self, int64_t dim, const Tensor &
});
});
} else {
AT_DISPATCH_V2(
self.scalar_type(), "index_select", AT_WRAP([&index_contig, &self, &result, &dim, &numel] {
AT_DISPATCH_ALL_TYPES_AND_COMPLEX_AND4(ScalarType::ComplexHalf, ScalarType::Half, ScalarType::Bool, ScalarType::BFloat16,
self.scalar_type(), "index_select", [&index_contig, &self, &result, &dim, &numel] {
auto self_stride = self.dim() == 0 ? 1 : self.stride(dim);
auto result_stride = result.dim() == 0 ? 1 : result.stride(dim);
@ -1453,7 +1453,7 @@ Tensor & index_select_out_cpu_(const Tensor & self, int64_t dim, const Tensor &
*(result_data_ptr + i * result_stride) = *self_ip;
}
});
}), AT_EXPAND(AT_ALL_TYPES_AND_COMPLEX), ScalarType::ComplexHalf, ScalarType::Half, ScalarType::Bool, ScalarType::BFloat16, AT_EXPAND(AT_FLOAT8_TYPES));
});
}
}
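
The index_select hunk switches between a fixed AT_DISPATCH_ALL_TYPES_AND_COMPLEX_AND4 macro and the variadic AT_DISPATCH_V2 form. A hedged usage sketch of the AT_DISPATCH_V2 pattern as it appears in these hunks (the function name is invented and the ATen/Dispatch_v2.h include path is an assumption): the kernel body goes through AT_WRAP, and the accepted dtypes follow as a trailing list, with AT_EXPAND splicing in predefined groups.

#include <ATen/ATen.h>
#include <ATen/Dispatch_v2.h>

// scalar_t is defined by the dispatch macro for the matched dtype.
void fill_with_one(at::Tensor& t) {
  AT_DISPATCH_V2(t.scalar_type(), "fill_with_one", AT_WRAP([&] {
    t.fill_(scalar_t(1));
  }), AT_EXPAND(AT_ALL_TYPES_AND_COMPLEX), at::kHalf, at::kBFloat16);
}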

View File

@ -106,7 +106,7 @@ inline Tensor& fill_empty_deterministic_(Tensor& tensor) {
AT_DISPATCH_V2(
tensor.scalar_type(), "fill_empty_deterministic_", AT_WRAP([&]() {
tensor.fill_(std::numeric_limits<scalar_t>::quiet_NaN());
}), AT_EXPAND(AT_FLOATING_TYPES), AT_EXPAND(AT_COMPLEX_TYPES), AT_EXPAND(AT_FLOAT8_TYPES), kBFloat16, kHalf, kComplexHalf);
}), AT_EXPAND(AT_FLOATING_TYPES), AT_EXPAND(AT_COMPLEX_TYPES), AT_EXPAND(AT_FLOAT8_TYPES), kBFloat16, kHalf);
} else {
AT_DISPATCH_V2(
tensor.scalar_type(), "fill_empty_deterministic_", AT_WRAP([&]() {

View File

@ -44,7 +44,7 @@ Scalar _local_scalar_dense_cuda(const Tensor& self) {
cudaStream_t stream = at::cuda::getCurrentCUDAStream();
at::cuda::memcpy_and_sync((void *)value.const_data_ptr<scalar_t>(), self.const_data_ptr<scalar_t>(), sizeof(scalar_t), cudaMemcpyDeviceToHost, stream);
r = Scalar(*value.const_data_ptr<scalar_t>());
}), AT_EXPAND(AT_ALL_TYPES_AND_COMPLEX), kComplexHalf, kHalf, kBool, kBFloat16, AT_EXPAND(AT_FLOAT8_TYPES), AT_EXPAND(AT_BAREBONES_UNSIGNED_TYPES));
}), AT_EXPAND(AT_ALL_TYPES_AND_COMPLEX), kComplexHalf, kHalf, kBool, kBFloat16, AT_EXPAND(AT_BAREBONES_UNSIGNED_TYPES));
#if defined(USE_ROCM)
} else {
auto cpu_self = self.cpu();

View File

@ -285,64 +285,44 @@ struct Copy<dst_t, c10::complex<float>> {
}
};
#define AT_DISPATCH_SOURCE_TYPES(TYPE, NAME, ...) \
AT_DISPATCH_SWITCH( \
TYPE, \
NAME, \
AT_PRIVATE_CASE_TYPE_USING_HINT( \
at::ScalarType::Byte, \
src_t, \
__VA_ARGS__) AT_PRIVATE_CASE_TYPE_USING_HINT(at::ScalarType::Char, src_t, __VA_ARGS__) \
AT_PRIVATE_CASE_TYPE_USING_HINT( \
at::ScalarType::Long, src_t, __VA_ARGS__) \
AT_PRIVATE_CASE_TYPE_USING_HINT( \
at::ScalarType::Short, src_t, __VA_ARGS__) \
AT_PRIVATE_CASE_TYPE_USING_HINT( \
at::ScalarType::Int, src_t, __VA_ARGS__) \
AT_PRIVATE_CASE_TYPE_USING_HINT( \
at::ScalarType::Double, src_t, __VA_ARGS__) \
AT_PRIVATE_CASE_TYPE_USING_HINT( \
at::ScalarType::Float, src_t, __VA_ARGS__) \
AT_PRIVATE_CASE_TYPE_USING_HINT( \
at::ScalarType::ComplexDouble, \
src_t, \
__VA_ARGS__) \
AT_PRIVATE_CASE_TYPE_USING_HINT( \
at::ScalarType::ComplexFloat, \
src_t, \
__VA_ARGS__) \
AT_PRIVATE_CASE_TYPE_USING_HINT( \
at::ScalarType::Half, \
src_t, \
__VA_ARGS__) \
AT_PRIVATE_CASE_TYPE_USING_HINT( \
at::ScalarType::BFloat16, \
src_t, \
__VA_ARGS__) \
AT_PRIVATE_CASE_TYPE_USING_HINT( \
at::ScalarType::Bool, \
src_t, \
__VA_ARGS__) \
AT_PRIVATE_CASE_TYPE_USING_HINT( \
at::ScalarType:: \
Float8_e4m3fn, \
src_t, \
__VA_ARGS__) \
AT_PRIVATE_CASE_TYPE_USING_HINT( \
at::ScalarType:: \
Float8_e4m3fnuz, \
src_t, \
__VA_ARGS__) \
AT_PRIVATE_CASE_TYPE_USING_HINT( \
at::ScalarType:: \
Float8_e5m2, \
src_t, \
__VA_ARGS__) \
AT_PRIVATE_CASE_TYPE_USING_HINT( \
at::ScalarType:: \
Float8_e5m2fnuz, \
src_t, \
__VA_ARGS__))
#define AT_DISPATCH_SOURCE_TYPES(TYPE, NAME, ...) \
AT_DISPATCH_SWITCH( \
TYPE, \
NAME, \
AT_PRIVATE_CASE_TYPE_USING_HINT( \
at::ScalarType::Byte, src_t, __VA_ARGS__) \
AT_PRIVATE_CASE_TYPE_USING_HINT( \
at::ScalarType::Char, src_t, __VA_ARGS__) \
AT_PRIVATE_CASE_TYPE_USING_HINT( \
at::ScalarType::Long, src_t, __VA_ARGS__) \
AT_PRIVATE_CASE_TYPE_USING_HINT( \
at::ScalarType::Short, src_t, __VA_ARGS__) \
AT_PRIVATE_CASE_TYPE_USING_HINT( \
at::ScalarType::Int, src_t, __VA_ARGS__) \
AT_PRIVATE_CASE_TYPE_USING_HINT( \
at::ScalarType::Double, src_t, __VA_ARGS__) \
AT_PRIVATE_CASE_TYPE_USING_HINT( \
at::ScalarType::Float, src_t, __VA_ARGS__) \
AT_PRIVATE_CASE_TYPE_USING_HINT( \
at::ScalarType::ComplexDouble, \
src_t, \
__VA_ARGS__) \
AT_PRIVATE_CASE_TYPE_USING_HINT( \
at::ScalarType::ComplexFloat, \
src_t, \
__VA_ARGS__) \
AT_PRIVATE_CASE_TYPE_USING_HINT( \
at::ScalarType::Half, \
src_t, \
__VA_ARGS__) \
AT_PRIVATE_CASE_TYPE_USING_HINT( \
at::ScalarType::BFloat16, \
src_t, \
__VA_ARGS__) \
AT_PRIVATE_CASE_TYPE_USING_HINT( \
at::ScalarType::Bool, \
src_t, \
__VA_ARGS__))
namespace {
@ -430,14 +410,10 @@ void foreach_tensor_copy_list_kernel_cuda_(
std::vector<std::vector<at::Tensor>> tensor_lists{src.vec(), self.vec()};
AT_DISPATCH_ALL_TYPES_AND_COMPLEX_AND7(
AT_DISPATCH_ALL_TYPES_AND_COMPLEX_AND3(
ScalarType::Half,
ScalarType::BFloat16,
ScalarType::Bool,
ScalarType::Float8_e4m3fn,
ScalarType::Float8_e4m3fnuz,
ScalarType::Float8_e5m2,
ScalarType::Float8_e5m2fnuz,
self[0].scalar_type(),
"foreach_tensor_copy",
[&]() {
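
The first hunk for this file rewrites a dispatch-by-source-type macro built from AT_DISPATCH_SWITCH and AT_PRIVATE_CASE_TYPE_USING_HINT, where the matched type is exposed to the body under the alias src_t so a second dispatch over the destination type can nest inside it. An ATen-free sketch of that mechanism with invented names (Tag, dispatch_source_type, PrintSourceWidth, SrcT):

#include <cstdint>
#include <iostream>

enum class Tag { Float, Double, Int64 };

// The body is a functor with a templated call operator; the dispatcher binds
// the runtime tag to a concrete C++ type, much like src_t in the macro above.
struct PrintSourceWidth {
  template <typename SrcT>
  void operator()() const {
    std::cout << "sizeof(src_t) = " << sizeof(SrcT) << '\n';
  }
};

template <typename Body>
void dispatch_source_type(Tag tag, const Body& body) {
  switch (tag) {
    case Tag::Float:  body.template operator()<float>();   break;
    case Tag::Double: body.template operator()<double>();  break;
    case Tag::Int64:  body.template operator()<int64_t>(); break;
  }
}

int main() {
  dispatch_source_type(Tag::Double, PrintSourceWidth{});  // prints sizeof(src_t) = 8
}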

Some files were not shown because too many files have changed in this diff.