Mirror of https://github.com/pytorch/pytorch.git
Synced 2025-11-02 14:34:54 +08:00

Compare commits (38 commits): zeros-and-... → findhao/op...
| SHA1 |
|---|
| 1d983f0775 |
| f320e4ba86 |
| 73dfb2bc2d |
| f2654ae713 |
| ab40b51c5d |
| 8c1b793071 |
| de205901f3 |
| 9f2936931a |
| 5fa8031ae5 |
| a245137d76 |
| 2d93c5f720 |
| 6f3b42a073 |
| db4c9a54a2 |
| f78da95bc5 |
| 7c2bc74a72 |
| 7b366a2b70 |
| b437ffe8b0 |
| 085e2f5416 |
| 425ad9ccdb |
| 900671f799 |
| 1f30017712 |
| 1fdf24d9a5 |
| 8a4bc3cc09 |
| 78f5027b48 |
| 6e2d4c661a |
| 30dd419560 |
| f280038562 |
| 0bb482185c |
| 18c2804981 |
| a6b6bbc293 |
| ebd4755b0d |
| 8779577950 |
| a6d9a506c3 |
| a0ecd4f45d |
| 9cdac0662b |
| 57be1aae4b |
| 22ee74895b |
| ea97de291b |
@@ -355,12 +355,6 @@ case "$image" in
    CONDA_CMAKE=yes
    VISION=yes
    ;;
  pytorch-linux-jammy-py3-clang18-asan)
    ANACONDA_PYTHON_VERSION=3.10
    CLANG_VERSION=18
    CONDA_CMAKE=yes
    VISION=yes
    ;;
  pytorch-linux-jammy-py3.9-gcc11)
    ANACONDA_PYTHON_VERSION=3.9
    GCC_VERSION=11
@@ -387,13 +381,6 @@ case "$image" in
    HALIDE=yes
    TRITON=yes
    ;;
  pytorch-linux-jammy-py3.12-triton-cpu)
    CUDA_VERSION=12.4
    ANACONDA_PYTHON_VERSION=3.12
    GCC_VERSION=11
    CONDA_CMAKE=yes
    TRITON_CPU=yes
    ;;
  pytorch-linux-focal-linter)
    # TODO: Use 3.9 here because of this issue https://github.com/python/mypy/issues/13627.
    # We will need to update mypy version eventually, but that's for another day. The task
@@ -523,7 +510,6 @@ docker build \
  --build-arg "UCC_COMMIT=${UCC_COMMIT}" \
  --build-arg "CONDA_CMAKE=${CONDA_CMAKE}" \
  --build-arg "TRITON=${TRITON}" \
  --build-arg "TRITON_CPU=${TRITON_CPU}" \
  --build-arg "ONNX=${ONNX}" \
  --build-arg "DOCS=${DOCS}" \
  --build-arg "INDUCTOR_BENCHMARKS=${INDUCTOR_BENCHMARKS}" \
@@ -1 +0,0 @@
6a333f1b05671f6fada4ba7bbfae4a02a9d96f4f
@@ -13,17 +13,11 @@ if [ -n "$CLANG_VERSION" ]; then
elif [[ $UBUNTU_VERSION == 22.04 ]]; then
  # work around ubuntu apt-get conflicts
  sudo apt-get -y -f install
  wget --no-check-certificate -O - https://apt.llvm.org/llvm-snapshot.gpg.key | sudo apt-key add -
  if [[ $CLANG_VERSION == 18 ]]; then
    apt-add-repository "deb http://apt.llvm.org/jammy/ llvm-toolchain-jammy-18 main"
  fi
fi

sudo apt-get update
apt-get install -y --no-install-recommends clang-"$CLANG_VERSION" llvm-"$CLANG_VERSION"
if [[ $CLANG_VERSION == 18 ]]; then
  apt-get install -y --no-install-recommends libomp-18-dev
fi
apt-get install -y --no-install-recommends clang-"$CLANG_VERSION"
apt-get install -y --no-install-recommends llvm-"$CLANG_VERSION"

# Install dev version of LLVM.
if [ -n "$LLVMDEV" ]; then
@@ -32,7 +32,7 @@ pip_install coloredlogs packaging

pip_install onnxruntime==1.18.1
pip_install onnx==1.16.2
pip_install onnxscript==0.1.0.dev20241008 --no-deps
pip_install onnxscript==0.1.0.dev20240831 --no-deps
# required by onnxscript
pip_install ml_dtypes
@@ -15,11 +15,8 @@ conda_reinstall() {
if [ -n "${XPU_VERSION}" ]; then
  TRITON_REPO="https://github.com/intel/intel-xpu-backend-for-triton"
  TRITON_TEXT_FILE="triton-xpu"
elif [ -n "${TRITON_CPU}" ]; then
  TRITON_REPO="https://github.com/triton-lang/triton-cpu"
  TRITON_TEXT_FILE="triton-cpu"
else
  TRITON_REPO="https://github.com/triton-lang/triton"
  TRITON_REPO="https://github.com/openai/triton"
  TRITON_TEXT_FILE="triton"
fi

@@ -47,10 +44,9 @@ chown -R jenkins /var/lib/jenkins/triton
chgrp -R jenkins /var/lib/jenkins/triton
pushd /var/lib/jenkins/

as_jenkins git clone --recursive ${TRITON_REPO} triton
as_jenkins git clone ${TRITON_REPO} triton
cd triton
as_jenkins git checkout ${TRITON_PINNED_COMMIT}
as_jenkins git submodule update --init --recursive
cd python

# TODO: remove patch setup.py once we have a proper fix for https://github.com/triton-lang/triton/issues/4527
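One note on the two clone strategies visible above: cloning with `--recursive` initializes submodules at the default branch's HEAD, and a later `git checkout ${TRITON_PINNED_COMMIT}` moves only the superproject; the plain clone followed by `git checkout` and then `git submodule update --init --recursive` instead pins the submodules to the exact revisions recorded by the pinned commit.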
@@ -139,9 +139,9 @@ opt-einsum==3.3
#Pinned versions: 3.3
#test that import: test_linalg.py

optree==0.13.0
optree==0.12.1
#Description: A library for tree manipulation
#Pinned versions: 0.13.0
#Pinned versions: 0.12.1
#test that import: test_vmap.py, test_aotdispatch.py, test_dynamic_shapes.py,
#test_pytree.py, test_ops.py, test_control_flow.py, test_modules.py,
#common_utils.py, test_eager_transforms.py, test_python_dispatch.py,
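Note that the `#Pinned versions:` comment is meant to mirror the `==` pin above it, so the pair moves together here (0.13.0 on one side of the diff, 0.12.1 on the other); the same optree pin shows up again below in the iOS simulator requirements, the test requirements, and the wheel-install step of `_win-test.yml`.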
@@ -147,13 +147,6 @@ COPY ci_commit_pins/triton.txt triton.txt
RUN if [ -n "${TRITON}" ]; then bash ./install_triton.sh; fi
RUN rm install_triton.sh common_utils.sh triton.txt

ARG TRITON_CPU
COPY ./common/install_triton.sh install_triton.sh
COPY ./common/common_utils.sh common_utils.sh
COPY ci_commit_pins/triton-cpu.txt triton-cpu.txt
RUN if [ -n "${TRITON_CPU}" ]; then bash ./install_triton.sh; fi
RUN rm install_triton.sh common_utils.sh triton-cpu.txt

ARG EXECUTORCH
# Build and install executorch
COPY ./common/install_executorch.sh install_executorch.sh
@@ -178,7 +178,7 @@ fi
# sccache will fail for CUDA builds if all cores are used for compiling
# gcc 7 with sccache seems to have intermittent OOM issue if all cores are used
if [ -z "$MAX_JOBS" ]; then
  if { [[ "$BUILD_ENVIRONMENT" == *cuda* ]]; } && which sccache > /dev/null; then
  if { [[ "$BUILD_ENVIRONMENT" == *cuda* ]] || [[ "$BUILD_ENVIRONMENT" == *gcc7* ]]; } && which sccache > /dev/null; then
    export MAX_JOBS=$(($(nproc) - 1))
  fi
fi
@@ -218,6 +218,10 @@ if [[ "${BUILD_ENVIRONMENT}" == *-pch* ]]; then
  export USE_PRECOMPILED_HEADERS=1
fi

if [[ "${BUILD_ENVIRONMENT}" == *linux-focal-py3.7-gcc7-build* ]]; then
  export USE_GLOO_WITH_OPENSSL=ON
fi

if [[ "${BUILD_ENVIRONMENT}" != *android* && "${BUILD_ENVIRONMENT}" != *cuda* ]]; then
  export BUILD_STATIC_RUNTIME_BENCHMARK=ON
fi
@@ -191,22 +191,9 @@ function install_torchrec_and_fbgemm() {
  pip_uninstall torchrec-nightly
  pip_uninstall fbgemm-gpu-nightly
  pip_install setuptools-git-versioning scikit-build pyre-extensions

  # TODO (huydhn): I still have no clue on why sccache doesn't work with only fbgemm_gpu here, but it
  # seems to be an sccache-related issue
  if [[ "$IS_A100_RUNNER" == "1" ]]; then
    unset CMAKE_CUDA_COMPILER_LAUNCHER
    sudo mv /opt/cache/bin /opt/cache/bin-backup
  fi

  # See https://github.com/pytorch/pytorch/issues/106971
  CUDA_PATH=/usr/local/cuda-12.1 pip_install --no-use-pep517 --user "git+https://github.com/pytorch/FBGEMM.git@${fbgemm_commit}#egg=fbgemm-gpu&subdirectory=fbgemm_gpu"
  pip_install --no-use-pep517 --user "git+https://github.com/pytorch/torchrec.git@${torchrec_commit}"

  if [[ "$IS_A100_RUNNER" == "1" ]]; then
    export CMAKE_CUDA_COMPILER_LAUNCHER=/opt/cache/bin/sccache
    sudo mv /opt/cache/bin-backup /opt/cache/bin
  fi
}

function clone_pytorch_xla() {
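A brief note on the A100 workaround above (my reading of the script, not stated explicitly in the source): it appears to take sccache entirely out of the fbgemm_gpu build by unsetting `CMAKE_CUDA_COMPILER_LAUNCHER` and moving `/opt/cache/bin`, the directory the launcher points into, out of the way, then restores both once torchrec and FBGEMM are installed.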
@@ -403,7 +403,7 @@ pr_time_benchmarks() {
  PYTHONPATH=$(pwd)/benchmarks/dynamo/pr_time_benchmarks source benchmarks/dynamo/pr_time_benchmarks/benchmark_runner.sh "$TEST_REPORTS_DIR/pr_time_benchmarks_results.csv" "benchmarks/dynamo/pr_time_benchmarks/benchmarks"
  echo "benchmark results on current PR: "
  cat "$TEST_REPORTS_DIR/pr_time_benchmarks_results.csv"
  PYTHONPATH=$(pwd)/benchmarks/dynamo/pr_time_benchmarks python benchmarks/dynamo/pr_time_benchmarks/check_results.py "benchmarks/dynamo/pr_time_benchmarks/expected_results.csv" "$TEST_REPORTS_DIR/pr_time_benchmarks_results.csv" "$TEST_REPORTS_DIR/new_expected_results.csv"
  PYTHONPATH=$(pwd)/benchmarks/dynamo/pr_time_benchmarks python benchmarks/dynamo/pr_time_benchmarks/check_results.py "benchmarks/dynamo/pr_time_benchmarks/expected_results.csv" "$TEST_REPORTS_DIR/pr_time_benchmarks_results.csv"
}

if [[ "${TEST_CONFIG}" == *pr_time_benchmarks* ]]; then
@@ -606,11 +606,6 @@ test_inductor_halide() {
  assert_git_not_dirty
}

test_inductor_triton_cpu() {
  python test/run_test.py --include inductor/test_triton_cpu_backend.py --verbose
  assert_git_not_dirty
}

test_dynamo_benchmark() {
  # Usage: test_dynamo_benchmark huggingface 0
  TEST_REPORTS_DIR=$(pwd)/test/test-reports
@@ -665,6 +660,15 @@ test_inductor_torchbench_smoketest_perf() {
  # The threshold value needs to be actively maintained to make this check useful
  python benchmarks/dynamo/check_perf_csv.py -f "$TEST_REPORTS_DIR/inductor_training_smoketest.csv" -t 1.4

  TORCHINDUCTOR_ABI_COMPATIBLE=1 python benchmarks/dynamo/torchbench.py --device cuda --performance --bfloat16 --inference \
    --export-aot-inductor --only nanogpt --output "$TEST_REPORTS_DIR/inductor_inference_smoketest.csv"
  # The threshold value needs to be actively maintained to make this check useful
  # The perf number of nanogpt seems not very stable, e.g.
  # https://github.com/pytorch/pytorch/actions/runs/7158691360/job/19491437314,
  # and thus we lower its threshold to reduce flakiness. If this continues to be a problem,
  # we switch to use some other model.
  python benchmarks/dynamo/check_perf_csv.py -f "$TEST_REPORTS_DIR/inductor_inference_smoketest.csv" -t 4.9

  # Check memory compression ratio for a few models
  for test in hf_Albert timm_vision_transformer; do
    python benchmarks/dynamo/torchbench.py --device cuda --performance --backend inductor --amp --training \
@@ -1435,8 +1439,6 @@ elif [[ "${TEST_CONFIG}" == *inductor_distributed* ]]; then
  test_inductor_distributed
elif [[ "${TEST_CONFIG}" == *inductor-halide* ]]; then
  test_inductor_halide
elif [[ "${TEST_CONFIG}" == *inductor-triton-cpu* ]]; then
  test_inductor_triton_cpu
elif [[ "${TEST_CONFIG}" == *inductor-micro-benchmark* ]]; then
  test_inductor_micro_benchmark
elif [[ "${TEST_CONFIG}" == *huggingface* ]]; then
@@ -1460,7 +1462,7 @@ elif [[ "${TEST_CONFIG}" == *torchbench* ]]; then
  # https://github.com/opencv/opencv-python/issues/885
  pip_install opencv-python==4.8.0.74
  if [[ "${TEST_CONFIG}" == *inductor_torchbench_smoketest_perf* ]]; then
    checkout_install_torchbench hf_Bert hf_Albert timm_vision_transformer
    checkout_install_torchbench hf_Bert hf_Albert nanogpt timm_vision_transformer
    PYTHONPATH=$(pwd)/torchbench test_inductor_torchbench_smoketest_perf
  elif [[ "${TEST_CONFIG}" == *inductor_torchbench_cpu_smoketest_perf* ]]; then
    checkout_install_torchbench timm_vision_transformer phlippe_densenet basic_gnn_edgecnn \
@@ -26,7 +26,7 @@ fi
export SCRIPT_HELPERS_DIR=$SCRIPT_PARENT_DIR/win-test-helpers

set +ex
grep -E -R 'PyLong_(From|As)(Unsigned|)Long\(' --exclude=python_numbers.h --exclude=pythoncapi_compat.h --exclude=eval_frame.c torch/
grep -E -R 'PyLong_(From|As)(Unsigned|)Long\(' --exclude=python_numbers.h --exclude=eval_frame.c torch/
PYLONG_API_CHECK=$?
if [[ $PYLONG_API_CHECK == 0 ]]; then
  echo "Usage of PyLong_{From,As}{Unsigned}Long API may lead to overflow errors on Windows"
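For context on why this grep exists: Windows is an LLP64 platform, so C `long` stays 32 bits even in 64-bit builds. A Python int above 2**31 - 1 passed through `PyLong_AsLong` (or produced via `PyLong_FromLong`) can therefore overflow on Windows while the same code works on LP64 Linux, which is why usages under torch/ are flagged.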
@@ -27,11 +27,12 @@ if [[ "$PACKAGE_TYPE" == conda ]]; then
  source activate testenv >/dev/null
elif [[ "$PACKAGE_TYPE" != libtorch ]]; then
  python_path="/opt/python/cp\$python_nodot-cp\${python_nodot}"
  if [[ "\$python_nodot" = *t ]]; then
    python_digits="\$(echo $DESIRED_PYTHON | tr -cd [:digit:])"
    python_path="/opt/python/cp\$python_digits-cp\${python_digits}t"
  # Prior to Python 3.8 paths were suffixed with an 'm'
  if [[ -d "\${python_path}/bin" ]]; then
    export PATH="\${python_path}/bin:\$PATH"
  elif [[ -d "\${python_path}m/bin" ]]; then
    export PATH="\${python_path}m/bin:\$PATH"
  fi
  export PATH="\${python_path}/bin:\$PATH"
fi

EXTRA_CONDA_FLAGS=""
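For context (an inference from the code above plus the standard manylinux layout): a DESIRED_PYTHON value ending in `t` denotes a free-threaded CPython build; stripping non-digits from, say, `3.13t` yields `313`, so the interpreter is looked up under `/opt/python/cp313-cp313t`, while the `m`-suffixed fallback covers pre-3.8 layouts whose ABI flag suffixed the directory name.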
@@ -44,9 +44,7 @@ ContinuationIndentWidth: 4
Cpp11BracedListStyle: true
DerivePointerAlignment: false
DisableFormat: false
ForEachMacros:
  - FOR_EACH_RANGE
  - FOR_EACH
ForEachMacros: [ FOR_EACH_RANGE, FOR_EACH, ]
IncludeCategories:
  - Regex: '^<.*\.h(pp)?>'
    Priority: 1
@@ -60,24 +58,6 @@ IndentWrappedFunctionNames: false
KeepEmptyLinesAtTheStartOfBlocks: false
MacroBlockBegin: ''
MacroBlockEnd: ''
Macros:
  - >-
    PyObject_HEAD_INIT(type)={
    /* this is not exactly match with PyObject_HEAD_INIT in Python source code
     * but it is enough for clang-format */
    { 0xFFFFFFFF },
    (type)
    },
  - >-
    PyVarObject_HEAD_INIT(type, size)={
    {
    /* manually expand PyObject_HEAD_INIT(type) above
     * because clang-format do not support recursive expansion */
    { 0xFFFFFFFF },
    (type)
    },
    (size)
    },
MaxEmptyLinesToKeep: 1
NamespaceIndentation: None
PenaltyBreakBeforeFirstCallParameter: 1
@@ -99,11 +79,7 @@ SpacesInContainerLiterals: true
SpacesInCStyleCastParentheses: false
SpacesInParentheses: false
SpacesInSquareBrackets: false
Standard: c++17
StatementMacros:
  - PyObject_HEAD
  - PyObject_VAR_HEAD
  - PyException_HEAD
Standard: Cpp11
TabWidth: 8
UseTab: Never
---
38 .github/ISSUE_TEMPLATE.md vendored Normal file
@@ -0,0 +1,38 @@
If you have a question or would like help and support, please ask at our
[forums](https://discuss.pytorch.org/).

If you are submitting a feature request, please preface the title with [feature request].
If you are submitting a bug report, please fill in the following details.

## Issue description

Provide a short description.

## Code example

Please try to provide a minimal example to repro the bug.
Error messages and stack traces are also helpful.

## System Info

Please copy and paste the output from our
[environment collection script](https://raw.githubusercontent.com/pytorch/pytorch/main/torch/utils/collect_env.py)
(or fill out the checklist below manually).

You can get the script and run it with:
```
wget https://raw.githubusercontent.com/pytorch/pytorch/main/torch/utils/collect_env.py
# For security purposes, please check the contents of collect_env.py before running it.
python collect_env.py
```

- PyTorch or Caffe2:
- How you installed PyTorch (conda, pip, source):
- Build command you used (if compiling from source):
- OS:
- PyTorch version:
- Python version:
- CUDA/cuDNN version:
- GPU models and configuration:
- GCC version (if compiling from source):
- CMake version:
- Versions of any other relevant libraries:
6 .github/actions/checkout-pytorch/action.yml vendored
@@ -18,14 +18,8 @@ inputs:
runs:
  using: composite
  steps:
    - name: Check if in a container runner
      shell: bash
      id: check_container_runner
      run: echo "IN_CONTAINER_RUNNER=$(if [ -f /.inarc ] || [ -f /.incontainer ]; then echo true ; else echo false; fi)" >> "$GITHUB_OUTPUT"

    - name: Clean workspace
      shell: bash
      if: ${{ steps.check_container_runner.outputs.IN_CONTAINER_RUNNER == 'false' }}
      env:
        NO_SUDO: ${{ inputs.no-sudo }}
      run: |
30 .github/actions/linux-test/action.yml vendored
@@ -85,25 +85,15 @@ runs:
      with:
        docker-image: ${{ steps.calculate-docker-image.outputs.docker-image }}

    - name: Check if in a container runner
    - name: Check if in a ARC runner
      shell: bash
      id: check_container_runner
      run: echo "IN_CONTAINER_RUNNER=$(if [ -f /.inarc ] || [ -f /.incontainer ]; then echo true ; else echo false; fi)" >> "$GITHUB_OUTPUT"
      id: check_arc_runner
      run: echo "IN_ARC_RUNNER=$([ -f /.inarc ] && echo true || echo false)" >> "$GITHUB_OUTPUT"

    - name: Install nvidia driver, nvidia-docker runtime, set GPU_FLAG
      id: install-nvidia-driver
      uses: pytorch/test-infra/.github/actions/setup-nvidia@main
      if: ${{ contains(inputs.build-environment, 'cuda') && !contains(matrix.config, 'nogpu') && steps.check_container_runner.outputs.IN_CONTAINER_RUNNER == 'false' }}

    - name: Setup GPU_FLAG for docker run
      id: setup-gpu-flag
      run: echo "GPU_FLAG=--gpus all -e NVIDIA_DRIVER_CAPABILITIES=all" >> "${GITHUB_ENV}"
      if: ${{ contains(inputs.build-environment, 'cuda') && !contains(matrix.config, 'nogpu') && steps.check_container_runner.outputs.IN_CONTAINER_RUNNER == 'true' }}

    - name: Setup SCCACHE_SERVER_PORT environment for docker run when on container
      id: setup-sscache-port-flag
      run: echo "SCCACHE_SERVER_PORT_DOCKER_FLAG=-e SCCACHE_SERVER_PORT=$((RUNNER_UID + 4226))" >> "${GITHUB_ENV}"
      if: ${{ steps.check_container_runner.outputs.IN_CONTAINER_RUNNER == 'true' }}
      if: ${{ contains(inputs.build-environment, 'cuda') && !contains(matrix.config, 'nogpu') && steps.check_arc_runner.outputs.IN_ARC_RUNNER == 'false' }}

    - name: Lock NVIDIA A100 40GB Frequency
      shell: bash
@@ -111,7 +101,7 @@ runs:
        sudo nvidia-smi -pm 1
        sudo nvidia-smi -ac 1215,1410
        nvidia-smi
      if: ${{ contains(matrix.runner, 'a100') && steps.check_container_runner.outputs.IN_CONTAINER_RUNNER == 'false' }}
      if: contains(matrix.runner, 'a100')

    - name: Start monitoring script
      id: monitor-script
@@ -182,7 +172,6 @@ runs:
        NO_TD: ${{ steps.keep-going.outputs.ci-no-td }}
        TD_DISTRIBUTED: ${{ steps.keep-going.outputs.ci-td-distributed }}
        SCCACHE_BUCKET: ossci-compiler-cache-circleci-v2
        SCCACHE_REGION: us-east-1
        SCCACHE_S3_KEY_PREFIX: ${{ github.workflow }}
        SHM_SIZE: ${{ contains(inputs.build-environment, 'cuda') && '2g' || '1g' }}
        DOCKER_IMAGE: ${{ inputs.docker-image }}
@@ -192,9 +181,6 @@ runs:
        PYTORCH_TEST_RERUN_DISABLED_TESTS: ${{ matrix.rerun_disabled_tests && '1' || '0' }}
        DASHBOARD_TAG: ${{ inputs.dashboard-tag }}
        HUGGING_FACE_HUB_TOKEN: ${{ inputs.HUGGING_FACE_HUB_TOKEN }}
        SCRIBE_GRAPHQL_ACCESS_TOKEN: ${{ secrets.SCRIBE_GRAPHQL_ACCESS_TOKEN }}
        IS_A100_RUNNER: ${{ contains(matrix.runner, 'a100') && '1' || '0' }}

      shell: bash
      run: |
        set -x
@@ -213,7 +199,6 @@ runs:
        # shellcheck disable=SC2086,SC2090
        container_name=$(docker run \
          ${GPU_FLAG:-} \
          ${SCCACHE_SERVER_PORT_DOCKER_FLAG:-} \
          -e BUILD_ENVIRONMENT \
          -e PR_NUMBER \
          -e GITHUB_ACTIONS \
@@ -242,7 +227,6 @@ runs:
          -e PR_LABELS \
          -e MAX_JOBS="$(nproc --ignore=2)" \
          -e SCCACHE_BUCKET \
          -e SCCACHE_REGION \
          -e SCCACHE_S3_KEY_PREFIX \
          -e XLA_CUDA \
          -e XLA_CLANG_CACHE_S3_BUCKET_NAME \
@@ -250,9 +234,7 @@ runs:
          -e PYTORCH_TEST_RERUN_DISABLED_TESTS \
          -e SKIP_SCCACHE_INITIALIZATION=1 \
          -e HUGGING_FACE_HUB_TOKEN \
          -e SCRIBE_GRAPHQL_ACCESS_TOKEN \
          -e DASHBOARD_TAG \
          -e IS_A100_RUNNER \
          --env-file="/tmp/github_env_${GITHUB_RUN_ID}" \
          --security-opt seccomp=unconfined \
          --cap-add=SYS_PTRACE \
@@ -323,7 +305,7 @@ runs:

    - name: Teardown Linux
      uses: pytorch/test-infra/.github/actions/teardown-linux@main
      if: always() && steps.check_container_runner.outputs.IN_CONTAINER_RUNNER == 'false'
      if: always()

    # NB: We are currently having an intermittent GPU-related issue on G5 runners with
    # A10G GPU. Once this happens, trying to reset the GPU as done in setup-nvidia does
12 .github/actions/setup-linux/action.yml vendored
@@ -28,14 +28,14 @@ runs:
        echo "instance-type: $(get_ec2_metadata instance-type)"
        echo "system info $(uname -a)"

    - name: Check if in a container runner
    - name: Check if in a ARC runner
      shell: bash
      id: check_container_runner
      run: echo "IN_CONTAINER_RUNNER=$(if [ -f /.inarc ] || [ -f /.incontainer ]; then echo true ; else echo false; fi)" >> "$GITHUB_OUTPUT"
      id: check_arc_runner
      run: echo "IN_ARC_RUNNER=$([ -f /.inarc ] && echo true || echo false)" >> $GITHUB_OUTPUT

    - name: Start docker if docker deamon is not running
      shell: bash
      if: ${{ steps.check_container_runner.outputs.IN_CONTAINER_RUNNER == 'false' }}
      if: ${{ steps.check_arc_runner.outputs.IN_ARC_RUNNER == 'false' }}
      run: |
        if systemctl is-active --quiet docker; then
          echo "Docker daemon is running...";
@@ -73,7 +73,7 @@ runs:
        env | grep '^CI' >> "/tmp/github_env_${GITHUB_RUN_ID}"

    - name: Kill any existing containers, clean up images
      if: ${{ steps.check_container_runner.outputs.IN_CONTAINER_RUNNER == 'false' }}
      if: ${{ steps.check_arc_runner.outputs.IN_ARC_RUNNER == 'false' }}
      shell: bash
      run: |
        # ignore expansion of "docker ps -q" since it could be empty
@@ -116,7 +116,7 @@ runs:
    - name: Check that the docker daemon is running
      shell: bash
      continue-on-error: true
      if: ${{ steps.check_container_runner.outputs.IN_CONTAINER_RUNNER == 'true' }}
      if: ${{ steps.check_arc_runner.outputs.IN_ARC_RUNNER == 'true' }}
      run: |
        set +x
2 .github/ci_commit_pins/audio.txt vendored
@@ -1 +1 @@
3f0569939c4369bec943fc27d1c9d8dfbc828c26
ba696ea3dfec4cbe693bf06a84c75dc196077f5b
1 .github/pytorch-probot.yml vendored
@@ -16,7 +16,6 @@ ciflow_push_tags:
- ciflow/nightly
- ciflow/periodic
- ciflow/rocm
- ciflow/s390
- ciflow/slow
- ciflow/trunk
- ciflow/unstable
@@ -1,4 +1,4 @@
# iOS simulator requirements
coremltools==5.0b5
protobuf==3.20.2
optree==0.13.0
optree==0.12.1
@@ -27,7 +27,7 @@ pytest-cpp==2.3.0
rockset==1.0.3
z3-solver==4.12.2.0
tensorboard==2.13.0
optree==0.13.0
optree==0.12.1
# NB: test_hparams_* from test_tensorboard is failing with protobuf 5.26.0 in
# which the stringify metadata is wrong when escaping double quote
protobuf==3.20.2
10 .github/scripts/generate_binary_build_matrix.py vendored
@@ -333,7 +333,7 @@ def generate_wheels_matrix(
    package_type = "manywheel"

    if python_versions is None:
        python_versions = FULL_PYTHON_VERSIONS + ["3.13", "3.13t"]
        python_versions = FULL_PYTHON_VERSIONS + ["3.13"]

    if arches is None:
        # Define default compute archivectures
@@ -369,13 +369,7 @@ def generate_wheels_matrix(
        # TODO: Enable python 3.13 on rocm, aarch64, windows
        if (
            gpu_arch_type == "rocm" or (os != "linux" and os != "linux-s390x")
        ) and (python_version == "3.13" or python_version == "3.13t"):
            continue

        # TODO: Enable python 3.13t on xpu and cpu-s390x
        if (
            gpu_arch_type == "xpu" or gpu_arch_type == "cpu-s390x"
        ) and python_version == "3.13t":
        ) and python_version == "3.13":
            continue

        if use_split_build and (
67 .github/scripts/runner_determinator.py vendored
@@ -1,9 +1,5 @@
# flake8: noqa: G004

# Note: Copies of this script in runner_determinator.py and _runner-determinator.yml
# must be kept in sync. You can do it easily by running the following command:
# python .github/scripts/update_runner_determinator.py

"""
This runner determinator is used to determine which set of runners to run a
GitHub job on. It uses the first comment of a GitHub issue (by default
@@ -83,9 +79,6 @@ class Experiment(NamedTuple):
    rollout_perc: float = (
        0  # Percentage of workflows to experiment on when user is not opted-in.
    )
    all_branches: bool = (
        False  # If True, the experiment is also enabled on the exception branches
    )

    # Add more fields as needed

@@ -219,7 +212,7 @@ def get_potential_pr_author(

def is_exception_branch(branch: str) -> bool:
    """
    Branches that get opted out of experiments by default, until they're explicitly enabled.
    Branches that get opted out of all experiments and should always use Meta runners
    """
    return branch.split("/")[0] in {"main", "nightly", "release", "landchecks"}

@@ -345,10 +338,7 @@ def is_user_opted_in(user: str, user_optins: UserOptins, experiment_name: str) -


def get_runner_prefix(
    rollout_state: str,
    workflow_requestors: Iterable[str],
    branch: str,
    is_canary: bool = False,
    rollout_state: str, workflow_requestors: Iterable[str], is_canary: bool = False
) -> str:
    settings = parse_settings(rollout_state)
    user_optins = parse_users(rollout_state)
@@ -358,12 +348,6 @@ def get_runner_prefix(
    for experiment_name, experiment_settings in settings.experiments.items():
        enabled = False

        if not experiment_settings.all_branches and is_exception_branch(branch):
            log.info(
                f"Branch {branch} is an exception branch. Not enabling experiment {experiment_name}."
            )
            continue

        # Is any workflow_requestor opted in to this experiment?
        opted_in_users = [
            requestor
@@ -423,34 +407,35 @@ def get_rollout_state_from_issue(github_token: str, repo: str, issue_num: int) -
def main() -> None:
    args = parse_args()

    runner_label_prefix = DEFAULT_LABEL_PREFIX

    try:
        rollout_state = get_rollout_state_from_issue(
            args.github_token, args.github_issue_repo, args.github_issue
    if args.github_ref_type == "branch" and is_exception_branch(args.github_branch):
        log.info(
            f"Exception branch: '{args.github_branch}', using Meta runners and no experiments."
        )
        runner_label_prefix = DEFAULT_LABEL_PREFIX
    else:
        try:
            rollout_state = get_rollout_state_from_issue(
                args.github_token, args.github_issue_repo, args.github_issue
            )

        username = get_potential_pr_author(
            args.github_token,
            args.github_repo,
            args.github_actor,
            args.github_ref_type,
            args.github_branch,
        )
            username = get_potential_pr_author(
                args.github_token,
                args.github_repo,
                args.github_actor,
                args.github_ref_type,
                args.github_branch,
            )

        is_canary = args.github_repo == "pytorch/pytorch-canary"
            is_canary = args.github_repo == "pytorch/pytorch-canary"

        runner_label_prefix = get_runner_prefix(
            rollout_state,
            (args.github_issue_owner, username),
            args.github_branch,
            is_canary,
        )
            runner_label_prefix = get_runner_prefix(
                rollout_state, (args.github_issue_owner, username), is_canary
            )

    except Exception as e:
        log.error(
            f"Failed to get issue. Defaulting to Meta runners and no experiments. Exception: {e}"
        )
        except Exception as e:
            log.error(
                f"Failed to get issue. Defaulting to Meta runners and no experiments. Exception: {e}"
            )

    set_github_output(GH_OUTPUT_KEY_LABEL_TYPE, runner_label_prefix)
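For reference, the rollout state this script parses out of the tracking issue's first comment follows the format exercised by the test fixtures below; a minimal sketch (the experiment names and users are illustrative, taken directly from those fixtures):

```
experiments:
  lf:
    rollout_perc: 25
  otherExp:
    all_branches: true
    rollout_perc: 0
---
Users:
@User1,lf
@User2,lf,otherExp
```

A requestor opted into both experiments gets the combined runner prefix `lf.otherExp.`, with `lf.` always ordered first, as the tests verify.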
87 .github/scripts/test_runner_determinator.py vendored
@@ -4,10 +4,6 @@ from unittest.mock import Mock, patch
import runner_determinator as rd


USER_BRANCH = "somebranch"
EXCEPTION_BRANCH = "main"


class TestRunnerDeterminatorIssueParser(TestCase):
    def test_parse_settings(self) -> None:
        settings_text = """
@@ -70,40 +66,6 @@ class TestRunnerDeterminatorIssueParser(TestCase):
            "otherExp settings not parsed correctly",
        )

    def test_parse_all_branches_setting(self) -> None:
        settings_text = """
        ```
        experiments:
          lf:
            rollout_perc: 25
            all_branches: true
          otherExp:
            all_branches: True
            rollout_perc: 0
        ```

        ---

        Users:
        @User1,lf
        @User2,lf,otherExp

        """

        settings = rd.parse_settings(settings_text)

        self.assertTupleEqual(
            rd.Experiment(rollout_perc=25, all_branches=True),
            settings.experiments["lf"],
            "lf settings not parsed correctly",
        )
        self.assertTrue(settings.experiments["otherExp"].all_branches)
        self.assertTupleEqual(
            rd.Experiment(rollout_perc=0, all_branches=True),
            settings.experiments["otherExp"],
            "otherExp settings not parsed correctly",
        )

    def test_parse_users(self) -> None:
        settings_text = """
        experiments:
@@ -157,7 +119,7 @@ class TestRunnerDeterminatorGetRunnerPrefix(TestCase):
        @User2,lf,otherExp

        """
        prefix = rd.get_runner_prefix(settings_text, ["User1"], USER_BRANCH)
        prefix = rd.get_runner_prefix(settings_text, ["User1"])
        self.assertEqual("lf.", prefix, "Runner prefix not correct for User1")

    def test_opted_in_user_two_experiments(self) -> None:
@@ -174,7 +136,7 @@ class TestRunnerDeterminatorGetRunnerPrefix(TestCase):
        @User2,lf,otherExp

        """
        prefix = rd.get_runner_prefix(settings_text, ["User2"], USER_BRANCH)
        prefix = rd.get_runner_prefix(settings_text, ["User2"])
        self.assertEqual("lf.otherExp.", prefix, "Runner prefix not correct for User2")

    @patch("random.uniform", return_value=50)
@@ -192,7 +154,7 @@ class TestRunnerDeterminatorGetRunnerPrefix(TestCase):
        @User2,lf,otherExp

        """
        prefix = rd.get_runner_prefix(settings_text, ["User3"], USER_BRANCH)
        prefix = rd.get_runner_prefix(settings_text, ["User3"])
        self.assertEqual("", prefix, "Runner prefix not correct for user")

    @patch("random.uniform", return_value=10)
@@ -212,7 +174,7 @@ class TestRunnerDeterminatorGetRunnerPrefix(TestCase):
        """

        # User3 is opted out, but is pulled into both experiments by the 10% rollout
        prefix = rd.get_runner_prefix(settings_text, ["User3"], USER_BRANCH)
        prefix = rd.get_runner_prefix(settings_text, ["User3"])
        self.assertEqual("lf.otherExp.", prefix, "Runner prefix not correct for user")

    def test_lf_prefix_always_comes_first(self) -> None:
@@ -230,7 +192,7 @@ class TestRunnerDeterminatorGetRunnerPrefix(TestCase):

        """

        prefix = rd.get_runner_prefix(settings_text, ["User2"], USER_BRANCH)
        prefix = rd.get_runner_prefix(settings_text, ["User2"])
        self.assertEqual("lf.otherExp.", prefix, "Runner prefix not correct for user")

    def test_ignores_commented_users(self) -> None:
@@ -248,7 +210,7 @@ class TestRunnerDeterminatorGetRunnerPrefix(TestCase):

        """

        prefix = rd.get_runner_prefix(settings_text, ["User1"], USER_BRANCH)
        prefix = rd.get_runner_prefix(settings_text, ["User1"])
        self.assertEqual("", prefix, "Runner prefix not correct for user")

    def test_ignores_extra_experiments(self) -> None:
@@ -267,44 +229,9 @@ class TestRunnerDeterminatorGetRunnerPrefix(TestCase):

        """

        prefix = rd.get_runner_prefix(settings_text, ["User1"], USER_BRANCH)
        prefix = rd.get_runner_prefix(settings_text, ["User1"])
        self.assertEqual("lf.otherExp.", prefix, "Runner prefix not correct for user")

    def test_disables_experiment_on_exception_branches_when_not_explicitly_opted_in(
        self,
    ) -> None:
        settings_text = """
        experiments:
          lf:
            rollout_perc: 100
        ---

        Users:
        @User,lf,otherExp

        """

        prefix = rd.get_runner_prefix(settings_text, ["User1"], EXCEPTION_BRANCH)
        self.assertEqual("", prefix, "Runner prefix not correct for user")

    def test_allows_experiment_on_exception_branches_when_explicitly_opted_in(
        self,
    ) -> None:
        settings_text = """
        experiments:
          lf:
            rollout_perc: 100
            all_branches: true
        ---

        Users:
        @User,lf,otherExp

        """

        prefix = rd.get_runner_prefix(settings_text, ["User1"], EXCEPTION_BRANCH)
        self.assertEqual("lf.", prefix, "Runner prefix not correct for user")


if __name__ == "__main__":
    main()
35 .github/scripts/test_trymerge.py vendored
@@ -12,7 +12,7 @@ import json
import os
import warnings
from hashlib import sha256
from typing import Any, List, Optional
from typing import Any, Dict, List, Optional
from unittest import main, mock, skip, TestCase
from urllib.error import HTTPError

@@ -24,6 +24,7 @@ from trymerge import (
    find_matching_merge_rule,
    get_classifications,
    get_drci_classifications,
    get_rockset_results,
    gh_get_team_members,
    GitHubPR,
    JobCheckState,
@@ -41,6 +42,7 @@ if "GIT_REMOTE_URL" not in os.environ:
    os.environ["GIT_REMOTE_URL"] = "https://github.com/pytorch/pytorch"

GQL_MOCKS = "gql_mocks.json.gz"
ROCKSET_MOCKS = "rockset_mocks.json.gz"
DRCI_MOCKS = "drci_mocks.json.gz"


@@ -75,11 +77,16 @@ def mock_query(
    if err.code == 401 or err.code == 403:
        err_msg = f"If you are seeing this message during workflow run, please make sure to update {file_name}"
        err_msg += f" locally, by deleting it and running {os.path.basename(__file__)} with"
        err_msg += " GitHub Personal Access Token passed via GITHUB_TOKEN"
        err_msg += " GitHub Personal Access Token passed via GITHUB_TOKEN,"
        err_msg += " the rockset api key passed via ROCKSET_API_KEY,"
        err_msg += " and drci api key passed via DRCI_BOT_KEY environment variables"
    if os.getenv("GITHUB_TOKEN") is None or os.getenv("DRCI_BOT_KEY") is None:
    if (
        os.getenv("GITHUB_TOKEN") is None
        or os.getenv("ROCKSET_API_KEY") is None
        or os.getenv("DRCI_BOT_KEY") is None
    ):
        err_msg = (
            "Failed to update cached queries as GITHUB_TOKEN or DRCI_BOT_KEY "
            "Failed to update cached queries as GITHUB_TOKEN or ROCKSET_API_KEY or DRCI_BOT_KEY "
            + "is not defined. "
            + err_msg
        )
@@ -103,6 +110,16 @@ def mocked_gh_graphql(query: str, **kwargs: Any) -> Any:
    return mock_query(gh_graphql_wrapper, GQL_MOCKS, key_function, query, kwargs)


def mocked_rockset_results(head_sha: str, merge_base: str, num_retries: int = 3) -> Any:
    return mock_query(
        get_rockset_results,
        ROCKSET_MOCKS,
        lambda x, y: f"{x} {y}",
        head_sha,
        merge_base,
    )


def mocked_drci_classifications(pr_num: int, project: str, num_retries: int = 3) -> Any:
    return mock_query(
        get_drci_classifications,
@@ -256,6 +273,10 @@ def xla_merge_rules(repo: Any, org: str, project: str) -> List[MergeRule]:
    ]


def empty_rockset_results(head_sha: str, merge_base: str) -> List[Dict[str, Any]]:
    return []


class DummyGitRepo(GitRepo):
    def __init__(self) -> None:
        super().__init__(get_git_repo_dir(), get_git_remote_name())
@@ -267,6 +288,7 @@ class DummyGitRepo(GitRepo):
        return "super awsome commit message"


@mock.patch("trymerge.get_rockset_results", side_effect=empty_rockset_results)
@mock.patch("trymerge.gh_graphql", side_effect=mocked_gh_graphql)
@mock.patch(
    "trymerge.get_drci_classifications", side_effect=mocked_drci_classifications
@@ -582,6 +604,7 @@ class TestTryMerge(TestCase):
        mocked_gh_fetch_merge_base.assert_called_once()


@mock.patch("trymerge.get_rockset_results", side_effect=mocked_rockset_results)
@mock.patch("trymerge.gh_graphql", side_effect=mocked_gh_graphql)
@mock.patch("trymerge.gh_fetch_merge_base", return_value="")
@mock.patch(
@@ -820,7 +843,7 @@ class TestBypassFailures(TestCase):
        checks = pr.get_checkrun_conclusions()

        # Known flaky failure takes precedence over ignore current (need to set the
        # merge base here to get the results from Dr. CI, and that categorize the
        # merge base here to get the results from Rockset, and that categorize the
        # broken trunk failure too
        checks = get_classifications(
            pr.pr_num,
@@ -906,6 +929,7 @@ class TestBypassFailures(TestCase):
        )


@mock.patch("trymerge.get_rockset_results", side_effect=mocked_rockset_results)
@mock.patch("trymerge.gh_graphql", side_effect=mocked_gh_graphql)
@mock.patch("trymerge.gh_fetch_merge_base", return_value="")
@mock.patch("trymerge.get_drci_classifications", return_value={})
@@ -984,6 +1008,7 @@ class TestBypassFailuresOnSandCastle(TestCase):
        self.assertTrue(len(failed) == 2)


@mock.patch("trymerge.get_rockset_results", side_effect=mocked_rockset_results)
@mock.patch("trymerge.gh_graphql", side_effect=mocked_gh_graphql)
@mock.patch("trymerge.gh_fetch_merge_base", return_value="")
@mock.patch(
51 .github/scripts/trymerge.py vendored
@@ -452,6 +452,8 @@ RE_DIFF_REV = re.compile(r"^Differential Revision:.+?(D[0-9]+)", re.MULTILINE)
CIFLOW_LABEL = re.compile(r"^ciflow/.+")
CIFLOW_TRUNK_LABEL = re.compile(r"^ciflow/trunk")
MERGE_RULE_PATH = Path(".github") / "merge_rules.yaml"
ROCKSET_MERGES_COLLECTION = "merges"
ROCKSET_MERGES_WORKSPACE = "commons"
REMOTE_MAIN_BRANCH = "origin/main"
DRCI_CHECKRUN_NAME = "Dr.CI"
INTERNAL_CHANGES_CHECKRUN_NAME = "Meta Internal-Only Changes Check"
@@ -1178,7 +1180,7 @@ class GitHubPR:
        merge_commit_sha = repo.rev_parse(name=self.default_branch())

        if comment_id and self.pr_num:
            # Finally, upload the record to s3. The list of pending and failed
            # Finally, upload the record to Rockset. The list of pending and failed
            # checks are at the time of the merge
            save_merge_record(
                comment_id=comment_id,
@@ -1200,7 +1202,7 @@ class GitHubPR:
                ignore_current=bool(ignore_current_checks),
            )
        else:
            print("Missing comment ID or PR number, couldn't upload to s3")
            print("Missing comment ID or PR number, couldn't upload to Rockset")

        # Usually Github will see that the commit has "resolves <pr_num>" in the
        # commit message and close the PR, but sometimes it doesn't, leading to
@@ -1479,7 +1481,7 @@ def find_matching_merge_rule(

    # Categorize all checks when skip_mandatory_checks (force merge) is set. Do it here
    # where the list of checks is readily available. These records will be saved into
    # s3 merge records
    # Rockset merge records
    (
        pending_mandatory_checks,
        failed_mandatory_checks,
@@ -1566,7 +1568,7 @@ def save_merge_record(
    This saves the merge records as a json, which can later be uploaded to s3
    """

    # Prepare the record to be written into s3
    # Prepare the record to be written into Rockset
    data = [
        {
            "comment_id": comment_id,
@@ -1588,8 +1590,7 @@ def save_merge_record(
            "ignore_current": ignore_current,
            "error": error,
            # This is a unique identifier for the record for deduping purposes
            # in Rockset. Any unique string would work. This will not be used
            # after we migrate off Rockset
            # in rockset. Any unique string would work
            "_id": f"{project}-{pr_num}-{comment_id}-{os.environ.get('GITHUB_RUN_ID')}",
        }
    ]
@@ -1599,6 +1600,36 @@ def save_merge_record(
        json.dump(data, f)


@retries_decorator(rc=[])
def get_rockset_results(head_sha: str, merge_base: str) -> List[Dict[str, Any]]:
    query = f"""
SELECT
    w.name as workflow_name,
    j.id,
    j.name,
    j.conclusion,
    j.completed_at,
    j.html_url,
    j.head_sha,
    j.torchci_classification.captures as failure_captures,
    LENGTH(j.steps) as steps,
FROM
    commons.workflow_job j join commons.workflow_run w on w.id = j.run_id
where
    j.head_sha in ('{head_sha}','{merge_base}')
"""
    try:
        import rockset  # type: ignore[import]

        res = rockset.RocksetClient(
            host="api.usw2a1.rockset.com", api_key=os.environ["ROCKSET_API_KEY"]
        ).sql(query)
        return cast(List[Dict[str, Any]], res.results)
    except ModuleNotFoundError:
        print("Could not use RockSet as rocket dependency is missing")
        return []


@retries_decorator()
def get_drci_classifications(pr_num: int, project: str = "pytorch") -> Any:
    """
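A note on the design of get_rockset_results above: the SQL joins commons.workflow_job to commons.workflow_run and fetches job conclusions for both the PR head SHA and its merge base in a single query. Having results for both commits is what lets the classification logic distinguish a pre-existing broken-trunk failure (one that also fails at the merge base) from a failure newly introduced by the PR, as the TestBypassFailures comments earlier suggest.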
@@ -2036,7 +2067,7 @@ def categorize_checks(
    pending_checks: List[Tuple[str, Optional[str], Optional[int]]] = []
    failed_checks: List[Tuple[str, Optional[str], Optional[int]]] = []

    # failed_checks_categorization is used to keep track of all ignorable failures when saving the merge record on s3
    # failed_checks_categorization is used to keep track of all ignorable failures when saving the merge record on Rockset
    failed_checks_categorization: Dict[str, List[Any]] = defaultdict(list)

    # If required_checks is not set or empty, consider all names are relevant
@@ -2095,7 +2126,7 @@ def categorize_checks(
    ):
        failed_checks = failed_checks + flaky_or_broken_trunk

    # The list of failed_checks_categorization is returned so that it can be saved into the s3 merge record
    # The list of failed_checks_categorization is returned so that it can be saved into the Rockset merge record
    return (pending_checks, failed_checks, failed_checks_categorization)


@@ -2379,7 +2410,7 @@ def main() -> None:
        handle_exception(e)

    if args.comment_id and args.pr_num:
        # Finally, upload the record to s3, we don't have access to the
        # Finally, upload the record to Rockset, we don't have access to the
        # list of pending and failed checks here, but they are not really
        # needed at the moment
        save_merge_record(
@@ -2402,7 +2433,7 @@ def main() -> None:
            error=str(e),
        )
    else:
        print("Missing comment ID or PR number, couldn't upload to s3")
        print("Missing comment ID or PR number, couldn't upload to Rockset")
    finally:
        if not args.check_mergeability:
            gh_remove_label(
31 .github/scripts/update_runner_determinator.py vendored
@@ -1,31 +0,0 @@
#!/usr/bin/env python3

import re


# Read the contents of runner_determinator.py
with open(".github/scripts/runner_determinator.py") as script_file:
    script_content = script_file.read()

# Indent the script content by 10 spaces to match destination indentation
indented_script_content = "\n".join(
    [" " * 10 + line if line else line for line in script_content.splitlines()]
)

# Read the contents of _runner-determinator.yml
with open(".github/workflows/_runner-determinator.yml") as yml_file:
    yml_content = yml_file.read()

# Replace the content between the markers
new_yml_content = re.sub(
    r"(cat <<EOF > runner_determinator.py\n)(.*?)(\n\s+EOF)",
    lambda match: match.group(1) + indented_script_content + match.group(3),
    yml_content,
    flags=re.DOTALL,
)

# Save the modified content back to _runner-determinator.yml
with open(".github/workflows/_runner-determinator.yml", "w") as yml_file:
    yml_file.write(new_yml_content)

print("Updated _runner-determinator.yml with the contents of runner_determinator.py")
@@ -68,7 +68,6 @@ jobs:
    needs: get-label-type
    with:!{{ upload.binary_env_as_input(config) }}
      {%- if "aarch64" in build_environment %}
      runner_prefix: "${{ needs.get-label-type.outputs.label-type }}"
      runs_on: linux.arm64.m7g.4xlarge.ephemeral
      ALPINE_IMAGE: "arm64v8/alpine"
      {%- elif "s390x" in build_environment %}
@@ -103,7 +102,6 @@ jobs:
      build_name: !{{ config["build_name"] }}
      build_environment: !{{ build_environment }}
      {%- if "aarch64" in build_environment %}
      runner_prefix: "${{ needs.get-label-type.outputs.label-type }}"
      runs_on: linux.arm64.2xlarge
      ALPINE_IMAGE: "arm64v8/alpine"
      {%- elif "s390x" in build_environment %}
8 .github/workflows/_bazel-build-test.yml vendored
@@ -91,14 +91,14 @@ jobs:
        with:
          docker-image: ${{ steps.calculate-docker-image.outputs.docker-image }}

      - name: Check if in a container runner
      - name: Check if in a ARC runner
        shell: bash
        id: check_container_runner
        run: echo "IN_CONTAINER_RUNNER=$(if [ -f /.inarc ] || [ -f /.incontainer ]; then echo true ; else echo false; fi)" >> "$GITHUB_OUTPUT"
        id: check_arc_runner
        run: echo "IN_ARC_RUNNER=$([ -f /.inarc ] && echo true || echo false)" >> "$GITHUB_OUTPUT"

      - name: Install nvidia driver, nvidia-docker runtime, set GPU_FLAG
        uses: pytorch/test-infra/.github/actions/setup-nvidia@main
        if: ${{ inputs.cuda-version != 'cpu' && steps.check_container_runner.outputs.IN_CONTAINER_RUNNER == 'false' }}
        if: ${{ inputs.cuda-version != 'cpu' && steps.check_arc_runner.outputs.IN_ARC_RUNNER == 'false' }}

      - name: Output disk space left
        run: |
27 .github/workflows/_linux-test.yml vendored
@@ -114,32 +114,22 @@ jobs:
        with:
          docker-image: ${{ steps.calculate-docker-image.outputs.docker-image }}

      - name: Check if in a container runner
      - name: Check if in a ARC runner
        shell: bash
        id: check_container_runner
        run: echo "IN_CONTAINER_RUNNER=$(if [ -f /.inarc ] || [ -f /.incontainer ]; then echo true ; else echo false; fi)" >> "$GITHUB_OUTPUT"
        id: check_arc_runner
        run: echo "IN_ARC_RUNNER=$([ -f /.inarc ] && echo true || echo false)" >> "$GITHUB_OUTPUT"

      - name: Install nvidia driver, nvidia-docker runtime, set GPU_FLAG
        id: install-nvidia-driver
        uses: pytorch/test-infra/.github/actions/setup-nvidia@main
        if: ${{ contains(inputs.build-environment, 'cuda') && !contains(matrix.config, 'nogpu') && steps.check_container_runner.outputs.IN_CONTAINER_RUNNER == 'false' }}

      - name: Setup GPU_FLAG for docker run
        id: setup-gpu-flag
        run: echo "GPU_FLAG=--gpus all -e NVIDIA_DRIVER_CAPABILITIES=all" >> "${GITHUB_ENV}"
        if: ${{ contains(inputs.build-environment, 'cuda') && !contains(matrix.config, 'nogpu') && steps.check_container_runner.outputs.IN_CONTAINER_RUNNER == 'true' }}

      - name: Setup SCCACHE_SERVER_PORT environment for docker run when on container
        id: setup-sscache-port-flag
        run: echo "SCCACHE_SERVER_PORT_DOCKER_FLAG=-e SCCACHE_SERVER_PORT=$((RUNNER_UID + 4226))" >> "${GITHUB_ENV}"
        if: ${{ steps.check_container_runner.outputs.IN_CONTAINER_RUNNER == 'true' }}
        if: ${{ contains(inputs.build-environment, 'cuda') && !contains(matrix.config, 'nogpu') && steps.check_arc_runner.outputs.IN_ARC_RUNNER == 'false' }}

      - name: Lock NVIDIA A100 40GB Frequency
        run: |
          sudo nvidia-smi -pm 1
          sudo nvidia-smi -ac 1215,1410
          nvidia-smi
        if: ${{ contains(matrix.runner, 'a100') && steps.check_container_runner.outputs.IN_CONTAINER_RUNNER == 'false' }}
        if: contains(matrix.runner, 'a100')

      - name: Start monitoring script
        id: monitor-script
@@ -218,7 +208,6 @@ jobs:
          NO_TD: ${{ steps.keep-going.outputs.ci-no-td }}
          TD_DISTRIBUTED: ${{ steps.keep-going.outputs.ci-td-distributed }}
          SCCACHE_BUCKET: ossci-compiler-cache-circleci-v2
          SCCACHE_REGION: us-east-1
          SCCACHE_S3_KEY_PREFIX: ${{ github.workflow }}
          SHM_SIZE: ${{ contains(inputs.build-environment, 'cuda') && '2g' || '1g' }}
          DOCKER_IMAGE: ${{ inputs.docker-image }}
@@ -229,7 +218,6 @@ jobs:
          DASHBOARD_TAG: ${{ inputs.dashboard-tag }}
          HUGGING_FACE_HUB_TOKEN: ${{ secrets.HUGGING_FACE_HUB_TOKEN }}
          SCRIBE_GRAPHQL_ACCESS_TOKEN: ${{ secrets.SCRIBE_GRAPHQL_ACCESS_TOKEN }}
          IS_A100_RUNNER: ${{ contains(matrix.runner, 'a100') && '1' || '0' }}

        run: |
          set -x
@@ -248,7 +236,6 @@ jobs:
          # shellcheck disable=SC2086,SC2090
          container_name=$(docker run \
            ${GPU_FLAG:-} \
            ${SCCACHE_SERVER_PORT_DOCKER_FLAG:-} \
            -e BUILD_ENVIRONMENT \
            -e PR_NUMBER \
            -e GITHUB_ACTIONS \
@@ -278,7 +265,6 @@ jobs:
            -e PR_LABELS \
            -e MAX_JOBS="$(nproc --ignore=2)" \
            -e SCCACHE_BUCKET \
            -e SCCACHE_REGION \
            -e SCCACHE_S3_KEY_PREFIX \
            -e XLA_CUDA \
            -e XLA_CLANG_CACHE_S3_BUCKET_NAME \
@@ -288,7 +274,6 @@ jobs:
            -e HUGGING_FACE_HUB_TOKEN \
            -e SCRIBE_GRAPHQL_ACCESS_TOKEN \
            -e DASHBOARD_TAG \
            -e IS_A100_RUNNER \
            --env-file="/tmp/github_env_${GITHUB_RUN_ID}" \
            --security-opt seccomp=unconfined \
            --cap-add=SYS_PTRACE \
@@ -358,7 +343,7 @@ jobs:

      - name: Teardown Linux
        uses: pytorch/test-infra/.github/actions/teardown-linux@main
        if: always() && steps.check_container_runner.outputs.IN_CONTAINER_RUNNER == 'false'
        if: always()

      # NB: We are currently having an intermittent GPU-related issue on G5 runners with
      # A10G GPU. Once this happens, trying to reset the GPU as done in setup-nvidia does
67 .github/workflows/_runner-determinator.yml vendored
@@ -59,10 +59,6 @@ jobs:
          cat <<EOF > runner_determinator.py
          # flake8: noqa: G004

          # Note: Copies of this script in runner_determinator.py and _runner-determinator.yml
          # must be kept in sync. You can do it easily by running the following command:
          # python .github/scripts/update_runner_determinator.py

          """
          This runner determinator is used to determine which set of runners to run a
          GitHub job on. It uses the first comment of a GitHub issue (by default
@@ -142,9 +138,6 @@ jobs:
              rollout_perc: float = (
                  0  # Percentage of workflows to experiment on when user is not opted-in.
              )
              all_branches: bool = (
                  False  # If True, the experiment is also enabled on the exception branches
              )

              # Add more fields as needed
@@ -278,7 +271,7 @@ jobs:

          def is_exception_branch(branch: str) -> bool:
              """
              Branches that get opted out of experiments by default, until they're explicitly enabled.
              Branches that get opted out of all experiments and should always use Meta runners
              """
              return branch.split("/")[0] in {"main", "nightly", "release", "landchecks"}

@@ -404,10 +397,7 @@ jobs:


          def get_runner_prefix(
              rollout_state: str,
              workflow_requestors: Iterable[str],
              branch: str,
              is_canary: bool = False,
              rollout_state: str, workflow_requestors: Iterable[str], is_canary: bool = False
          ) -> str:
              settings = parse_settings(rollout_state)
              user_optins = parse_users(rollout_state)
@@ -417,12 +407,6 @@ jobs:
              for experiment_name, experiment_settings in settings.experiments.items():
                  enabled = False

                  if not experiment_settings.all_branches and is_exception_branch(branch):
                      log.info(
                          f"Branch {branch} is an exception branch. Not enabling experiment {experiment_name}."
                      )
                      continue

                  # Is any workflow_requestor opted in to this experiment?
                  opted_in_users = [
                      requestor
@@ -482,34 +466,35 @@ jobs:
          def main() -> None:
              args = parse_args()

              runner_label_prefix = DEFAULT_LABEL_PREFIX

              try:
                  rollout_state = get_rollout_state_from_issue(
                      args.github_token, args.github_issue_repo, args.github_issue
              if args.github_ref_type == "branch" and is_exception_branch(args.github_branch):
                  log.info(
                      f"Exception branch: '{args.github_branch}', using Meta runners and no experiments."
                  )
                  runner_label_prefix = DEFAULT_LABEL_PREFIX
              else:
                  try:
                      rollout_state = get_rollout_state_from_issue(
                          args.github_token, args.github_issue_repo, args.github_issue
                      )

                  username = get_potential_pr_author(
                      args.github_token,
                      args.github_repo,
                      args.github_actor,
                      args.github_ref_type,
                      args.github_branch,
                  )
                      username = get_potential_pr_author(
                          args.github_token,
                          args.github_repo,
                          args.github_actor,
                          args.github_ref_type,
                          args.github_branch,
                      )

                  is_canary = args.github_repo == "pytorch/pytorch-canary"
                      is_canary = args.github_repo == "pytorch/pytorch-canary"

                  runner_label_prefix = get_runner_prefix(
                      rollout_state,
                      (args.github_issue_owner, username),
                      args.github_branch,
                      is_canary,
                  )
                      runner_label_prefix = get_runner_prefix(
                          rollout_state, (args.github_issue_owner, username), is_canary
                      )

              except Exception as e:
                  log.error(
                      f"Failed to get issue. Defaulting to Meta runners and no experiments. Exception: {e}"
                  )
                  except Exception as e:
                      log.error(
                          f"Failed to get issue. Defaulting to Meta runners and no experiments. Exception: {e}"
                      )

              set_github_output(GH_OUTPUT_KEY_LABEL_TYPE, runner_label_prefix)
2 .github/workflows/_win-test.yml vendored
@@ -189,7 +189,7 @@ jobs:
        run: |
          pushd "${PYTORCH_FINAL_PACKAGE_DIR}"
          # shellcheck disable=SC2046,SC2102
          python3 -mpip install $(echo *.whl)[opt-einsum,optree] optree==0.13.0
          python3 -mpip install $(echo *.whl)[opt-einsum,optree] optree==0.12.1
          popd

          .ci/pytorch/win-test.sh
.github/workflows/build-triton-wheel.yml (7 changes)

@ -43,7 +43,7 @@ jobs:
strategy:
fail-fast: false
matrix:
py_vers: [ "3.9", "3.10", "3.11", "3.12" ]
py_vers: [ "3.8", "3.9", "3.10", "3.11", "3.12" ]
device: ["cuda", "rocm", "xpu"]
include:
- device: "rocm"

@ -91,6 +91,9 @@ jobs:

# Determine python executable for given version
case $PY_VERS in
3.8)
PYTHON_EXECUTABLE=/opt/python/cp38-cp38/bin/python
;;
3.9)
PYTHON_EXECUTABLE=/opt/python/cp39-cp39/bin/python
;;

@ -211,7 +214,7 @@ jobs:
strategy:
fail-fast: false
matrix:
py_vers: [ "3.9", "3.10", "3.11", "3.12" ]
py_vers: [ "3.8", "3.9", "3.10", "3.11", "3.12" ]
timeout-minutes: 40
env:
DOCKER_IMAGE: pytorch/conda-builder:cpu
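The case statement above simply maps a requested Python version onto the matching interpreter inside the manylinux image. The same table as a dict, for clarity (a sketch: only the 3.8 and 3.9 paths appear in this hunk; the remaining entries assume the standard /opt/python/cpXY-cpXY manylinux layout).

# Hypothetical mirror of the PY_VERS -> PYTHON_EXECUTABLE case statement.
PYTHON_EXECUTABLES = {
    "3.8": "/opt/python/cp38-cp38/bin/python",     # the entry this diff adds/removes
    "3.9": "/opt/python/cp39-cp39/bin/python",
    "3.10": "/opt/python/cp310-cp310/bin/python",  # assumed, not shown in the hunk
    "3.11": "/opt/python/cp311-cp311/bin/python",  # assumed, not shown in the hunk
    "3.12": "/opt/python/cp312-cp312/bin/python",  # assumed, not shown in the hunk
}

def python_executable(py_vers: str) -> str:
    return PYTHON_EXECUTABLES[py_vers]

assert python_executable("3.9") == "/opt/python/cp39-cp39/bin/python"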
.github/workflows/docker-builds.yml (5 changes)

@ -67,7 +67,6 @@ jobs:
pytorch-linux-jammy-py3.12-halide,
pytorch-linux-jammy-xpu-2024.0-py3,
pytorch-linux-jammy-py3-clang15-asan,
pytorch-linux-jammy-py3-clang18-asan,
pytorch-linux-focal-py3-clang10-onnx,
pytorch-linux-focal-linter,
pytorch-linux-jammy-cuda11.8-cudnn9-py3.9-linter,

@ -79,9 +78,7 @@ jobs:
- docker-image-name: pytorch-linux-jammy-aarch64-py3.10-gcc11-inductor-benchmarks
runner: linux.arm64.m7g.4xlarge
timeout-minutes: 600
# Docker uploads fail from LF runners, see https://github.com/pytorch/pytorch/pull/137358
# runs-on: "${{ needs.get-label-type.outputs.label-type }}${{ matrix.runner }}"
runs-on: "${{ matrix.runner }}"
runs-on: "${{ needs.get-label-type.outputs.label-type }}${{ matrix.runner }}"
env:
DOCKER_IMAGE_BASE: 308535385114.dkr.ecr.us-east-1.amazonaws.com/pytorch/${{ matrix.docker-image-name }}
steps:
.github/workflows/generated-linux-aarch64-binary-manywheel-nightly.yml (generated, 12 changes)

@ -60,7 +60,6 @@ jobs:
DOCKER_IMAGE: pytorch/manylinuxaarch64-builder:cpu-aarch64-main
use_split_build: False
DESIRED_PYTHON: "3.9"
runner_prefix: "${{ needs.get-label-type.outputs.label-type }}"
runs_on: linux.arm64.m7g.4xlarge.ephemeral
ALPINE_IMAGE: "arm64v8/alpine"
build_name: manywheel-py3_9-cpu-aarch64

@ -87,7 +86,6 @@ jobs:
DESIRED_PYTHON: "3.9"
build_name: manywheel-py3_9-cpu-aarch64
build_environment: linux-aarch64-binary-manywheel
runner_prefix: "${{ needs.get-label-type.outputs.label-type }}"
runs_on: linux.arm64.2xlarge
ALPINE_IMAGE: "arm64v8/alpine"
secrets:

@ -132,7 +130,6 @@ jobs:
DESIRED_DEVTOOLSET: cxx11-abi
use_split_build: False
DESIRED_PYTHON: "3.9"
runner_prefix: "${{ needs.get-label-type.outputs.label-type }}"
runs_on: linux.arm64.m7g.4xlarge.ephemeral
ALPINE_IMAGE: "arm64v8/alpine"
build_name: manywheel-py3_9-cuda-aarch64

@ -180,7 +177,6 @@ jobs:
DOCKER_IMAGE: pytorch/manylinuxaarch64-builder:cpu-aarch64-main
use_split_build: False
DESIRED_PYTHON: "3.10"
runner_prefix: "${{ needs.get-label-type.outputs.label-type }}"
runs_on: linux.arm64.m7g.4xlarge.ephemeral
ALPINE_IMAGE: "arm64v8/alpine"
build_name: manywheel-py3_10-cpu-aarch64

@ -207,7 +203,6 @@ jobs:
DESIRED_PYTHON: "3.10"
build_name: manywheel-py3_10-cpu-aarch64
build_environment: linux-aarch64-binary-manywheel
runner_prefix: "${{ needs.get-label-type.outputs.label-type }}"
runs_on: linux.arm64.2xlarge
ALPINE_IMAGE: "arm64v8/alpine"
secrets:

@ -252,7 +247,6 @@ jobs:
DESIRED_DEVTOOLSET: cxx11-abi
use_split_build: False
DESIRED_PYTHON: "3.10"
runner_prefix: "${{ needs.get-label-type.outputs.label-type }}"
runs_on: linux.arm64.m7g.4xlarge.ephemeral
ALPINE_IMAGE: "arm64v8/alpine"
build_name: manywheel-py3_10-cuda-aarch64

@ -300,7 +294,6 @@ jobs:
DOCKER_IMAGE: pytorch/manylinuxaarch64-builder:cpu-aarch64-main
use_split_build: False
DESIRED_PYTHON: "3.11"
runner_prefix: "${{ needs.get-label-type.outputs.label-type }}"
runs_on: linux.arm64.m7g.4xlarge.ephemeral
ALPINE_IMAGE: "arm64v8/alpine"
build_name: manywheel-py3_11-cpu-aarch64

@ -327,7 +320,6 @@ jobs:
DESIRED_PYTHON: "3.11"
build_name: manywheel-py3_11-cpu-aarch64
build_environment: linux-aarch64-binary-manywheel
runner_prefix: "${{ needs.get-label-type.outputs.label-type }}"
runs_on: linux.arm64.2xlarge
ALPINE_IMAGE: "arm64v8/alpine"
secrets:

@ -372,7 +364,6 @@ jobs:
DESIRED_DEVTOOLSET: cxx11-abi
use_split_build: False
DESIRED_PYTHON: "3.11"
runner_prefix: "${{ needs.get-label-type.outputs.label-type }}"
runs_on: linux.arm64.m7g.4xlarge.ephemeral
ALPINE_IMAGE: "arm64v8/alpine"
build_name: manywheel-py3_11-cuda-aarch64

@ -420,7 +411,6 @@ jobs:
DOCKER_IMAGE: pytorch/manylinuxaarch64-builder:cpu-aarch64-main
use_split_build: False
DESIRED_PYTHON: "3.12"
runner_prefix: "${{ needs.get-label-type.outputs.label-type }}"
runs_on: linux.arm64.m7g.4xlarge.ephemeral
ALPINE_IMAGE: "arm64v8/alpine"
build_name: manywheel-py3_12-cpu-aarch64

@ -447,7 +437,6 @@ jobs:
DESIRED_PYTHON: "3.12"
build_name: manywheel-py3_12-cpu-aarch64
build_environment: linux-aarch64-binary-manywheel
runner_prefix: "${{ needs.get-label-type.outputs.label-type }}"
runs_on: linux.arm64.2xlarge
ALPINE_IMAGE: "arm64v8/alpine"
secrets:

@ -492,7 +481,6 @@ jobs:
DESIRED_DEVTOOLSET: cxx11-abi
use_split_build: False
DESIRED_PYTHON: "3.12"
runner_prefix: "${{ needs.get-label-type.outputs.label-type }}"
runs_on: linux.arm64.m7g.4xlarge.ephemeral
ALPINE_IMAGE: "arm64v8/alpine"
build_name: manywheel-py3_12-cuda-aarch64
.github/workflows/generated-linux-binary-manywheel-nightly.yml (generated, 350 changes)

@ -3324,353 +3324,3 @@ jobs:
conda-pytorchbot-token: ${{ secrets.CONDA_PYTORCHBOT_TOKEN }}
conda-pytorchbot-token-test: ${{ secrets.CONDA_PYTORCHBOT_TOKEN_TEST }}
uses: ./.github/workflows/_binary-upload.yml

manywheel-py3_13t-cpu-build:
if: ${{ github.repository_owner == 'pytorch' }}
uses: ./.github/workflows/_binary-build-linux.yml
needs: get-label-type
with:
PYTORCH_ROOT: /pytorch
BUILDER_ROOT: /builder
PACKAGE_TYPE: manywheel
# TODO: This is a legacy variable that we eventually want to get rid of in
# favor of GPU_ARCH_VERSION
DESIRED_CUDA: cpu
GPU_ARCH_TYPE: cpu
DOCKER_IMAGE: pytorch/manylinux-builder:cpu-main
use_split_build: False
DESIRED_PYTHON: "3.13t"
runner_prefix: "${{ needs.get-label-type.outputs.label-type }}"
build_name: manywheel-py3_13t-cpu
build_environment: linux-binary-manywheel
secrets:
github-token: ${{ secrets.GITHUB_TOKEN }}
manywheel-py3_13t-cpu-test: # Testing
if: ${{ github.repository_owner == 'pytorch' }}
needs:
- manywheel-py3_13t-cpu-build
- get-label-type
uses: ./.github/workflows/_binary-test-linux.yml
with:
PYTORCH_ROOT: /pytorch
BUILDER_ROOT: /builder
PACKAGE_TYPE: manywheel
# TODO: This is a legacy variable that we eventually want to get rid of in
# favor of GPU_ARCH_VERSION
DESIRED_CUDA: cpu
GPU_ARCH_TYPE: cpu
DOCKER_IMAGE: pytorch/manylinux-builder:cpu-main
use_split_build: False
DESIRED_PYTHON: "3.13t"
build_name: manywheel-py3_13t-cpu
build_environment: linux-binary-manywheel
runner_prefix: "${{ needs.get-label-type.outputs.label-type }}"
runs_on: linux.4xlarge
secrets:
github-token: ${{ secrets.GITHUB_TOKEN }}
manywheel-py3_13t-cpu-upload: # Uploading
if: ${{ github.repository_owner == 'pytorch' }}
permissions:
id-token: write
contents: read
needs: manywheel-py3_13t-cpu-test
with:
PYTORCH_ROOT: /pytorch
BUILDER_ROOT: /builder
PACKAGE_TYPE: manywheel
# TODO: This is a legacy variable that we eventually want to get rid of in
# favor of GPU_ARCH_VERSION
DESIRED_CUDA: cpu
GPU_ARCH_TYPE: cpu
DOCKER_IMAGE: pytorch/manylinux-builder:cpu-main
use_split_build: False
DESIRED_PYTHON: "3.13t"
build_name: manywheel-py3_13t-cpu
secrets:
github-token: ${{ secrets.GITHUB_TOKEN }}
conda-pytorchbot-token: ${{ secrets.CONDA_PYTORCHBOT_TOKEN }}
conda-pytorchbot-token-test: ${{ secrets.CONDA_PYTORCHBOT_TOKEN_TEST }}
uses: ./.github/workflows/_binary-upload.yml

manywheel-py3_13t-cpu-cxx11-abi-build:
if: ${{ github.repository_owner == 'pytorch' }}
uses: ./.github/workflows/_binary-build-linux.yml
needs: get-label-type
with:
PYTORCH_ROOT: /pytorch
BUILDER_ROOT: /builder
PACKAGE_TYPE: manywheel
# TODO: This is a legacy variable that we eventually want to get rid of in
# favor of GPU_ARCH_VERSION
DESIRED_CUDA: cpu-cxx11-abi
GPU_ARCH_TYPE: cpu-cxx11-abi
DOCKER_IMAGE: pytorch/manylinuxcxx11-abi-builder:cpu-cxx11-abi-main
DESIRED_DEVTOOLSET: cxx11-abi
use_split_build: False
DESIRED_PYTHON: "3.13t"
runner_prefix: "${{ needs.get-label-type.outputs.label-type }}"
build_name: manywheel-py3_13t-cpu-cxx11-abi
build_environment: linux-binary-manywheel
secrets:
github-token: ${{ secrets.GITHUB_TOKEN }}
manywheel-py3_13t-cpu-cxx11-abi-test: # Testing
if: ${{ github.repository_owner == 'pytorch' }}
needs:
- manywheel-py3_13t-cpu-cxx11-abi-build
- get-label-type
uses: ./.github/workflows/_binary-test-linux.yml
with:
PYTORCH_ROOT: /pytorch
BUILDER_ROOT: /builder
PACKAGE_TYPE: manywheel
# TODO: This is a legacy variable that we eventually want to get rid of in
# favor of GPU_ARCH_VERSION
DESIRED_CUDA: cpu-cxx11-abi
GPU_ARCH_TYPE: cpu-cxx11-abi
DOCKER_IMAGE: pytorch/manylinuxcxx11-abi-builder:cpu-cxx11-abi-main
DESIRED_DEVTOOLSET: cxx11-abi
use_split_build: False
DESIRED_PYTHON: "3.13t"
build_name: manywheel-py3_13t-cpu-cxx11-abi
build_environment: linux-binary-manywheel
runner_prefix: "${{ needs.get-label-type.outputs.label-type }}"
runs_on: linux.4xlarge
secrets:
github-token: ${{ secrets.GITHUB_TOKEN }}
manywheel-py3_13t-cpu-cxx11-abi-upload: # Uploading
if: ${{ github.repository_owner == 'pytorch' }}
permissions:
id-token: write
contents: read
needs: manywheel-py3_13t-cpu-cxx11-abi-test
with:
PYTORCH_ROOT: /pytorch
BUILDER_ROOT: /builder
PACKAGE_TYPE: manywheel
# TODO: This is a legacy variable that we eventually want to get rid of in
# favor of GPU_ARCH_VERSION
DESIRED_CUDA: cpu-cxx11-abi
GPU_ARCH_TYPE: cpu-cxx11-abi
DOCKER_IMAGE: pytorch/manylinuxcxx11-abi-builder:cpu-cxx11-abi-main
DESIRED_DEVTOOLSET: cxx11-abi
use_split_build: False
DESIRED_PYTHON: "3.13t"
build_name: manywheel-py3_13t-cpu-cxx11-abi
secrets:
github-token: ${{ secrets.GITHUB_TOKEN }}
conda-pytorchbot-token: ${{ secrets.CONDA_PYTORCHBOT_TOKEN }}
conda-pytorchbot-token-test: ${{ secrets.CONDA_PYTORCHBOT_TOKEN_TEST }}
uses: ./.github/workflows/_binary-upload.yml

manywheel-py3_13t-cuda11_8-build:
if: ${{ github.repository_owner == 'pytorch' }}
uses: ./.github/workflows/_binary-build-linux.yml
needs: get-label-type
with:
PYTORCH_ROOT: /pytorch
BUILDER_ROOT: /builder
PACKAGE_TYPE: manywheel
# TODO: This is a legacy variable that we eventually want to get rid of in
# favor of GPU_ARCH_VERSION
DESIRED_CUDA: cu118
GPU_ARCH_VERSION: 11.8
GPU_ARCH_TYPE: cuda
DOCKER_IMAGE: pytorch/manylinux-builder:cuda11.8-main
use_split_build: False
DESIRED_PYTHON: "3.13t"
runner_prefix: "${{ needs.get-label-type.outputs.label-type }}"
build_name: manywheel-py3_13t-cuda11_8
build_environment: linux-binary-manywheel
PYTORCH_EXTRA_INSTALL_REQUIREMENTS: nvidia-cuda-nvrtc-cu11==11.8.89; platform_system == 'Linux' and platform_machine == 'x86_64' | nvidia-cuda-runtime-cu11==11.8.89; platform_system == 'Linux' and platform_machine == 'x86_64' | nvidia-cuda-cupti-cu11==11.8.87; platform_system == 'Linux' and platform_machine == 'x86_64' | nvidia-cudnn-cu11==9.1.0.70; platform_system == 'Linux' and platform_machine == 'x86_64' | nvidia-cublas-cu11==11.11.3.6; platform_system == 'Linux' and platform_machine == 'x86_64' | nvidia-cufft-cu11==10.9.0.58; platform_system == 'Linux' and platform_machine == 'x86_64' | nvidia-curand-cu11==10.3.0.86; platform_system == 'Linux' and platform_machine == 'x86_64' | nvidia-cusolver-cu11==11.4.1.48; platform_system == 'Linux' and platform_machine == 'x86_64' | nvidia-cusparse-cu11==11.7.5.86; platform_system == 'Linux' and platform_machine == 'x86_64' | nvidia-nccl-cu11==2.21.5; platform_system == 'Linux' and platform_machine == 'x86_64' | nvidia-nvtx-cu11==11.8.86; platform_system == 'Linux' and platform_machine == 'x86_64'
secrets:
github-token: ${{ secrets.GITHUB_TOKEN }}
manywheel-py3_13t-cuda11_8-test: # Testing
if: ${{ github.repository_owner == 'pytorch' }}
needs:
- manywheel-py3_13t-cuda11_8-build
- get-label-type
uses: ./.github/workflows/_binary-test-linux.yml
with:
PYTORCH_ROOT: /pytorch
BUILDER_ROOT: /builder
PACKAGE_TYPE: manywheel
# TODO: This is a legacy variable that we eventually want to get rid of in
# favor of GPU_ARCH_VERSION
DESIRED_CUDA: cu118
GPU_ARCH_VERSION: 11.8
GPU_ARCH_TYPE: cuda
DOCKER_IMAGE: pytorch/manylinux-builder:cuda11.8-main
use_split_build: False
DESIRED_PYTHON: "3.13t"
build_name: manywheel-py3_13t-cuda11_8
build_environment: linux-binary-manywheel
runner_prefix: "${{ needs.get-label-type.outputs.label-type }}"
runs_on: linux.4xlarge.nvidia.gpu
secrets:
github-token: ${{ secrets.GITHUB_TOKEN }}
manywheel-py3_13t-cuda11_8-upload: # Uploading
if: ${{ github.repository_owner == 'pytorch' }}
permissions:
id-token: write
contents: read
needs: manywheel-py3_13t-cuda11_8-test
with:
PYTORCH_ROOT: /pytorch
BUILDER_ROOT: /builder
PACKAGE_TYPE: manywheel
# TODO: This is a legacy variable that we eventually want to get rid of in
# favor of GPU_ARCH_VERSION
DESIRED_CUDA: cu118
GPU_ARCH_VERSION: 11.8
GPU_ARCH_TYPE: cuda
DOCKER_IMAGE: pytorch/manylinux-builder:cuda11.8-main
use_split_build: False
DESIRED_PYTHON: "3.13t"
build_name: manywheel-py3_13t-cuda11_8
secrets:
github-token: ${{ secrets.GITHUB_TOKEN }}
conda-pytorchbot-token: ${{ secrets.CONDA_PYTORCHBOT_TOKEN }}
conda-pytorchbot-token-test: ${{ secrets.CONDA_PYTORCHBOT_TOKEN_TEST }}
uses: ./.github/workflows/_binary-upload.yml

manywheel-py3_13t-cuda12_1-build:
if: ${{ github.repository_owner == 'pytorch' }}
uses: ./.github/workflows/_binary-build-linux.yml
needs: get-label-type
with:
PYTORCH_ROOT: /pytorch
BUILDER_ROOT: /builder
PACKAGE_TYPE: manywheel
# TODO: This is a legacy variable that we eventually want to get rid of in
# favor of GPU_ARCH_VERSION
DESIRED_CUDA: cu121
GPU_ARCH_VERSION: 12.1
GPU_ARCH_TYPE: cuda
DOCKER_IMAGE: pytorch/manylinux-builder:cuda12.1-main
use_split_build: False
DESIRED_PYTHON: "3.13t"
runner_prefix: "${{ needs.get-label-type.outputs.label-type }}"
build_name: manywheel-py3_13t-cuda12_1
build_environment: linux-binary-manywheel
PYTORCH_EXTRA_INSTALL_REQUIREMENTS: nvidia-cuda-nvrtc-cu12==12.1.105; platform_system == 'Linux' and platform_machine == 'x86_64' | nvidia-cuda-runtime-cu12==12.1.105; platform_system == 'Linux' and platform_machine == 'x86_64' | nvidia-cuda-cupti-cu12==12.1.105; platform_system == 'Linux' and platform_machine == 'x86_64' | nvidia-cudnn-cu12==9.1.0.70; platform_system == 'Linux' and platform_machine == 'x86_64' | nvidia-cublas-cu12==12.1.3.1; platform_system == 'Linux' and platform_machine == 'x86_64' | nvidia-cufft-cu12==11.0.2.54; platform_system == 'Linux' and platform_machine == 'x86_64' | nvidia-curand-cu12==10.3.2.106; platform_system == 'Linux' and platform_machine == 'x86_64' | nvidia-cusolver-cu12==11.4.5.107; platform_system == 'Linux' and platform_machine == 'x86_64' | nvidia-cusparse-cu12==12.1.0.106; platform_system == 'Linux' and platform_machine == 'x86_64' | nvidia-nccl-cu12==2.21.5; platform_system == 'Linux' and platform_machine == 'x86_64' | nvidia-nvtx-cu12==12.1.105; platform_system == 'Linux' and platform_machine == 'x86_64'
secrets:
github-token: ${{ secrets.GITHUB_TOKEN }}
manywheel-py3_13t-cuda12_1-test: # Testing
if: ${{ github.repository_owner == 'pytorch' }}
needs:
- manywheel-py3_13t-cuda12_1-build
- get-label-type
uses: ./.github/workflows/_binary-test-linux.yml
with:
PYTORCH_ROOT: /pytorch
BUILDER_ROOT: /builder
PACKAGE_TYPE: manywheel
# TODO: This is a legacy variable that we eventually want to get rid of in
# favor of GPU_ARCH_VERSION
DESIRED_CUDA: cu121
GPU_ARCH_VERSION: 12.1
GPU_ARCH_TYPE: cuda
DOCKER_IMAGE: pytorch/manylinux-builder:cuda12.1-main
use_split_build: False
DESIRED_PYTHON: "3.13t"
build_name: manywheel-py3_13t-cuda12_1
build_environment: linux-binary-manywheel
runner_prefix: "${{ needs.get-label-type.outputs.label-type }}"
runs_on: linux.4xlarge.nvidia.gpu
secrets:
github-token: ${{ secrets.GITHUB_TOKEN }}
manywheel-py3_13t-cuda12_1-upload: # Uploading
if: ${{ github.repository_owner == 'pytorch' }}
permissions:
id-token: write
contents: read
needs: manywheel-py3_13t-cuda12_1-test
with:
PYTORCH_ROOT: /pytorch
BUILDER_ROOT: /builder
PACKAGE_TYPE: manywheel
# TODO: This is a legacy variable that we eventually want to get rid of in
# favor of GPU_ARCH_VERSION
DESIRED_CUDA: cu121
GPU_ARCH_VERSION: 12.1
GPU_ARCH_TYPE: cuda
DOCKER_IMAGE: pytorch/manylinux-builder:cuda12.1-main
use_split_build: False
DESIRED_PYTHON: "3.13t"
build_name: manywheel-py3_13t-cuda12_1
secrets:
github-token: ${{ secrets.GITHUB_TOKEN }}
conda-pytorchbot-token: ${{ secrets.CONDA_PYTORCHBOT_TOKEN }}
conda-pytorchbot-token-test: ${{ secrets.CONDA_PYTORCHBOT_TOKEN_TEST }}
uses: ./.github/workflows/_binary-upload.yml

manywheel-py3_13t-cuda12_4-build:
if: ${{ github.repository_owner == 'pytorch' }}
uses: ./.github/workflows/_binary-build-linux.yml
needs: get-label-type
with:
PYTORCH_ROOT: /pytorch
BUILDER_ROOT: /builder
PACKAGE_TYPE: manywheel
# TODO: This is a legacy variable that we eventually want to get rid of in
# favor of GPU_ARCH_VERSION
DESIRED_CUDA: cu124
GPU_ARCH_VERSION: 12.4
GPU_ARCH_TYPE: cuda
DOCKER_IMAGE: pytorch/manylinux-builder:cuda12.4-main
use_split_build: False
DESIRED_PYTHON: "3.13t"
runner_prefix: "${{ needs.get-label-type.outputs.label-type }}"
build_name: manywheel-py3_13t-cuda12_4
build_environment: linux-binary-manywheel
PYTORCH_EXTRA_INSTALL_REQUIREMENTS: nvidia-cuda-nvrtc-cu12==12.4.127; platform_system == 'Linux' and platform_machine == 'x86_64' | nvidia-cuda-runtime-cu12==12.4.127; platform_system == 'Linux' and platform_machine == 'x86_64' | nvidia-cuda-cupti-cu12==12.4.127; platform_system == 'Linux' and platform_machine == 'x86_64' | nvidia-cudnn-cu12==9.1.0.70; platform_system == 'Linux' and platform_machine == 'x86_64' | nvidia-cublas-cu12==12.4.5.8; platform_system == 'Linux' and platform_machine == 'x86_64' | nvidia-cufft-cu12==11.2.1.3; platform_system == 'Linux' and platform_machine == 'x86_64' | nvidia-curand-cu12==10.3.5.147; platform_system == 'Linux' and platform_machine == 'x86_64' | nvidia-cusolver-cu12==11.6.1.9; platform_system == 'Linux' and platform_machine == 'x86_64' | nvidia-cusparse-cu12==12.3.1.170; platform_system == 'Linux' and platform_machine == 'x86_64' | nvidia-nccl-cu12==2.21.5; platform_system == 'Linux' and platform_machine == 'x86_64' | nvidia-nvtx-cu12==12.4.127; platform_system == 'Linux' and platform_machine == 'x86_64' | nvidia-nvjitlink-cu12==12.4.127; platform_system == 'Linux' and platform_machine == 'x86_64'
secrets:
github-token: ${{ secrets.GITHUB_TOKEN }}
manywheel-py3_13t-cuda12_4-test: # Testing
if: ${{ github.repository_owner == 'pytorch' }}
needs:
- manywheel-py3_13t-cuda12_4-build
- get-label-type
uses: ./.github/workflows/_binary-test-linux.yml
with:
PYTORCH_ROOT: /pytorch
BUILDER_ROOT: /builder
PACKAGE_TYPE: manywheel
# TODO: This is a legacy variable that we eventually want to get rid of in
# favor of GPU_ARCH_VERSION
DESIRED_CUDA: cu124
GPU_ARCH_VERSION: 12.4
GPU_ARCH_TYPE: cuda
DOCKER_IMAGE: pytorch/manylinux-builder:cuda12.4-main
use_split_build: False
DESIRED_PYTHON: "3.13t"
build_name: manywheel-py3_13t-cuda12_4
build_environment: linux-binary-manywheel
runner_prefix: "${{ needs.get-label-type.outputs.label-type }}"
runs_on: linux.4xlarge.nvidia.gpu
secrets:
github-token: ${{ secrets.GITHUB_TOKEN }}
manywheel-py3_13t-cuda12_4-upload: # Uploading
if: ${{ github.repository_owner == 'pytorch' }}
permissions:
id-token: write
contents: read
needs: manywheel-py3_13t-cuda12_4-test
with:
PYTORCH_ROOT: /pytorch
BUILDER_ROOT: /builder
PACKAGE_TYPE: manywheel
# TODO: This is a legacy variable that we eventually want to get rid of in
# favor of GPU_ARCH_VERSION
DESIRED_CUDA: cu124
GPU_ARCH_VERSION: 12.4
GPU_ARCH_TYPE: cuda
DOCKER_IMAGE: pytorch/manylinux-builder:cuda12.4-main
use_split_build: False
DESIRED_PYTHON: "3.13t"
build_name: manywheel-py3_13t-cuda12_4
secrets:
github-token: ${{ secrets.GITHUB_TOKEN }}
conda-pytorchbot-token: ${{ secrets.CONDA_PYTORCHBOT_TOKEN }}
conda-pytorchbot-token-test: ${{ secrets.CONDA_PYTORCHBOT_TOKEN_TEST }}
uses: ./.github/workflows/_binary-upload.yml
.github/workflows/generated-linux-binary-manywheel-split-nightly.yml (generated, 280 changes)

@ -1514,283 +1514,3 @@ jobs:
conda-pytorchbot-token: ${{ secrets.CONDA_PYTORCHBOT_TOKEN }}
conda-pytorchbot-token-test: ${{ secrets.CONDA_PYTORCHBOT_TOKEN_TEST }}
uses: ./.github/workflows/_binary-upload.yml

manywheel-py3_13t-cuda11_8-build:
if: ${{ github.repository_owner == 'pytorch' }}
uses: ./.github/workflows/_binary-build-linux.yml
needs: get-label-type
with:
PYTORCH_ROOT: /pytorch
BUILDER_ROOT: /builder
PACKAGE_TYPE: manywheel
# TODO: This is a legacy variable that we eventually want to get rid of in
# favor of GPU_ARCH_VERSION
DESIRED_CUDA: cu118
GPU_ARCH_VERSION: 11.8
GPU_ARCH_TYPE: cuda
DOCKER_IMAGE: pytorch/manylinux-builder:cuda11.8-main
use_split_build: True
DESIRED_PYTHON: "3.13t"
runner_prefix: "${{ needs.get-label-type.outputs.label-type }}"
build_name: manywheel-py3_13t-cuda11_8
build_environment: linux-binary-manywheel-split
PYTORCH_EXTRA_INSTALL_REQUIREMENTS: nvidia-cuda-nvrtc-cu11==11.8.89; platform_system == 'Linux' and platform_machine == 'x86_64' | nvidia-cuda-runtime-cu11==11.8.89; platform_system == 'Linux' and platform_machine == 'x86_64' | nvidia-cuda-cupti-cu11==11.8.87; platform_system == 'Linux' and platform_machine == 'x86_64' | nvidia-cudnn-cu11==9.1.0.70; platform_system == 'Linux' and platform_machine == 'x86_64' | nvidia-cublas-cu11==11.11.3.6; platform_system == 'Linux' and platform_machine == 'x86_64' | nvidia-cufft-cu11==10.9.0.58; platform_system == 'Linux' and platform_machine == 'x86_64' | nvidia-curand-cu11==10.3.0.86; platform_system == 'Linux' and platform_machine == 'x86_64' | nvidia-cusolver-cu11==11.4.1.48; platform_system == 'Linux' and platform_machine == 'x86_64' | nvidia-cusparse-cu11==11.7.5.86; platform_system == 'Linux' and platform_machine == 'x86_64' | nvidia-nccl-cu11==2.21.5; platform_system == 'Linux' and platform_machine == 'x86_64' | nvidia-nvtx-cu11==11.8.86; platform_system == 'Linux' and platform_machine == 'x86_64'
secrets:
github-token: ${{ secrets.GITHUB_TOKEN }}
manywheel-py3_13t-cuda11_8-test: # Testing
if: ${{ github.repository_owner == 'pytorch' }}
needs:
- manywheel-py3_13t-cuda11_8-build
- get-label-type
uses: ./.github/workflows/_binary-test-linux.yml
with:
PYTORCH_ROOT: /pytorch
BUILDER_ROOT: /builder
PACKAGE_TYPE: manywheel
# TODO: This is a legacy variable that we eventually want to get rid of in
# favor of GPU_ARCH_VERSION
DESIRED_CUDA: cu118
GPU_ARCH_VERSION: 11.8
GPU_ARCH_TYPE: cuda
DOCKER_IMAGE: pytorch/manylinux-builder:cuda11.8-main
use_split_build: True
DESIRED_PYTHON: "3.13t"
build_name: manywheel-py3_13t-cuda11_8
build_environment: linux-binary-manywheel-split
runner_prefix: "${{ needs.get-label-type.outputs.label-type }}"
runs_on: linux.4xlarge.nvidia.gpu
secrets:
github-token: ${{ secrets.GITHUB_TOKEN }}
manywheel-py3_13t-cuda11_8-upload: # Uploading
if: ${{ github.repository_owner == 'pytorch' }}
permissions:
id-token: write
contents: read
needs: manywheel-py3_13t-cuda11_8-test
with:
PYTORCH_ROOT: /pytorch
BUILDER_ROOT: /builder
PACKAGE_TYPE: manywheel
# TODO: This is a legacy variable that we eventually want to get rid of in
# favor of GPU_ARCH_VERSION
DESIRED_CUDA: cu118
GPU_ARCH_VERSION: 11.8
GPU_ARCH_TYPE: cuda
DOCKER_IMAGE: pytorch/manylinux-builder:cuda11.8-main
use_split_build: True
DESIRED_PYTHON: "3.13t"
build_name: manywheel-py3_13t-cuda11_8
secrets:
github-token: ${{ secrets.GITHUB_TOKEN }}
conda-pytorchbot-token: ${{ secrets.CONDA_PYTORCHBOT_TOKEN }}
conda-pytorchbot-token-test: ${{ secrets.CONDA_PYTORCHBOT_TOKEN_TEST }}
uses: ./.github/workflows/_binary-upload.yml

manywheel-py3_13t-cuda12_1-build:
if: ${{ github.repository_owner == 'pytorch' }}
uses: ./.github/workflows/_binary-build-linux.yml
needs: get-label-type
with:
PYTORCH_ROOT: /pytorch
BUILDER_ROOT: /builder
PACKAGE_TYPE: manywheel
# TODO: This is a legacy variable that we eventually want to get rid of in
# favor of GPU_ARCH_VERSION
DESIRED_CUDA: cu121
GPU_ARCH_VERSION: 12.1
GPU_ARCH_TYPE: cuda
DOCKER_IMAGE: pytorch/manylinux-builder:cuda12.1-main
use_split_build: True
DESIRED_PYTHON: "3.13t"
runner_prefix: "${{ needs.get-label-type.outputs.label-type }}"
build_name: manywheel-py3_13t-cuda12_1
build_environment: linux-binary-manywheel-split
PYTORCH_EXTRA_INSTALL_REQUIREMENTS: nvidia-cuda-nvrtc-cu12==12.1.105; platform_system == 'Linux' and platform_machine == 'x86_64' | nvidia-cuda-runtime-cu12==12.1.105; platform_system == 'Linux' and platform_machine == 'x86_64' | nvidia-cuda-cupti-cu12==12.1.105; platform_system == 'Linux' and platform_machine == 'x86_64' | nvidia-cudnn-cu12==9.1.0.70; platform_system == 'Linux' and platform_machine == 'x86_64' | nvidia-cublas-cu12==12.1.3.1; platform_system == 'Linux' and platform_machine == 'x86_64' | nvidia-cufft-cu12==11.0.2.54; platform_system == 'Linux' and platform_machine == 'x86_64' | nvidia-curand-cu12==10.3.2.106; platform_system == 'Linux' and platform_machine == 'x86_64' | nvidia-cusolver-cu12==11.4.5.107; platform_system == 'Linux' and platform_machine == 'x86_64' | nvidia-cusparse-cu12==12.1.0.106; platform_system == 'Linux' and platform_machine == 'x86_64' | nvidia-nccl-cu12==2.21.5; platform_system == 'Linux' and platform_machine == 'x86_64' | nvidia-nvtx-cu12==12.1.105; platform_system == 'Linux' and platform_machine == 'x86_64'
secrets:
github-token: ${{ secrets.GITHUB_TOKEN }}
manywheel-py3_13t-cuda12_1-test: # Testing
if: ${{ github.repository_owner == 'pytorch' }}
needs:
- manywheel-py3_13t-cuda12_1-build
- get-label-type
uses: ./.github/workflows/_binary-test-linux.yml
with:
PYTORCH_ROOT: /pytorch
BUILDER_ROOT: /builder
PACKAGE_TYPE: manywheel
# TODO: This is a legacy variable that we eventually want to get rid of in
# favor of GPU_ARCH_VERSION
DESIRED_CUDA: cu121
GPU_ARCH_VERSION: 12.1
GPU_ARCH_TYPE: cuda
DOCKER_IMAGE: pytorch/manylinux-builder:cuda12.1-main
use_split_build: True
DESIRED_PYTHON: "3.13t"
build_name: manywheel-py3_13t-cuda12_1
build_environment: linux-binary-manywheel-split
runner_prefix: "${{ needs.get-label-type.outputs.label-type }}"
runs_on: linux.4xlarge.nvidia.gpu
secrets:
github-token: ${{ secrets.GITHUB_TOKEN }}
manywheel-py3_13t-cuda12_1-upload: # Uploading
if: ${{ github.repository_owner == 'pytorch' }}
permissions:
id-token: write
contents: read
needs: manywheel-py3_13t-cuda12_1-test
with:
PYTORCH_ROOT: /pytorch
BUILDER_ROOT: /builder
PACKAGE_TYPE: manywheel
# TODO: This is a legacy variable that we eventually want to get rid of in
# favor of GPU_ARCH_VERSION
DESIRED_CUDA: cu121
GPU_ARCH_VERSION: 12.1
GPU_ARCH_TYPE: cuda
DOCKER_IMAGE: pytorch/manylinux-builder:cuda12.1-main
use_split_build: True
DESIRED_PYTHON: "3.13t"
build_name: manywheel-py3_13t-cuda12_1
secrets:
github-token: ${{ secrets.GITHUB_TOKEN }}
conda-pytorchbot-token: ${{ secrets.CONDA_PYTORCHBOT_TOKEN }}
conda-pytorchbot-token-test: ${{ secrets.CONDA_PYTORCHBOT_TOKEN_TEST }}
uses: ./.github/workflows/_binary-upload.yml

manywheel-py3_13t-cuda12_4-build:
if: ${{ github.repository_owner == 'pytorch' }}
uses: ./.github/workflows/_binary-build-linux.yml
needs: get-label-type
with:
PYTORCH_ROOT: /pytorch
BUILDER_ROOT: /builder
PACKAGE_TYPE: manywheel
# TODO: This is a legacy variable that we eventually want to get rid of in
# favor of GPU_ARCH_VERSION
DESIRED_CUDA: cu124
GPU_ARCH_VERSION: 12.4
GPU_ARCH_TYPE: cuda
DOCKER_IMAGE: pytorch/manylinux-builder:cuda12.4-main
use_split_build: True
DESIRED_PYTHON: "3.13t"
runner_prefix: "${{ needs.get-label-type.outputs.label-type }}"
build_name: manywheel-py3_13t-cuda12_4
build_environment: linux-binary-manywheel-split
PYTORCH_EXTRA_INSTALL_REQUIREMENTS: nvidia-cuda-nvrtc-cu12==12.4.127; platform_system == 'Linux' and platform_machine == 'x86_64' | nvidia-cuda-runtime-cu12==12.4.127; platform_system == 'Linux' and platform_machine == 'x86_64' | nvidia-cuda-cupti-cu12==12.4.127; platform_system == 'Linux' and platform_machine == 'x86_64' | nvidia-cudnn-cu12==9.1.0.70; platform_system == 'Linux' and platform_machine == 'x86_64' | nvidia-cublas-cu12==12.4.5.8; platform_system == 'Linux' and platform_machine == 'x86_64' | nvidia-cufft-cu12==11.2.1.3; platform_system == 'Linux' and platform_machine == 'x86_64' | nvidia-curand-cu12==10.3.5.147; platform_system == 'Linux' and platform_machine == 'x86_64' | nvidia-cusolver-cu12==11.6.1.9; platform_system == 'Linux' and platform_machine == 'x86_64' | nvidia-cusparse-cu12==12.3.1.170; platform_system == 'Linux' and platform_machine == 'x86_64' | nvidia-nccl-cu12==2.21.5; platform_system == 'Linux' and platform_machine == 'x86_64' | nvidia-nvtx-cu12==12.4.127; platform_system == 'Linux' and platform_machine == 'x86_64' | nvidia-nvjitlink-cu12==12.4.127; platform_system == 'Linux' and platform_machine == 'x86_64'
secrets:
github-token: ${{ secrets.GITHUB_TOKEN }}
manywheel-py3_13t-cuda12_4-test: # Testing
if: ${{ github.repository_owner == 'pytorch' }}
needs:
- manywheel-py3_13t-cuda12_4-build
- get-label-type
uses: ./.github/workflows/_binary-test-linux.yml
with:
PYTORCH_ROOT: /pytorch
BUILDER_ROOT: /builder
PACKAGE_TYPE: manywheel
# TODO: This is a legacy variable that we eventually want to get rid of in
# favor of GPU_ARCH_VERSION
DESIRED_CUDA: cu124
GPU_ARCH_VERSION: 12.4
GPU_ARCH_TYPE: cuda
DOCKER_IMAGE: pytorch/manylinux-builder:cuda12.4-main
use_split_build: True
DESIRED_PYTHON: "3.13t"
build_name: manywheel-py3_13t-cuda12_4
build_environment: linux-binary-manywheel-split
runner_prefix: "${{ needs.get-label-type.outputs.label-type }}"
runs_on: linux.4xlarge.nvidia.gpu
secrets:
github-token: ${{ secrets.GITHUB_TOKEN }}
manywheel-py3_13t-cuda12_4-upload: # Uploading
if: ${{ github.repository_owner == 'pytorch' }}
permissions:
id-token: write
contents: read
needs: manywheel-py3_13t-cuda12_4-test
with:
PYTORCH_ROOT: /pytorch
BUILDER_ROOT: /builder
PACKAGE_TYPE: manywheel
# TODO: This is a legacy variable that we eventually want to get rid of in
# favor of GPU_ARCH_VERSION
DESIRED_CUDA: cu124
GPU_ARCH_VERSION: 12.4
GPU_ARCH_TYPE: cuda
DOCKER_IMAGE: pytorch/manylinux-builder:cuda12.4-main
use_split_build: True
DESIRED_PYTHON: "3.13t"
build_name: manywheel-py3_13t-cuda12_4
secrets:
github-token: ${{ secrets.GITHUB_TOKEN }}
conda-pytorchbot-token: ${{ secrets.CONDA_PYTORCHBOT_TOKEN }}
conda-pytorchbot-token-test: ${{ secrets.CONDA_PYTORCHBOT_TOKEN_TEST }}
uses: ./.github/workflows/_binary-upload.yml

manywheel-py3_13t-cpu-build:
if: ${{ github.repository_owner == 'pytorch' }}
uses: ./.github/workflows/_binary-build-linux.yml
needs: get-label-type
with:
PYTORCH_ROOT: /pytorch
BUILDER_ROOT: /builder
PACKAGE_TYPE: manywheel
# TODO: This is a legacy variable that we eventually want to get rid of in
# favor of GPU_ARCH_VERSION
DESIRED_CUDA: cpu
GPU_ARCH_TYPE: cpu
DOCKER_IMAGE: pytorch/manylinux-builder:cpu-main
use_split_build: True
DESIRED_PYTHON: "3.13t"
runner_prefix: "${{ needs.get-label-type.outputs.label-type }}"
build_name: manywheel-py3_13t-cpu
build_environment: linux-binary-manywheel-split
secrets:
github-token: ${{ secrets.GITHUB_TOKEN }}
manywheel-py3_13t-cpu-test: # Testing
if: ${{ github.repository_owner == 'pytorch' }}
needs:
- manywheel-py3_13t-cpu-build
- get-label-type
uses: ./.github/workflows/_binary-test-linux.yml
with:
PYTORCH_ROOT: /pytorch
BUILDER_ROOT: /builder
PACKAGE_TYPE: manywheel
# TODO: This is a legacy variable that we eventually want to get rid of in
# favor of GPU_ARCH_VERSION
DESIRED_CUDA: cpu
GPU_ARCH_TYPE: cpu
DOCKER_IMAGE: pytorch/manylinux-builder:cpu-main
use_split_build: True
DESIRED_PYTHON: "3.13t"
build_name: manywheel-py3_13t-cpu
build_environment: linux-binary-manywheel-split
runner_prefix: "${{ needs.get-label-type.outputs.label-type }}"
runs_on: linux.4xlarge
secrets:
github-token: ${{ secrets.GITHUB_TOKEN }}
manywheel-py3_13t-cpu-upload: # Uploading
if: ${{ github.repository_owner == 'pytorch' }}
permissions:
id-token: write
contents: read
needs: manywheel-py3_13t-cpu-test
with:
PYTORCH_ROOT: /pytorch
BUILDER_ROOT: /builder
PACKAGE_TYPE: manywheel
# TODO: This is a legacy variable that we eventually want to get rid of in
# favor of GPU_ARCH_VERSION
DESIRED_CUDA: cpu
GPU_ARCH_TYPE: cpu
DOCKER_IMAGE: pytorch/manylinux-builder:cpu-main
use_split_build: True
DESIRED_PYTHON: "3.13t"
build_name: manywheel-py3_13t-cpu
secrets:
github-token: ${{ secrets.GITHUB_TOKEN }}
conda-pytorchbot-token: ${{ secrets.CONDA_PYTORCHBOT_TOKEN }}
conda-pytorchbot-token-test: ${{ secrets.CONDA_PYTORCHBOT_TOKEN_TEST }}
uses: ./.github/workflows/_binary-upload.yml
.github/workflows/inductor.yml (22 changes)

@ -120,28 +120,6 @@ jobs:
test-matrix: ${{ needs.linux-jammy-cpu-py3_12-inductor-halide-build.outputs.test-matrix }}
secrets: inherit

linux-jammy-cpu-py3_12-inductor-triton-cpu-build:
name: linux-jammy-cpu-py3.12-gcc11-inductor-triton-cpu
uses: ./.github/workflows/_linux-build.yml
needs: get-label-type
with:
build-environment: linux-jammy-py3.12-gcc11
docker-image-name: pytorch-linux-jammy-py3.12-triton-cpu
runner_prefix: "${{ needs.get-label-type.outputs.label-type }}"
test-matrix: |
{ include: [
{ config: "inductor-triton-cpu", shard: 1, num_shards: 1, runner: "${{ needs.get-label-type.outputs.label-type }}linux.12xlarge" },
]}

linux-jammy-cpu-py3_12-inductor-triton-cpu-test:
name: linux-jammy-cpu-py3.12-gcc11-inductor-triton-cpu
uses: ./.github/workflows/_linux-test.yml
needs: linux-jammy-cpu-py3_12-inductor-triton-cpu-build
with:
build-environment: linux-jammy-py3.12-gcc11
docker-image: ${{ needs.linux-jammy-cpu-py3_12-inductor-triton-cpu-build.outputs.docker-image }}
test-matrix: ${{ needs.linux-jammy-cpu-py3_12-inductor-triton-cpu-build.outputs.test-matrix }}

linux-focal-cuda12_4-py3_10-gcc9-inductor-build:
# Should be synced with the one in inductor-periodic.yml but this only runs inductor_timm
name: cuda12.4-py3.10-gcc9-sm86
.github/workflows/lint-autoformat.yml (3 changes)

@ -11,6 +11,7 @@ jobs:
contents: read
pull-requests: write
runs-on: lf.linux.2xlarge
continue-on-error: true
if: ${{ github.repository_owner == 'pytorch' }}
steps:
- name: Checkout pytorch

@ -30,12 +31,10 @@ jobs:
bash .github/scripts/lintrunner.sh
- name: Check for changes
id: git-check
continue-on-error: true
run: |
git diff --exit-code || echo "changes=true" >> "$GITHUB_OUTPUT"
- name: Suggest changes
if: steps.git-check.outputs.changes == 'true'
continue-on-error: true
uses: parkerbxyz/suggest-changes@v1
with:
comment: "Please commit the suggested changes from pytorch's linter."
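The git-check step relies on git diff --exit-code returning non-zero when the autoformatter modified files, and records that fact in GITHUB_OUTPUT so the suggest-changes step can be gated on it. The same logic as a small Python sketch (it assumes the Actions-provided GITHUB_OUTPUT environment variable):

# Sketch of the "Check for changes" step: detect a dirty working tree and
# publish changes=true as a step output for later steps to consume.
import os
import subprocess

def record_changes() -> None:
    result = subprocess.run(["git", "diff", "--exit-code"])
    if result.returncode != 0:  # the linter changed files
        with open(os.environ["GITHUB_OUTPUT"], "a") as out:  # set by GitHub Actions
            out.write("changes=true\n")

if __name__ == "__main__":
    record_changes()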
.github/workflows/lint.yml (1 change)

@ -223,7 +223,6 @@ jobs:
cache: pip
- name: Install dependencies
run: |
python3 -m pip install --upgrade pip
pip install pytest-rerunfailures==11.1.* pytest-flakefinder==1.1.* pytest-xdist==3.3.* expecttest==0.2.* fbscribelogger==0.1.* numpy==1.24.*
pip install torch --pre --index-url https://download.pytorch.org/whl/nightly/cpu/
- name: Run run_test.py (nonretryable)
.github/workflows/periodic.yml (31 changes)

@ -57,10 +57,10 @@ jobs:
docker-image-name: pytorch-linux-focal-cuda12.1-cudnn9-py3-gcc9
test-matrix: |
{ include: [
{ config: "nogpu_AVX512", shard: 1, num_shards: 2, runner: "${{ needs.get-label-type.outputs.label-type }}linux.4xlarge" },
{ config: "nogpu_AVX512", shard: 2, num_shards: 2, runner: "${{ needs.get-label-type.outputs.label-type }}linux.4xlarge" },
{ config: "nogpu_NO_AVX2", shard: 1, num_shards: 2, runner: "${{ needs.get-label-type.outputs.label-type }}linux.4xlarge" },
{ config: "nogpu_NO_AVX2", shard: 2, num_shards: 2, runner: "${{ needs.get-label-type.outputs.label-type }}linux.4xlarge" },
{ config: "nogpu_AVX512", shard: 1, num_shards: 2, runner: "${{ needs.get-label-type.outputs.label-type }}linux.2xlarge" },
{ config: "nogpu_AVX512", shard: 2, num_shards: 2, runner: "${{ needs.get-label-type.outputs.label-type }}linux.2xlarge" },
{ config: "nogpu_NO_AVX2", shard: 1, num_shards: 2, runner: "${{ needs.get-label-type.outputs.label-type }}linux.2xlarge" },
{ config: "nogpu_NO_AVX2", shard: 2, num_shards: 2, runner: "${{ needs.get-label-type.outputs.label-type }}linux.2xlarge" },
{ config: "jit_legacy", shard: 1, num_shards: 1, runner: "${{ needs.get-label-type.outputs.label-type }}linux.4xlarge.nvidia.gpu" },
]}
linux-focal-cuda12_1-py3_10-gcc9-test:

@ -89,10 +89,10 @@ jobs:
{ config: "default", shard: 3, num_shards: 5, runner: "${{ needs.get-label-type.outputs.label-type }}linux.4xlarge.nvidia.gpu" },
{ config: "default", shard: 4, num_shards: 5, runner: "${{ needs.get-label-type.outputs.label-type }}linux.4xlarge.nvidia.gpu" },
{ config: "default", shard: 5, num_shards: 5, runner: "${{ needs.get-label-type.outputs.label-type }}linux.4xlarge.nvidia.gpu" },
{ config: "nogpu_AVX512", shard: 1, num_shards: 2, runner: "${{ needs.get-label-type.outputs.label-type }}linux.4xlarge" },
{ config: "nogpu_AVX512", shard: 2, num_shards: 2, runner: "${{ needs.get-label-type.outputs.label-type }}linux.4xlarge" },
{ config: "nogpu_NO_AVX2", shard: 1, num_shards: 2, runner: "${{ needs.get-label-type.outputs.label-type }}linux.4xlarge" },
{ config: "nogpu_NO_AVX2", shard: 2, num_shards: 2, runner: "${{ needs.get-label-type.outputs.label-type }}linux.4xlarge" },
{ config: "nogpu_AVX512", shard: 1, num_shards: 2, runner: "${{ needs.get-label-type.outputs.label-type }}linux.2xlarge" },
{ config: "nogpu_AVX512", shard: 2, num_shards: 2, runner: "${{ needs.get-label-type.outputs.label-type }}linux.2xlarge" },
{ config: "nogpu_NO_AVX2", shard: 1, num_shards: 2, runner: "${{ needs.get-label-type.outputs.label-type }}linux.2xlarge" },
{ config: "nogpu_NO_AVX2", shard: 2, num_shards: 2, runner: "${{ needs.get-label-type.outputs.label-type }}linux.2xlarge" },
{ config: "jit_legacy", shard: 1, num_shards: 1, runner: "${{ needs.get-label-type.outputs.label-type }}linux.4xlarge.nvidia.gpu" },
]}

@ -118,10 +118,9 @@ jobs:
docker-image-name: pytorch-linux-jammy-py3.9-gcc11
test-matrix: |
{ include: [
{ config: "default", shard: 1, num_shards: 4, runner: "${{ needs.get-label-type.outputs.label-type }}linux.2xlarge" },
{ config: "default", shard: 2, num_shards: 4, runner: "${{ needs.get-label-type.outputs.label-type }}linux.2xlarge" },
{ config: "default", shard: 3, num_shards: 4, runner: "${{ needs.get-label-type.outputs.label-type }}linux.2xlarge" },
{ config: "default", shard: 4, num_shards: 4, runner: "${{ needs.get-label-type.outputs.label-type }}linux.2xlarge" },
{ config: "default", shard: 1, num_shards: 3, runner: "${{ needs.get-label-type.outputs.label-type }}linux.2xlarge" },
{ config: "default", shard: 2, num_shards: 3, runner: "${{ needs.get-label-type.outputs.label-type }}linux.2xlarge" },
{ config: "default", shard: 3, num_shards: 3, runner: "${{ needs.get-label-type.outputs.label-type }}linux.2xlarge" },
]}

parallelnative-linux-jammy-py3_9-gcc11-test:

@ -340,10 +339,10 @@ jobs:
docker-image-name: pytorch-linux-focal-cuda12.1-cudnn9-py3-gcc9
test-matrix: |
{ include: [
{ config: "nogpu_AVX512", shard: 1, num_shards: 2, runner: "${{ needs.get-label-type.outputs.label-type }}linux.4xlarge" },
{ config: "nogpu_AVX512", shard: 2, num_shards: 2, runner: "${{ needs.get-label-type.outputs.label-type }}linux.4xlarge" },
{ config: "nogpu_NO_AVX2", shard: 1, num_shards: 2, runner: "${{ needs.get-label-type.outputs.label-type }}linux.4xlarge" },
{ config: "nogpu_NO_AVX2", shard: 2, num_shards: 2, runner: "${{ needs.get-label-type.outputs.label-type }}linux.4xlarge" },
{ config: "nogpu_AVX512", shard: 1, num_shards: 2, runner: "${{ needs.get-label-type.outputs.label-type }}linux.2xlarge" },
{ config: "nogpu_AVX512", shard: 2, num_shards: 2, runner: "${{ needs.get-label-type.outputs.label-type }}linux.2xlarge" },
{ config: "nogpu_NO_AVX2", shard: 1, num_shards: 2, runner: "${{ needs.get-label-type.outputs.label-type }}linux.2xlarge" },
{ config: "nogpu_NO_AVX2", shard: 2, num_shards: 2, runner: "${{ needs.get-label-type.outputs.label-type }}linux.2xlarge" },
{ config: "jit_legacy", shard: 1, num_shards: 1, runner: "${{ needs.get-label-type.outputs.label-type }}linux.4xlarge.nvidia.gpu" },
]}
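All of these matrix edits swap the base runner name while keeping the ${{ needs.get-label-type.outputs.label-type }} prefix in place; the final runs-on label is just the concatenation of the two. A one-line sketch (the "lf." value is an assumption, matching the lf.linux.2xlarge label used in lint-autoformat.yml above):

# How a matrix entry's runner label is assembled at workflow run time.
def runner_label(label_type: str, base: str) -> str:
    return f"{label_type}{base}"

assert runner_label("", "linux.2xlarge") == "linux.2xlarge"        # Meta fleet
assert runner_label("lf.", "linux.2xlarge") == "lf.linux.2xlarge"  # assumed LF fleet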
.github/workflows/pull.yml (30 changes)

@ -185,10 +185,10 @@ jobs:
docker-image-name: pytorch-linux-focal-py3.9-clang10
test-matrix: |
{ include: [
{ config: "default", shard: 1, num_shards: 4, runner: "${{ needs.get-label-type.outputs.label-type }}linux.4xlarge" },
{ config: "default", shard: 2, num_shards: 4, runner: "${{ needs.get-label-type.outputs.label-type }}linux.4xlarge" },
{ config: "default", shard: 3, num_shards: 4, runner: "${{ needs.get-label-type.outputs.label-type }}linux.4xlarge" },
{ config: "default", shard: 4, num_shards: 4, runner: "${{ needs.get-label-type.outputs.label-type }}linux.4xlarge" },
{ config: "default", shard: 1, num_shards: 4, runner: "${{ needs.get-label-type.outputs.label-type }}linux.2xlarge" },
{ config: "default", shard: 2, num_shards: 4, runner: "${{ needs.get-label-type.outputs.label-type }}linux.2xlarge" },
{ config: "default", shard: 3, num_shards: 4, runner: "${{ needs.get-label-type.outputs.label-type }}linux.2xlarge" },
{ config: "default", shard: 4, num_shards: 4, runner: "${{ needs.get-label-type.outputs.label-type }}linux.2xlarge" },
{ config: "crossref", shard: 1, num_shards: 2, runner: "${{ needs.get-label-type.outputs.label-type }}linux.2xlarge" },
{ config: "crossref", shard: 2, num_shards: 2, runner: "${{ needs.get-label-type.outputs.label-type }}linux.2xlarge" },
{ config: "dynamo", shard: 1, num_shards: 3, runner: "${{ needs.get-label-type.outputs.label-type }}linux.2xlarge" },

@ -217,10 +217,10 @@ jobs:
docker-image-name: pytorch-linux-focal-py3.11-clang10
test-matrix: |
{ include: [
{ config: "default", shard: 1, num_shards: 4, runner: "${{ needs.get-label-type.outputs.label-type }}linux.4xlarge" },
{ config: "default", shard: 2, num_shards: 4, runner: "${{ needs.get-label-type.outputs.label-type }}linux.4xlarge" },
{ config: "default", shard: 3, num_shards: 4, runner: "${{ needs.get-label-type.outputs.label-type }}linux.4xlarge" },
{ config: "default", shard: 4, num_shards: 4, runner: "${{ needs.get-label-type.outputs.label-type }}linux.4xlarge" },
{ config: "default", shard: 1, num_shards: 4, runner: "${{ needs.get-label-type.outputs.label-type }}linux.2xlarge" },
{ config: "default", shard: 2, num_shards: 4, runner: "${{ needs.get-label-type.outputs.label-type }}linux.2xlarge" },
{ config: "default", shard: 3, num_shards: 4, runner: "${{ needs.get-label-type.outputs.label-type }}linux.2xlarge" },
{ config: "default", shard: 4, num_shards: 4, runner: "${{ needs.get-label-type.outputs.label-type }}linux.2xlarge" },
{ config: "crossref", shard: 1, num_shards: 2, runner: "${{ needs.get-label-type.outputs.label-type }}linux.2xlarge" },
{ config: "crossref", shard: 2, num_shards: 2, runner: "${{ needs.get-label-type.outputs.label-type }}linux.2xlarge" },
{ config: "dynamo", shard: 1, num_shards: 3, runner: "${{ needs.get-label-type.outputs.label-type }}linux.2xlarge" },

@ -251,10 +251,10 @@ jobs:
docker-image-name: pytorch-linux-focal-py3.12-clang10
test-matrix: |
{ include: [
{ config: "default", shard: 1, num_shards: 4, runner: "${{ needs.get-label-type.outputs.label-type }}linux.4xlarge" },
{ config: "default", shard: 2, num_shards: 4, runner: "${{ needs.get-label-type.outputs.label-type }}linux.4xlarge" },
{ config: "default", shard: 3, num_shards: 4, runner: "${{ needs.get-label-type.outputs.label-type }}linux.4xlarge" },
{ config: "default", shard: 4, num_shards: 4, runner: "${{ needs.get-label-type.outputs.label-type }}linux.4xlarge" },
{ config: "default", shard: 1, num_shards: 4, runner: "${{ needs.get-label-type.outputs.label-type }}linux.2xlarge" },
{ config: "default", shard: 2, num_shards: 4, runner: "${{ needs.get-label-type.outputs.label-type }}linux.2xlarge" },
{ config: "default", shard: 3, num_shards: 4, runner: "${{ needs.get-label-type.outputs.label-type }}linux.2xlarge" },
{ config: "default", shard: 4, num_shards: 4, runner: "${{ needs.get-label-type.outputs.label-type }}linux.2xlarge" },
{ config: "dynamo", shard: 1, num_shards: 3, runner: "${{ needs.get-label-type.outputs.label-type }}linux.2xlarge" },
{ config: "dynamo", shard: 2, num_shards: 3, runner: "${{ needs.get-label-type.outputs.label-type }}linux.2xlarge" },
{ config: "dynamo", shard: 3, num_shards: 3, runner: "${{ needs.get-label-type.outputs.label-type }}linux.2xlarge" },

@ -588,9 +588,9 @@ jobs:
docker-image-name: pytorch-linux-focal-py3.12-clang10
test-matrix: |
{ include: [
{ config: "default", shard: 1, num_shards: 3, runner: "linux.4xlarge" },
{ config: "default", shard: 2, num_shards: 3, runner: "linux.4xlarge" },
{ config: "default", shard: 3, num_shards: 3, runner: "linux.4xlarge" },
{ config: "default", shard: 1, num_shards: 3, runner: "linux.2xlarge" },
{ config: "default", shard: 2, num_shards: 3, runner: "linux.2xlarge" },
{ config: "default", shard: 3, num_shards: 3, runner: "linux.2xlarge" },
{ config: "dynamo", shard: 1, num_shards: 3, runner: "linux.2xlarge" },
{ config: "dynamo", shard: 2, num_shards: 3, runner: "linux.2xlarge" },
{ config: "dynamo", shard: 3, num_shards: 3, runner: "linux.2xlarge" },
.github/workflows/s390.yml (24 changes)

@ -1,24 +0,0 @@
name: s390

on:
push:
branches:
- main
tags:
- ciflow/s390/*
workflow_dispatch:

concurrency:
group: ${{ github.workflow }}-${{ github.event.pull_request.number || github.ref_name }}-${{ github.ref_type == 'branch' && github.sha }}-${{ github.event_name == 'workflow_dispatch' }}-${{ github.event_name == 'schedule' }}
cancel-in-progress: true

permissions: read-all

jobs:
linux-manylinux-2_28-py3-cpu-s390x-build:
name: linux-manylinux-2_28-py3-cpu-s390x
uses: ./.github/workflows/_linux-build.yml
with:
build-environment: linux-s390x-binary-manywheel
docker-image-name: pytorch/manylinuxs390x-builder:cpu-s390x-main
runner: linux.s390x
.github/workflows/trunk.yml (16 changes)

@ -266,10 +266,10 @@ jobs:
docker-image-name: pytorch-linux-focal-cuda12.4-cudnn9-py3-gcc9
test-matrix: |
{ include: [
{ config: "nogpu_AVX512", shard: 1, num_shards: 2, runner: "${{ needs.get-label-type.outputs.label-type }}linux.4xlarge" },
{ config: "nogpu_AVX512", shard: 2, num_shards: 2, runner: "${{ needs.get-label-type.outputs.label-type }}linux.4xlarge" },
{ config: "nogpu_NO_AVX2", shard: 1, num_shards: 2, runner: "${{ needs.get-label-type.outputs.label-type }}linux.4xlarge" },
{ config: "nogpu_NO_AVX2", shard: 2, num_shards: 2, runner: "${{ needs.get-label-type.outputs.label-type }}linux.4xlarge" },
{ config: "nogpu_AVX512", shard: 1, num_shards: 2, runner: "${{ needs.get-label-type.outputs.label-type }}linux.2xlarge" },
{ config: "nogpu_AVX512", shard: 2, num_shards: 2, runner: "${{ needs.get-label-type.outputs.label-type }}linux.2xlarge" },
{ config: "nogpu_NO_AVX2", shard: 1, num_shards: 2, runner: "${{ needs.get-label-type.outputs.label-type }}linux.2xlarge" },
{ config: "nogpu_NO_AVX2", shard: 2, num_shards: 2, runner: "${{ needs.get-label-type.outputs.label-type }}linux.2xlarge" },
{ config: "jit_legacy", shard: 1, num_shards: 1, runner: "${{ needs.get-label-type.outputs.label-type }}linux.4xlarge.nvidia.gpu" },
{ config: "default", shard: 1, num_shards: 5, runner: "${{ needs.get-label-type.outputs.label-type }}linux.4xlarge.nvidia.gpu" },
{ config: "default", shard: 2, num_shards: 5, runner: "${{ needs.get-label-type.outputs.label-type }}linux.4xlarge.nvidia.gpu" },

@ -316,3 +316,11 @@ jobs:
build-environment: linux-focal-cuda11.8-py3.10-gcc9-experimental-split-build
docker-image: ${{ needs.linux-focal-cuda11_8-py3_10-gcc9-experimental-split-build.outputs.docker-image }}
test-matrix: ${{ needs.linux-focal-cuda11_8-py3_10-gcc9-experimental-split-build.outputs.test-matrix }}

linux-manylinux-2_28-py3-cpu-s390x-build:
name: linux-manylinux-2_28-py3-cpu-s390x
uses: ./.github/workflows/_linux-build.yml
with:
build-environment: linux-s390x-binary-manywheel
docker-image-name: pytorch/manylinuxs390x-builder:cpu-s390x-main
runner: linux.s390x
.github/workflows/trymerge.yml (3 changes)

@ -28,7 +28,7 @@ jobs:
check-latest: false
cache: pip
architecture: x64
- run: pip install pyyaml==6.0
- run: pip install pyyaml==6.0 rockset==1.0.3

- name: Setup committer id
run: |

@ -43,6 +43,7 @@ jobs:
COMMENT_ID: ${{ github.event.client_payload.comment_id }}
REBASE: ${{ github.event.client_payload.rebase }}
IGNORE_CURRENT: ${{ github.event.client_payload.ignore_current }}
ROCKSET_API_KEY: ${{ secrets.ROCKSET_API_KEY }}
DRCI_BOT_KEY: ${{ secrets.DRCI_BOT_KEY }}
GITHUB_RUN_ID: ${{ github.run_id }}
run: |
@ -153,7 +153,7 @@ init_command = [
'junitparser==2.1.1',
'rich==10.9.0',
'pyyaml==6.0.1',
'optree==0.13.0',
'optree==0.12.1',
]

[[linter]]

@ -216,10 +216,6 @@ include_patterns = [
'torch/csrc/*.cpp',
'torch/csrc/**/*.h',
'torch/csrc/**/*.cpp',
'torch/csrc/distributed/autograd/**/*.cpp',
'torch/csrc/distributed/autograd/**/*.h',
'torch/csrc/distributed/rpc/**/*.cpp',
'torch/csrc/distributed/rpc/**/*.h',
'torch/csrc/jit/serialization/*.h',
'torch/csrc/jit/serialization/*.cpp',
]
@ -1083,16 +1083,8 @@ if(NOT MSVC)
append_cxx_flag_if_supported("-Wno-unused-but-set-variable" CMAKE_CXX_FLAGS)
append_cxx_flag_if_supported("-Wno-maybe-uninitialized" CMAKE_CXX_FLAGS)
append_cxx_flag_if_supported("-fstandalone-debug" CMAKE_CXX_FLAGS_DEBUG)
if(CMAKE_SYSTEM_PROCESSOR MATCHES "aarch64" AND CMAKE_CXX_COMPILER_ID MATCHES "GNU")
if(CMAKE_BUILD_TYPE MATCHES Debug)
message(Warning "Applying -Og optimization for aarch64 GCC debug build to workaround ICE")
endif()
string(APPEND CMAKE_CXX_FLAGS_DEBUG " -fno-omit-frame-pointer -Og")
string(APPEND CMAKE_LINKER_FLAGS_DEBUG " -fno-omit-frame-pointer -Og")
else()
string(APPEND CMAKE_CXX_FLAGS_DEBUG " -fno-omit-frame-pointer -O0")
string(APPEND CMAKE_LINKER_FLAGS_DEBUG " -fno-omit-frame-pointer -O0")
endif()
string(APPEND CMAKE_CXX_FLAGS_DEBUG " -fno-omit-frame-pointer -O0")
string(APPEND CMAKE_LINKER_FLAGS_DEBUG " -fno-omit-frame-pointer -O0")
append_cxx_flag_if_supported("-fno-math-errno" CMAKE_CXX_FLAGS)
append_cxx_flag_if_supported("-fno-trapping-math" CMAKE_CXX_FLAGS)
append_cxx_flag_if_supported("-Werror=format" CMAKE_CXX_FLAGS)
@ -121,7 +121,7 @@ torch/profiler/ @aaronenyeshi @sraikund16
test/functorch/test_aotdispatch.py @ezyang @Chillee

# Dataloader
torch/utils/data/ @andrewkho @divyanshk
torch/utils/data/ @andrewkho @gokulavasan

# hipify
torch/utils/hipify/ @jeffdaily @jithunnair-amd
@ -39,16 +39,25 @@ class TORCH_API Context {

  const Generator& defaultGenerator(Device device) {
    c10::DeviceType device_type = device.type();
    lazyInitDevice(device_type);

    initCUDAIfNeeded(device_type);
    initHIPIfNeeded(device_type);
    if (device_type == at::kCPU) {
      return at::detail::getDefaultCPUGenerator();
    } else if (device_type == at::kCUDA) {
      return at::detail::getCUDAHooks().getDefaultCUDAGenerator(device.index());
    } else if (device_type == at::kMPS) {
      return at::detail::getMPSHooks().getDefaultMPSGenerator();
    } else if (device_type == at::kXPU) {
      return at::detail::getXPUHooks().getDefaultXPUGenerator(device.index());
    } else if (device_type == at::kIPU) {
      return at::detail::getIPUHooks().getDefaultIPUGenerator(device.index());
    } else if (device_type == at::kPrivateUse1) {
      return at::detail::getPrivateUse1Hooks().getDefaultGenerator(
          device.index());
    } else {
      return getAcceleratorHooksInterface(device_type)
          .getDefaultGenerator(device.index());
      AT_ERROR(c10::DeviceTypeName(device_type), " device type not enabled.");
    }
  }

  const AcceleratorHooksInterface& getAcceleratorHooksInterface(
      std::optional<c10::DeviceType> opt_device_type = std::nullopt) {
    c10::DeviceType device_type = opt_device_type.has_value()
@ -71,10 +80,10 @@ class TORCH_API Context {
        c10::DeviceTypeName(device_type), " device type not an accelerator.");
    }
  }

  Device getDeviceFromPtr(void* data, c10::DeviceType device_type) {
    lazyInitDevice(device_type);

    initCUDAIfNeeded(device_type);
    initHIPIfNeeded(device_type);
    initXPUIfNeeded(device_type);
    if (device_type == at::kCPU) {
      return c10::DeviceType::CPU;
    } else if (device_type == at::kCUDA) {
@ -87,7 +96,6 @@ class TORCH_API Context {
      AT_ERROR(c10::DeviceTypeName(device_type), " device type not enabled.");
    }
  }

  bool isPinnedPtr(
      const void* data,
      std::optional<c10::DeviceType> device_type = std::nullopt) {
@ -98,22 +106,13 @@ class TORCH_API Context {
            opt_device_type.value())) { // passed device not an accelerator
      return false;
    }
    return getAcceleratorHooksInterface(opt_device_type).isPinnedPtr(data);
    return getAcceleratorHooksInterface(opt_device_type.value())
        .isPinnedPtr(data);
  }

  Allocator* getPinnedMemoryAllocator(
      std::optional<c10::DeviceType> device_type = std::nullopt) {
    return getAcceleratorHooksInterface(device_type).getPinnedMemoryAllocator();
  }

  void lazyInitDevice(c10::DeviceType device_type) {
    if (device_type != at::kCPU) {
      c10::call_once(init_[static_cast<int8_t>(device_type)], [&] {
        getAcceleratorHooksInterface(device_type).init();
      });
    }
  }

  static bool hasOpenMP();
  static bool hasMKL();
  static bool hasLAPACK();
@ -166,6 +165,27 @@ class TORCH_API Context {
  static bool hasMAIA() {
    return c10::impl::hasDeviceGuardImpl(c10::DeviceType::MAIA);
  }
  // defined in header so that getNonVariableType has ability to inline
  // call_once check. getNonVariableType is called fairly frequently
  void lazyInitCUDA() {
    c10::call_once(thc_init, [&] { detail::getCUDAHooks().initCUDA(); });
  }
  void lazyInitHIP() {
    c10::call_once(thh_init, [&] { detail::getHIPHooks().initHIP(); });
  }
  void lazyInitXPU() {
    c10::call_once(thx_init, [&] { detail::getXPUHooks().initXPU(); });
  }
  void lazyInitMTIA() {
    c10::call_once(th_mtia_init, [&] { detail::getMTIAHooks().initMTIA(); });
  }
  void lazyInitPrivateUse1() {
    c10::call_once(thp_init, [&] {
      if (isPrivateUse1HooksRegistered()) {
        at::detail::getPrivateUse1Hooks().initPrivateUse1();
      }
    });
  }
  static const at::cuda::NVRTC& getNVRTC() {
    return detail::getCUDAHooks().nvrtc();
  }
@ -341,8 +361,27 @@ class TORCH_API Context {
  void setAllowFP16ReductionCPU(bool);

 private:
  void initCUDAIfNeeded(c10::DeviceType p) {
    if (p == c10::DeviceType::CUDA) {
      lazyInitCUDA();
    }
  }
  void initHIPIfNeeded(c10::DeviceType p) {
    if (p == c10::DeviceType::HIP) {
      lazyInitHIP();
    }
  }
  void initXPUIfNeeded(c10::DeviceType p) {
    if (p == c10::DeviceType::XPU) {
      lazyInitXPU();
    }
  }
  static bool checkCuBLASConfigDeterministic();
  std::array<c10::once_flag, at::COMPILE_TIME_MAX_DEVICE_TYPES> init_;
  c10::once_flag thc_init;
  c10::once_flag thh_init;
  c10::once_flag thx_init;
  c10::once_flag th_mtia_init;
  c10::once_flag thp_init;
  bool enabled_cudnn = true;
  bool deterministic_cudnn = false;
  bool deterministic_mkldnn = false;
@ -474,7 +513,7 @@ inline size_t getNumGPUs() {
        "to be CUDA (e.g., when you say CUDA, on a HIP build of ATen, this actually "
        "means HIP. Rebuild PyTorch with one or the other disabled.");
  } else if (hasCUDA()) {
    return detail::getCUDAHooks().deviceCount();
    return detail::getCUDAHooks().getNumGPUs();
  } else if (hasHIP()) {
    return detail::getHIPHooks().getNumGPUs();
  } else {
@ -511,7 +550,7 @@ inline void manual_seed(uint64_t seed) {
  }
  // NB: Sometimes we build with CUDA, but we don't have any GPUs
  // available. In that case, we must not seed CUDA; it will fail!
  const auto cuda_num_gpus = detail::getCUDAHooks().deviceCount();
  const auto cuda_num_gpus = detail::getCUDAHooks().getNumGPUs();
  if (hasCUDA() && cuda_num_gpus > 0) {
    for (const auto i : c10::irange(cuda_num_gpus)) {
      auto cuda_gen = globalContext().defaultGenerator(
@ -524,7 +563,7 @@ inline void manual_seed(uint64_t seed) {
    }
  }

  const auto xpu_num_gpus = detail::getXPUHooks().deviceCount();
  const auto xpu_num_gpus = detail::getXPUHooks().getNumGPUs();
  if (hasXPU() && xpu_num_gpus) {
    for (const auto i : c10::irange(xpu_num_gpus)) {
      auto xpu_gen = globalContext().defaultGenerator(
@ -22,6 +22,13 @@ DLDataType getDLDataType(const Tensor& t) {
    case ScalarType::UInt64:
      dtype.code = DLDataTypeCode::kDLUInt;
      break;
    case ScalarType::Int1:
    case ScalarType::Int2:
    case ScalarType::Int3:
    case ScalarType::Int4:
    case ScalarType::Int5:
    case ScalarType::Int6:
    case ScalarType::Int7:
    case ScalarType::Char:
      dtype.code = DLDataTypeCode::kDLInt;
      break;
@ -112,12 +112,12 @@

// Ensure we never have too many scalar types for the expansion here to
// support. To bump this, you must regenerate the macros below.
static_assert(static_cast<int>(c10::ScalarType::NumOptions) < 45);
static_assert(static_cast<int>(c10::ScalarType::NumOptions) < 60);

// Python code to regenerate the generated code below:
#if 0

num_args = 45
num_args = 60

nums = ', '.join(str(i) for i in reversed(range(num_args+1)))
args = ', '.join(f'_{i}' for i in range(1, num_args+1))
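(The regeneration snippet is truncated by the diff. For context, a minimal runnable sketch of the full generator; the loop body is reconstructed from the shape of the generated macros below and is an assumption, not the verbatim source.)

```
num_args = 60

# Argument-counting macros: AT_NUM_ARGS(...) resolves to the number of
# variadic arguments by pattern-matching against a reversed count list.
nums = ', '.join(str(i) for i in reversed(range(num_args + 1)))
args = ', '.join(f'_{i}' for i in range(1, num_args + 1))
print(f'#define AT_NUM_ARGS(...) AT_EXPAND(AT_NUM_ARGS_AUX(__VA_ARGS__, {nums}))')
print(f'#define AT_NUM_ARGS_AUX({args}, N, ...) N')

# One AT_APi macro per arity; each expands its i arguments into
# AT_DISPATCH_CASE invocations, matching the generated code below.
for i in range(1, num_args + 1):
    params = ', '.join(f'_{j}' for j in range(1, i + 1))
    cases = ' '.join(f'AT_DISPATCH_CASE(_{j}, N)' for j in range(1, i + 1))
    print(f'#define AT_AP{i}(N, {params}) {cases}')
```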
@ -135,8 +135,8 @@ for i in range(1, num_args+1):
// Begin generated code
// clang-format off
#define AT_NUM_ARGS(...) AT_EXPAND(AT_NUM_ARGS_AUX(__VA_ARGS__, 45, 44, 43, 42, 41, 40, 39, 38, 37, 36, 35, 34, 33, 32, 31, 30, 29, 28, 27, 26, 25, 24, 23, 22, 21, 20, 19, 18, 17, 16, 15, 14, 13, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1, 0))
#define AT_NUM_ARGS_AUX(_1, _2, _3, _4, _5, _6, _7, _8, _9, _10, _11, _12, _13, _14, _15, _16, _17, _18, _19, _20, _21, _22, _23, _24, _25, _26, _27, _28, _29, _30, _31, _32, _33, _34, _35, _36, _37, _38, _39, _40, _41, _42, _43, _44, _45, N, ...) N
#define AT_NUM_ARGS(...) AT_EXPAND(AT_NUM_ARGS_AUX(__VA_ARGS__, 60, 59, 58, 57, 56, 55, 54, 53, 52, 51, 50, 49, 48, 47, 46, 45, 44, 43, 42, 41, 40, 39, 38, 37, 36, 35, 34, 33, 32, 31, 30, 29, 28, 27, 26, 25, 24, 23, 22, 21, 20, 19, 18, 17, 16, 15, 14, 13, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1, 0))
#define AT_NUM_ARGS_AUX(_1, _2, _3, _4, _5, _6, _7, _8, _9, _10, _11, _12, _13, _14, _15, _16, _17, _18, _19, _20, _21, _22, _23, _24, _25, _26, _27, _28, _29, _30, _31, _32, _33, _34, _35, _36, _37, _38, _39, _40, _41, _42, _43, _44, _45, _46, _47, _48, _49, _50, _51, _52, _53, _54, _55, _56, _57, _58, _59, _60, N, ...) N
#define AT_AP1(N, _1) AT_DISPATCH_CASE(_1, N)
#define AT_AP2(N, _1, _2) AT_DISPATCH_CASE(_1, N) AT_DISPATCH_CASE(_2, N)
#define AT_AP3(N, _1, _2, _3) AT_DISPATCH_CASE(_1, N) AT_DISPATCH_CASE(_2, N) AT_DISPATCH_CASE(_3, N)
@ -182,5 +182,21 @@ for i in range(1, num_args+1):
#define AT_AP43(N, _1, _2, _3, _4, _5, _6, _7, _8, _9, _10, _11, _12, _13, _14, _15, _16, _17, _18, _19, _20, _21, _22, _23, _24, _25, _26, _27, _28, _29, _30, _31, _32, _33, _34, _35, _36, _37, _38, _39, _40, _41, _42, _43) AT_DISPATCH_CASE(_1, N) AT_DISPATCH_CASE(_2, N) AT_DISPATCH_CASE(_3, N) AT_DISPATCH_CASE(_4, N) AT_DISPATCH_CASE(_5, N) AT_DISPATCH_CASE(_6, N) AT_DISPATCH_CASE(_7, N) AT_DISPATCH_CASE(_8, N) AT_DISPATCH_CASE(_9, N) AT_DISPATCH_CASE(_10, N) AT_DISPATCH_CASE(_11, N) AT_DISPATCH_CASE(_12, N) AT_DISPATCH_CASE(_13, N) AT_DISPATCH_CASE(_14, N) AT_DISPATCH_CASE(_15, N) AT_DISPATCH_CASE(_16, N) AT_DISPATCH_CASE(_17, N) AT_DISPATCH_CASE(_18, N) AT_DISPATCH_CASE(_19, N) AT_DISPATCH_CASE(_20, N) AT_DISPATCH_CASE(_21, N) AT_DISPATCH_CASE(_22, N) AT_DISPATCH_CASE(_23, N) AT_DISPATCH_CASE(_24, N) AT_DISPATCH_CASE(_25, N) AT_DISPATCH_CASE(_26, N) AT_DISPATCH_CASE(_27, N) AT_DISPATCH_CASE(_28, N) AT_DISPATCH_CASE(_29, N) AT_DISPATCH_CASE(_30, N) AT_DISPATCH_CASE(_31, N) AT_DISPATCH_CASE(_32, N) AT_DISPATCH_CASE(_33, N) AT_DISPATCH_CASE(_34, N) AT_DISPATCH_CASE(_35, N) AT_DISPATCH_CASE(_36, N) AT_DISPATCH_CASE(_37, N) AT_DISPATCH_CASE(_38, N) AT_DISPATCH_CASE(_39, N) AT_DISPATCH_CASE(_40, N) AT_DISPATCH_CASE(_41, N) AT_DISPATCH_CASE(_42, N) AT_DISPATCH_CASE(_43, N)
#define AT_AP44(N, _1, _2, _3, _4, _5, _6, _7, _8, _9, _10, _11, _12, _13, _14, _15, _16, _17, _18, _19, _20, _21, _22, _23, _24, _25, _26, _27, _28, _29, _30, _31, _32, _33, _34, _35, _36, _37, _38, _39, _40, _41, _42, _43, _44) AT_DISPATCH_CASE(_1, N) AT_DISPATCH_CASE(_2, N) AT_DISPATCH_CASE(_3, N) AT_DISPATCH_CASE(_4, N) AT_DISPATCH_CASE(_5, N) AT_DISPATCH_CASE(_6, N) AT_DISPATCH_CASE(_7, N) AT_DISPATCH_CASE(_8, N) AT_DISPATCH_CASE(_9, N) AT_DISPATCH_CASE(_10, N) AT_DISPATCH_CASE(_11, N) AT_DISPATCH_CASE(_12, N) AT_DISPATCH_CASE(_13, N) AT_DISPATCH_CASE(_14, N) AT_DISPATCH_CASE(_15, N) AT_DISPATCH_CASE(_16, N) AT_DISPATCH_CASE(_17, N) AT_DISPATCH_CASE(_18, N) AT_DISPATCH_CASE(_19, N) AT_DISPATCH_CASE(_20, N) AT_DISPATCH_CASE(_21, N) AT_DISPATCH_CASE(_22, N) AT_DISPATCH_CASE(_23, N) AT_DISPATCH_CASE(_24, N) AT_DISPATCH_CASE(_25, N) AT_DISPATCH_CASE(_26, N) AT_DISPATCH_CASE(_27, N) AT_DISPATCH_CASE(_28, N) AT_DISPATCH_CASE(_29, N) AT_DISPATCH_CASE(_30, N) AT_DISPATCH_CASE(_31, N) AT_DISPATCH_CASE(_32, N) AT_DISPATCH_CASE(_33, N) AT_DISPATCH_CASE(_34, N) AT_DISPATCH_CASE(_35, N) AT_DISPATCH_CASE(_36, N) AT_DISPATCH_CASE(_37, N) AT_DISPATCH_CASE(_38, N) AT_DISPATCH_CASE(_39, N) AT_DISPATCH_CASE(_40, N) AT_DISPATCH_CASE(_41, N) AT_DISPATCH_CASE(_42, N) AT_DISPATCH_CASE(_43, N) AT_DISPATCH_CASE(_44, N)
#define AT_AP45(N, _1, _2, _3, _4, _5, _6, _7, _8, _9, _10, _11, _12, _13, _14, _15, _16, _17, _18, _19, _20, _21, _22, _23, _24, _25, _26, _27, _28, _29, _30, _31, _32, _33, _34, _35, _36, _37, _38, _39, _40, _41, _42, _43, _44, _45) AT_DISPATCH_CASE(_1, N) AT_DISPATCH_CASE(_2, N) AT_DISPATCH_CASE(_3, N) AT_DISPATCH_CASE(_4, N) AT_DISPATCH_CASE(_5, N) AT_DISPATCH_CASE(_6, N) AT_DISPATCH_CASE(_7, N) AT_DISPATCH_CASE(_8, N) AT_DISPATCH_CASE(_9, N) AT_DISPATCH_CASE(_10, N) AT_DISPATCH_CASE(_11, N) AT_DISPATCH_CASE(_12, N) AT_DISPATCH_CASE(_13, N) AT_DISPATCH_CASE(_14, N) AT_DISPATCH_CASE(_15, N) AT_DISPATCH_CASE(_16, N) AT_DISPATCH_CASE(_17, N) AT_DISPATCH_CASE(_18, N) AT_DISPATCH_CASE(_19, N) AT_DISPATCH_CASE(_20, N) AT_DISPATCH_CASE(_21, N) AT_DISPATCH_CASE(_22, N) AT_DISPATCH_CASE(_23, N) AT_DISPATCH_CASE(_24, N) AT_DISPATCH_CASE(_25, N) AT_DISPATCH_CASE(_26, N) AT_DISPATCH_CASE(_27, N) AT_DISPATCH_CASE(_28, N) AT_DISPATCH_CASE(_29, N) AT_DISPATCH_CASE(_30, N) AT_DISPATCH_CASE(_31, N) AT_DISPATCH_CASE(_32, N) AT_DISPATCH_CASE(_33, N) AT_DISPATCH_CASE(_34, N) AT_DISPATCH_CASE(_35, N) AT_DISPATCH_CASE(_36, N) AT_DISPATCH_CASE(_37, N) AT_DISPATCH_CASE(_38, N) AT_DISPATCH_CASE(_39, N) AT_DISPATCH_CASE(_40, N) AT_DISPATCH_CASE(_41, N) AT_DISPATCH_CASE(_42, N) AT_DISPATCH_CASE(_43, N) AT_DISPATCH_CASE(_44, N) AT_DISPATCH_CASE(_45, N)
#define AT_AP46(N, _1, _2, _3, _4, _5, _6, _7, _8, _9, _10, _11, _12, _13, _14, _15, _16, _17, _18, _19, _20, _21, _22, _23, _24, _25, _26, _27, _28, _29, _30, _31, _32, _33, _34, _35, _36, _37, _38, _39, _40, _41, _42, _43, _44, _45, _46) AT_DISPATCH_CASE(_1, N) AT_DISPATCH_CASE(_2, N) AT_DISPATCH_CASE(_3, N) AT_DISPATCH_CASE(_4, N) AT_DISPATCH_CASE(_5, N) AT_DISPATCH_CASE(_6, N) AT_DISPATCH_CASE(_7, N) AT_DISPATCH_CASE(_8, N) AT_DISPATCH_CASE(_9, N) AT_DISPATCH_CASE(_10, N) AT_DISPATCH_CASE(_11, N) AT_DISPATCH_CASE(_12, N) AT_DISPATCH_CASE(_13, N) AT_DISPATCH_CASE(_14, N) AT_DISPATCH_CASE(_15, N) AT_DISPATCH_CASE(_16, N) AT_DISPATCH_CASE(_17, N) AT_DISPATCH_CASE(_18, N) AT_DISPATCH_CASE(_19, N) AT_DISPATCH_CASE(_20, N) AT_DISPATCH_CASE(_21, N) AT_DISPATCH_CASE(_22, N) AT_DISPATCH_CASE(_23, N) AT_DISPATCH_CASE(_24, N) AT_DISPATCH_CASE(_25, N) AT_DISPATCH_CASE(_26, N) AT_DISPATCH_CASE(_27, N) AT_DISPATCH_CASE(_28, N) AT_DISPATCH_CASE(_29, N) AT_DISPATCH_CASE(_30, N) AT_DISPATCH_CASE(_31, N) AT_DISPATCH_CASE(_32, N) AT_DISPATCH_CASE(_33, N) AT_DISPATCH_CASE(_34, N) AT_DISPATCH_CASE(_35, N) AT_DISPATCH_CASE(_36, N) AT_DISPATCH_CASE(_37, N) AT_DISPATCH_CASE(_38, N) AT_DISPATCH_CASE(_39, N) AT_DISPATCH_CASE(_40, N) AT_DISPATCH_CASE(_41, N) AT_DISPATCH_CASE(_42, N) AT_DISPATCH_CASE(_43, N) AT_DISPATCH_CASE(_44, N) AT_DISPATCH_CASE(_45, N) AT_DISPATCH_CASE(_46, N)
#define AT_AP47(N, _1, _2, _3, _4, _5, _6, _7, _8, _9, _10, _11, _12, _13, _14, _15, _16, _17, _18, _19, _20, _21, _22, _23, _24, _25, _26, _27, _28, _29, _30, _31, _32, _33, _34, _35, _36, _37, _38, _39, _40, _41, _42, _43, _44, _45, _46, _47) AT_DISPATCH_CASE(_1, N) AT_DISPATCH_CASE(_2, N) AT_DISPATCH_CASE(_3, N) AT_DISPATCH_CASE(_4, N) AT_DISPATCH_CASE(_5, N) AT_DISPATCH_CASE(_6, N) AT_DISPATCH_CASE(_7, N) AT_DISPATCH_CASE(_8, N) AT_DISPATCH_CASE(_9, N) AT_DISPATCH_CASE(_10, N) AT_DISPATCH_CASE(_11, N) AT_DISPATCH_CASE(_12, N) AT_DISPATCH_CASE(_13, N) AT_DISPATCH_CASE(_14, N) AT_DISPATCH_CASE(_15, N) AT_DISPATCH_CASE(_16, N) AT_DISPATCH_CASE(_17, N) AT_DISPATCH_CASE(_18, N) AT_DISPATCH_CASE(_19, N) AT_DISPATCH_CASE(_20, N) AT_DISPATCH_CASE(_21, N) AT_DISPATCH_CASE(_22, N) AT_DISPATCH_CASE(_23, N) AT_DISPATCH_CASE(_24, N) AT_DISPATCH_CASE(_25, N) AT_DISPATCH_CASE(_26, N) AT_DISPATCH_CASE(_27, N) AT_DISPATCH_CASE(_28, N) AT_DISPATCH_CASE(_29, N) AT_DISPATCH_CASE(_30, N) AT_DISPATCH_CASE(_31, N) AT_DISPATCH_CASE(_32, N) AT_DISPATCH_CASE(_33, N) AT_DISPATCH_CASE(_34, N) AT_DISPATCH_CASE(_35, N) AT_DISPATCH_CASE(_36, N) AT_DISPATCH_CASE(_37, N) AT_DISPATCH_CASE(_38, N) AT_DISPATCH_CASE(_39, N) AT_DISPATCH_CASE(_40, N) AT_DISPATCH_CASE(_41, N) AT_DISPATCH_CASE(_42, N) AT_DISPATCH_CASE(_43, N) AT_DISPATCH_CASE(_44, N) AT_DISPATCH_CASE(_45, N) AT_DISPATCH_CASE(_46, N) AT_DISPATCH_CASE(_47, N)
#define AT_AP48(N, _1, _2, _3, _4, _5, _6, _7, _8, _9, _10, _11, _12, _13, _14, _15, _16, _17, _18, _19, _20, _21, _22, _23, _24, _25, _26, _27, _28, _29, _30, _31, _32, _33, _34, _35, _36, _37, _38, _39, _40, _41, _42, _43, _44, _45, _46, _47, _48) AT_DISPATCH_CASE(_1, N) AT_DISPATCH_CASE(_2, N) AT_DISPATCH_CASE(_3, N) AT_DISPATCH_CASE(_4, N) AT_DISPATCH_CASE(_5, N) AT_DISPATCH_CASE(_6, N) AT_DISPATCH_CASE(_7, N) AT_DISPATCH_CASE(_8, N) AT_DISPATCH_CASE(_9, N) AT_DISPATCH_CASE(_10, N) AT_DISPATCH_CASE(_11, N) AT_DISPATCH_CASE(_12, N) AT_DISPATCH_CASE(_13, N) AT_DISPATCH_CASE(_14, N) AT_DISPATCH_CASE(_15, N) AT_DISPATCH_CASE(_16, N) AT_DISPATCH_CASE(_17, N) AT_DISPATCH_CASE(_18, N) AT_DISPATCH_CASE(_19, N) AT_DISPATCH_CASE(_20, N) AT_DISPATCH_CASE(_21, N) AT_DISPATCH_CASE(_22, N) AT_DISPATCH_CASE(_23, N) AT_DISPATCH_CASE(_24, N) AT_DISPATCH_CASE(_25, N) AT_DISPATCH_CASE(_26, N) AT_DISPATCH_CASE(_27, N) AT_DISPATCH_CASE(_28, N) AT_DISPATCH_CASE(_29, N) AT_DISPATCH_CASE(_30, N) AT_DISPATCH_CASE(_31, N) AT_DISPATCH_CASE(_32, N) AT_DISPATCH_CASE(_33, N) AT_DISPATCH_CASE(_34, N) AT_DISPATCH_CASE(_35, N) AT_DISPATCH_CASE(_36, N) AT_DISPATCH_CASE(_37, N) AT_DISPATCH_CASE(_38, N) AT_DISPATCH_CASE(_39, N) AT_DISPATCH_CASE(_40, N) AT_DISPATCH_CASE(_41, N) AT_DISPATCH_CASE(_42, N) AT_DISPATCH_CASE(_43, N) AT_DISPATCH_CASE(_44, N) AT_DISPATCH_CASE(_45, N) AT_DISPATCH_CASE(_46, N) AT_DISPATCH_CASE(_47, N) AT_DISPATCH_CASE(_48, N)
#define AT_AP49(N, _1, _2, _3, _4, _5, _6, _7, _8, _9, _10, _11, _12, _13, _14, _15, _16, _17, _18, _19, _20, _21, _22, _23, _24, _25, _26, _27, _28, _29, _30, _31, _32, _33, _34, _35, _36, _37, _38, _39, _40, _41, _42, _43, _44, _45, _46, _47, _48, _49) AT_DISPATCH_CASE(_1, N) AT_DISPATCH_CASE(_2, N) AT_DISPATCH_CASE(_3, N) AT_DISPATCH_CASE(_4, N) AT_DISPATCH_CASE(_5, N) AT_DISPATCH_CASE(_6, N) AT_DISPATCH_CASE(_7, N) AT_DISPATCH_CASE(_8, N) AT_DISPATCH_CASE(_9, N) AT_DISPATCH_CASE(_10, N) AT_DISPATCH_CASE(_11, N) AT_DISPATCH_CASE(_12, N) AT_DISPATCH_CASE(_13, N) AT_DISPATCH_CASE(_14, N) AT_DISPATCH_CASE(_15, N) AT_DISPATCH_CASE(_16, N) AT_DISPATCH_CASE(_17, N) AT_DISPATCH_CASE(_18, N) AT_DISPATCH_CASE(_19, N) AT_DISPATCH_CASE(_20, N) AT_DISPATCH_CASE(_21, N) AT_DISPATCH_CASE(_22, N) AT_DISPATCH_CASE(_23, N) AT_DISPATCH_CASE(_24, N) AT_DISPATCH_CASE(_25, N) AT_DISPATCH_CASE(_26, N) AT_DISPATCH_CASE(_27, N) AT_DISPATCH_CASE(_28, N) AT_DISPATCH_CASE(_29, N) AT_DISPATCH_CASE(_30, N) AT_DISPATCH_CASE(_31, N) AT_DISPATCH_CASE(_32, N) AT_DISPATCH_CASE(_33, N) AT_DISPATCH_CASE(_34, N) AT_DISPATCH_CASE(_35, N) AT_DISPATCH_CASE(_36, N) AT_DISPATCH_CASE(_37, N) AT_DISPATCH_CASE(_38, N) AT_DISPATCH_CASE(_39, N) AT_DISPATCH_CASE(_40, N) AT_DISPATCH_CASE(_41, N) AT_DISPATCH_CASE(_42, N) AT_DISPATCH_CASE(_43, N) AT_DISPATCH_CASE(_44, N) AT_DISPATCH_CASE(_45, N) AT_DISPATCH_CASE(_46, N) AT_DISPATCH_CASE(_47, N) AT_DISPATCH_CASE(_48, N) AT_DISPATCH_CASE(_49, N)
#define AT_AP50(N, _1, _2, _3, _4, _5, _6, _7, _8, _9, _10, _11, _12, _13, _14, _15, _16, _17, _18, _19, _20, _21, _22, _23, _24, _25, _26, _27, _28, _29, _30, _31, _32, _33, _34, _35, _36, _37, _38, _39, _40, _41, _42, _43, _44, _45, _46, _47, _48, _49, _50) AT_DISPATCH_CASE(_1, N) AT_DISPATCH_CASE(_2, N) AT_DISPATCH_CASE(_3, N) AT_DISPATCH_CASE(_4, N) AT_DISPATCH_CASE(_5, N) AT_DISPATCH_CASE(_6, N) AT_DISPATCH_CASE(_7, N) AT_DISPATCH_CASE(_8, N) AT_DISPATCH_CASE(_9, N) AT_DISPATCH_CASE(_10, N) AT_DISPATCH_CASE(_11, N) AT_DISPATCH_CASE(_12, N) AT_DISPATCH_CASE(_13, N) AT_DISPATCH_CASE(_14, N) AT_DISPATCH_CASE(_15, N) AT_DISPATCH_CASE(_16, N) AT_DISPATCH_CASE(_17, N) AT_DISPATCH_CASE(_18, N) AT_DISPATCH_CASE(_19, N) AT_DISPATCH_CASE(_20, N) AT_DISPATCH_CASE(_21, N) AT_DISPATCH_CASE(_22, N) AT_DISPATCH_CASE(_23, N) AT_DISPATCH_CASE(_24, N) AT_DISPATCH_CASE(_25, N) AT_DISPATCH_CASE(_26, N) AT_DISPATCH_CASE(_27, N) AT_DISPATCH_CASE(_28, N) AT_DISPATCH_CASE(_29, N) AT_DISPATCH_CASE(_30, N) AT_DISPATCH_CASE(_31, N) AT_DISPATCH_CASE(_32, N) AT_DISPATCH_CASE(_33, N) AT_DISPATCH_CASE(_34, N) AT_DISPATCH_CASE(_35, N) AT_DISPATCH_CASE(_36, N) AT_DISPATCH_CASE(_37, N) AT_DISPATCH_CASE(_38, N) AT_DISPATCH_CASE(_39, N) AT_DISPATCH_CASE(_40, N) AT_DISPATCH_CASE(_41, N) AT_DISPATCH_CASE(_42, N) AT_DISPATCH_CASE(_43, N) AT_DISPATCH_CASE(_44, N) AT_DISPATCH_CASE(_45, N) AT_DISPATCH_CASE(_46, N) AT_DISPATCH_CASE(_47, N) AT_DISPATCH_CASE(_48, N) AT_DISPATCH_CASE(_49, N) AT_DISPATCH_CASE(_50, N)
#define AT_AP51(N, _1, _2, _3, _4, _5, _6, _7, _8, _9, _10, _11, _12, _13, _14, _15, _16, _17, _18, _19, _20, _21, _22, _23, _24, _25, _26, _27, _28, _29, _30, _31, _32, _33, _34, _35, _36, _37, _38, _39, _40, _41, _42, _43, _44, _45, _46, _47, _48, _49, _50, _51) AT_DISPATCH_CASE(_1, N) AT_DISPATCH_CASE(_2, N) AT_DISPATCH_CASE(_3, N) AT_DISPATCH_CASE(_4, N) AT_DISPATCH_CASE(_5, N) AT_DISPATCH_CASE(_6, N) AT_DISPATCH_CASE(_7, N) AT_DISPATCH_CASE(_8, N) AT_DISPATCH_CASE(_9, N) AT_DISPATCH_CASE(_10, N) AT_DISPATCH_CASE(_11, N) AT_DISPATCH_CASE(_12, N) AT_DISPATCH_CASE(_13, N) AT_DISPATCH_CASE(_14, N) AT_DISPATCH_CASE(_15, N) AT_DISPATCH_CASE(_16, N) AT_DISPATCH_CASE(_17, N) AT_DISPATCH_CASE(_18, N) AT_DISPATCH_CASE(_19, N) AT_DISPATCH_CASE(_20, N) AT_DISPATCH_CASE(_21, N) AT_DISPATCH_CASE(_22, N) AT_DISPATCH_CASE(_23, N) AT_DISPATCH_CASE(_24, N) AT_DISPATCH_CASE(_25, N) AT_DISPATCH_CASE(_26, N) AT_DISPATCH_CASE(_27, N) AT_DISPATCH_CASE(_28, N) AT_DISPATCH_CASE(_29, N) AT_DISPATCH_CASE(_30, N) AT_DISPATCH_CASE(_31, N) AT_DISPATCH_CASE(_32, N) AT_DISPATCH_CASE(_33, N) AT_DISPATCH_CASE(_34, N) AT_DISPATCH_CASE(_35, N) AT_DISPATCH_CASE(_36, N) AT_DISPATCH_CASE(_37, N) AT_DISPATCH_CASE(_38, N) AT_DISPATCH_CASE(_39, N) AT_DISPATCH_CASE(_40, N) AT_DISPATCH_CASE(_41, N) AT_DISPATCH_CASE(_42, N) AT_DISPATCH_CASE(_43, N) AT_DISPATCH_CASE(_44, N) AT_DISPATCH_CASE(_45, N) AT_DISPATCH_CASE(_46, N) AT_DISPATCH_CASE(_47, N) AT_DISPATCH_CASE(_48, N) AT_DISPATCH_CASE(_49, N) AT_DISPATCH_CASE(_50, N) AT_DISPATCH_CASE(_51, N)
#define AT_AP52(N, _1, _2, _3, _4, _5, _6, _7, _8, _9, _10, _11, _12, _13, _14, _15, _16, _17, _18, _19, _20, _21, _22, _23, _24, _25, _26, _27, _28, _29, _30, _31, _32, _33, _34, _35, _36, _37, _38, _39, _40, _41, _42, _43, _44, _45, _46, _47, _48, _49, _50, _51, _52) AT_DISPATCH_CASE(_1, N) AT_DISPATCH_CASE(_2, N) AT_DISPATCH_CASE(_3, N) AT_DISPATCH_CASE(_4, N) AT_DISPATCH_CASE(_5, N) AT_DISPATCH_CASE(_6, N) AT_DISPATCH_CASE(_7, N) AT_DISPATCH_CASE(_8, N) AT_DISPATCH_CASE(_9, N) AT_DISPATCH_CASE(_10, N) AT_DISPATCH_CASE(_11, N) AT_DISPATCH_CASE(_12, N) AT_DISPATCH_CASE(_13, N) AT_DISPATCH_CASE(_14, N) AT_DISPATCH_CASE(_15, N) AT_DISPATCH_CASE(_16, N) AT_DISPATCH_CASE(_17, N) AT_DISPATCH_CASE(_18, N) AT_DISPATCH_CASE(_19, N) AT_DISPATCH_CASE(_20, N) AT_DISPATCH_CASE(_21, N) AT_DISPATCH_CASE(_22, N) AT_DISPATCH_CASE(_23, N) AT_DISPATCH_CASE(_24, N) AT_DISPATCH_CASE(_25, N) AT_DISPATCH_CASE(_26, N) AT_DISPATCH_CASE(_27, N) AT_DISPATCH_CASE(_28, N) AT_DISPATCH_CASE(_29, N) AT_DISPATCH_CASE(_30, N) AT_DISPATCH_CASE(_31, N) AT_DISPATCH_CASE(_32, N) AT_DISPATCH_CASE(_33, N) AT_DISPATCH_CASE(_34, N) AT_DISPATCH_CASE(_35, N) AT_DISPATCH_CASE(_36, N) AT_DISPATCH_CASE(_37, N) AT_DISPATCH_CASE(_38, N) AT_DISPATCH_CASE(_39, N) AT_DISPATCH_CASE(_40, N) AT_DISPATCH_CASE(_41, N) AT_DISPATCH_CASE(_42, N) AT_DISPATCH_CASE(_43, N) AT_DISPATCH_CASE(_44, N) AT_DISPATCH_CASE(_45, N) AT_DISPATCH_CASE(_46, N) AT_DISPATCH_CASE(_47, N) AT_DISPATCH_CASE(_48, N) AT_DISPATCH_CASE(_49, N) AT_DISPATCH_CASE(_50, N) AT_DISPATCH_CASE(_51, N) AT_DISPATCH_CASE(_52, N)
#define AT_AP53(N, _1, _2, _3, _4, _5, _6, _7, _8, _9, _10, _11, _12, _13, _14, _15, _16, _17, _18, _19, _20, _21, _22, _23, _24, _25, _26, _27, _28, _29, _30, _31, _32, _33, _34, _35, _36, _37, _38, _39, _40, _41, _42, _43, _44, _45, _46, _47, _48, _49, _50, _51, _52, _53) AT_DISPATCH_CASE(_1, N) AT_DISPATCH_CASE(_2, N) AT_DISPATCH_CASE(_3, N) AT_DISPATCH_CASE(_4, N) AT_DISPATCH_CASE(_5, N) AT_DISPATCH_CASE(_6, N) AT_DISPATCH_CASE(_7, N) AT_DISPATCH_CASE(_8, N) AT_DISPATCH_CASE(_9, N) AT_DISPATCH_CASE(_10, N) AT_DISPATCH_CASE(_11, N) AT_DISPATCH_CASE(_12, N) AT_DISPATCH_CASE(_13, N) AT_DISPATCH_CASE(_14, N) AT_DISPATCH_CASE(_15, N) AT_DISPATCH_CASE(_16, N) AT_DISPATCH_CASE(_17, N) AT_DISPATCH_CASE(_18, N) AT_DISPATCH_CASE(_19, N) AT_DISPATCH_CASE(_20, N) AT_DISPATCH_CASE(_21, N) AT_DISPATCH_CASE(_22, N) AT_DISPATCH_CASE(_23, N) AT_DISPATCH_CASE(_24, N) AT_DISPATCH_CASE(_25, N) AT_DISPATCH_CASE(_26, N) AT_DISPATCH_CASE(_27, N) AT_DISPATCH_CASE(_28, N) AT_DISPATCH_CASE(_29, N) AT_DISPATCH_CASE(_30, N) AT_DISPATCH_CASE(_31, N) AT_DISPATCH_CASE(_32, N) AT_DISPATCH_CASE(_33, N) AT_DISPATCH_CASE(_34, N) AT_DISPATCH_CASE(_35, N) AT_DISPATCH_CASE(_36, N) AT_DISPATCH_CASE(_37, N) AT_DISPATCH_CASE(_38, N) AT_DISPATCH_CASE(_39, N) AT_DISPATCH_CASE(_40, N) AT_DISPATCH_CASE(_41, N) AT_DISPATCH_CASE(_42, N) AT_DISPATCH_CASE(_43, N) AT_DISPATCH_CASE(_44, N) AT_DISPATCH_CASE(_45, N) AT_DISPATCH_CASE(_46, N) AT_DISPATCH_CASE(_47, N) AT_DISPATCH_CASE(_48, N) AT_DISPATCH_CASE(_49, N) AT_DISPATCH_CASE(_50, N) AT_DISPATCH_CASE(_51, N) AT_DISPATCH_CASE(_52, N) AT_DISPATCH_CASE(_53, N)
#define AT_AP54(N, _1, _2, _3, _4, _5, _6, _7, _8, _9, _10, _11, _12, _13, _14, _15, _16, _17, _18, _19, _20, _21, _22, _23, _24, _25, _26, _27, _28, _29, _30, _31, _32, _33, _34, _35, _36, _37, _38, _39, _40, _41, _42, _43, _44, _45, _46, _47, _48, _49, _50, _51, _52, _53, _54) AT_DISPATCH_CASE(_1, N) AT_DISPATCH_CASE(_2, N) AT_DISPATCH_CASE(_3, N) AT_DISPATCH_CASE(_4, N) AT_DISPATCH_CASE(_5, N) AT_DISPATCH_CASE(_6, N) AT_DISPATCH_CASE(_7, N) AT_DISPATCH_CASE(_8, N) AT_DISPATCH_CASE(_9, N) AT_DISPATCH_CASE(_10, N) AT_DISPATCH_CASE(_11, N) AT_DISPATCH_CASE(_12, N) AT_DISPATCH_CASE(_13, N) AT_DISPATCH_CASE(_14, N) AT_DISPATCH_CASE(_15, N) AT_DISPATCH_CASE(_16, N) AT_DISPATCH_CASE(_17, N) AT_DISPATCH_CASE(_18, N) AT_DISPATCH_CASE(_19, N) AT_DISPATCH_CASE(_20, N) AT_DISPATCH_CASE(_21, N) AT_DISPATCH_CASE(_22, N) AT_DISPATCH_CASE(_23, N) AT_DISPATCH_CASE(_24, N) AT_DISPATCH_CASE(_25, N) AT_DISPATCH_CASE(_26, N) AT_DISPATCH_CASE(_27, N) AT_DISPATCH_CASE(_28, N) AT_DISPATCH_CASE(_29, N) AT_DISPATCH_CASE(_30, N) AT_DISPATCH_CASE(_31, N) AT_DISPATCH_CASE(_32, N) AT_DISPATCH_CASE(_33, N) AT_DISPATCH_CASE(_34, N) AT_DISPATCH_CASE(_35, N) AT_DISPATCH_CASE(_36, N) AT_DISPATCH_CASE(_37, N) AT_DISPATCH_CASE(_38, N) AT_DISPATCH_CASE(_39, N) AT_DISPATCH_CASE(_40, N) AT_DISPATCH_CASE(_41, N) AT_DISPATCH_CASE(_42, N) AT_DISPATCH_CASE(_43, N) AT_DISPATCH_CASE(_44, N) AT_DISPATCH_CASE(_45, N) AT_DISPATCH_CASE(_46, N) AT_DISPATCH_CASE(_47, N) AT_DISPATCH_CASE(_48, N) AT_DISPATCH_CASE(_49, N) AT_DISPATCH_CASE(_50, N) AT_DISPATCH_CASE(_51, N) AT_DISPATCH_CASE(_52, N) AT_DISPATCH_CASE(_53, N) AT_DISPATCH_CASE(_54, N)
#define AT_AP55(N, _1, _2, _3, _4, _5, _6, _7, _8, _9, _10, _11, _12, _13, _14, _15, _16, _17, _18, _19, _20, _21, _22, _23, _24, _25, _26, _27, _28, _29, _30, _31, _32, _33, _34, _35, _36, _37, _38, _39, _40, _41, _42, _43, _44, _45, _46, _47, _48, _49, _50, _51, _52, _53, _54, _55) AT_DISPATCH_CASE(_1, N) AT_DISPATCH_CASE(_2, N) AT_DISPATCH_CASE(_3, N) AT_DISPATCH_CASE(_4, N) AT_DISPATCH_CASE(_5, N) AT_DISPATCH_CASE(_6, N) AT_DISPATCH_CASE(_7, N) AT_DISPATCH_CASE(_8, N) AT_DISPATCH_CASE(_9, N) AT_DISPATCH_CASE(_10, N) AT_DISPATCH_CASE(_11, N) AT_DISPATCH_CASE(_12, N) AT_DISPATCH_CASE(_13, N) AT_DISPATCH_CASE(_14, N) AT_DISPATCH_CASE(_15, N) AT_DISPATCH_CASE(_16, N) AT_DISPATCH_CASE(_17, N) AT_DISPATCH_CASE(_18, N) AT_DISPATCH_CASE(_19, N) AT_DISPATCH_CASE(_20, N) AT_DISPATCH_CASE(_21, N) AT_DISPATCH_CASE(_22, N) AT_DISPATCH_CASE(_23, N) AT_DISPATCH_CASE(_24, N) AT_DISPATCH_CASE(_25, N) AT_DISPATCH_CASE(_26, N) AT_DISPATCH_CASE(_27, N) AT_DISPATCH_CASE(_28, N) AT_DISPATCH_CASE(_29, N) AT_DISPATCH_CASE(_30, N) AT_DISPATCH_CASE(_31, N) AT_DISPATCH_CASE(_32, N) AT_DISPATCH_CASE(_33, N) AT_DISPATCH_CASE(_34, N) AT_DISPATCH_CASE(_35, N) AT_DISPATCH_CASE(_36, N) AT_DISPATCH_CASE(_37, N) AT_DISPATCH_CASE(_38, N) AT_DISPATCH_CASE(_39, N) AT_DISPATCH_CASE(_40, N) AT_DISPATCH_CASE(_41, N) AT_DISPATCH_CASE(_42, N) AT_DISPATCH_CASE(_43, N) AT_DISPATCH_CASE(_44, N) AT_DISPATCH_CASE(_45, N) AT_DISPATCH_CASE(_46, N) AT_DISPATCH_CASE(_47, N) AT_DISPATCH_CASE(_48, N) AT_DISPATCH_CASE(_49, N) AT_DISPATCH_CASE(_50, N) AT_DISPATCH_CASE(_51, N) AT_DISPATCH_CASE(_52, N) AT_DISPATCH_CASE(_53, N) AT_DISPATCH_CASE(_54, N) AT_DISPATCH_CASE(_55, N)
#define AT_AP56(N, _1, _2, _3, _4, _5, _6, _7, _8, _9, _10, _11, _12, _13, _14, _15, _16, _17, _18, _19, _20, _21, _22, _23, _24, _25, _26, _27, _28, _29, _30, _31, _32, _33, _34, _35, _36, _37, _38, _39, _40, _41, _42, _43, _44, _45, _46, _47, _48, _49, _50, _51, _52, _53, _54, _55, _56) AT_DISPATCH_CASE(_1, N) AT_DISPATCH_CASE(_2, N) AT_DISPATCH_CASE(_3, N) AT_DISPATCH_CASE(_4, N) AT_DISPATCH_CASE(_5, N) AT_DISPATCH_CASE(_6, N) AT_DISPATCH_CASE(_7, N) AT_DISPATCH_CASE(_8, N) AT_DISPATCH_CASE(_9, N) AT_DISPATCH_CASE(_10, N) AT_DISPATCH_CASE(_11, N) AT_DISPATCH_CASE(_12, N) AT_DISPATCH_CASE(_13, N) AT_DISPATCH_CASE(_14, N) AT_DISPATCH_CASE(_15, N) AT_DISPATCH_CASE(_16, N) AT_DISPATCH_CASE(_17, N) AT_DISPATCH_CASE(_18, N) AT_DISPATCH_CASE(_19, N) AT_DISPATCH_CASE(_20, N) AT_DISPATCH_CASE(_21, N) AT_DISPATCH_CASE(_22, N) AT_DISPATCH_CASE(_23, N) AT_DISPATCH_CASE(_24, N) AT_DISPATCH_CASE(_25, N) AT_DISPATCH_CASE(_26, N) AT_DISPATCH_CASE(_27, N) AT_DISPATCH_CASE(_28, N) AT_DISPATCH_CASE(_29, N) AT_DISPATCH_CASE(_30, N) AT_DISPATCH_CASE(_31, N) AT_DISPATCH_CASE(_32, N) AT_DISPATCH_CASE(_33, N) AT_DISPATCH_CASE(_34, N) AT_DISPATCH_CASE(_35, N) AT_DISPATCH_CASE(_36, N) AT_DISPATCH_CASE(_37, N) AT_DISPATCH_CASE(_38, N) AT_DISPATCH_CASE(_39, N) AT_DISPATCH_CASE(_40, N) AT_DISPATCH_CASE(_41, N) AT_DISPATCH_CASE(_42, N) AT_DISPATCH_CASE(_43, N) AT_DISPATCH_CASE(_44, N) AT_DISPATCH_CASE(_45, N) AT_DISPATCH_CASE(_46, N) AT_DISPATCH_CASE(_47, N) AT_DISPATCH_CASE(_48, N) AT_DISPATCH_CASE(_49, N) AT_DISPATCH_CASE(_50, N) AT_DISPATCH_CASE(_51, N) AT_DISPATCH_CASE(_52, N) AT_DISPATCH_CASE(_53, N) AT_DISPATCH_CASE(_54, N) AT_DISPATCH_CASE(_55, N) AT_DISPATCH_CASE(_56, N)
#define AT_AP57(N, _1, _2, _3, _4, _5, _6, _7, _8, _9, _10, _11, _12, _13, _14, _15, _16, _17, _18, _19, _20, _21, _22, _23, _24, _25, _26, _27, _28, _29, _30, _31, _32, _33, _34, _35, _36, _37, _38, _39, _40, _41, _42, _43, _44, _45, _46, _47, _48, _49, _50, _51, _52, _53, _54, _55, _56, _57) AT_DISPATCH_CASE(_1, N) AT_DISPATCH_CASE(_2, N) AT_DISPATCH_CASE(_3, N) AT_DISPATCH_CASE(_4, N) AT_DISPATCH_CASE(_5, N) AT_DISPATCH_CASE(_6, N) AT_DISPATCH_CASE(_7, N) AT_DISPATCH_CASE(_8, N) AT_DISPATCH_CASE(_9, N) AT_DISPATCH_CASE(_10, N) AT_DISPATCH_CASE(_11, N) AT_DISPATCH_CASE(_12, N) AT_DISPATCH_CASE(_13, N) AT_DISPATCH_CASE(_14, N) AT_DISPATCH_CASE(_15, N) AT_DISPATCH_CASE(_16, N) AT_DISPATCH_CASE(_17, N) AT_DISPATCH_CASE(_18, N) AT_DISPATCH_CASE(_19, N) AT_DISPATCH_CASE(_20, N) AT_DISPATCH_CASE(_21, N) AT_DISPATCH_CASE(_22, N) AT_DISPATCH_CASE(_23, N) AT_DISPATCH_CASE(_24, N) AT_DISPATCH_CASE(_25, N) AT_DISPATCH_CASE(_26, N) AT_DISPATCH_CASE(_27, N) AT_DISPATCH_CASE(_28, N) AT_DISPATCH_CASE(_29, N) AT_DISPATCH_CASE(_30, N) AT_DISPATCH_CASE(_31, N) AT_DISPATCH_CASE(_32, N) AT_DISPATCH_CASE(_33, N) AT_DISPATCH_CASE(_34, N) AT_DISPATCH_CASE(_35, N) AT_DISPATCH_CASE(_36, N) AT_DISPATCH_CASE(_37, N) AT_DISPATCH_CASE(_38, N) AT_DISPATCH_CASE(_39, N) AT_DISPATCH_CASE(_40, N) AT_DISPATCH_CASE(_41, N) AT_DISPATCH_CASE(_42, N) AT_DISPATCH_CASE(_43, N) AT_DISPATCH_CASE(_44, N) AT_DISPATCH_CASE(_45, N) AT_DISPATCH_CASE(_46, N) AT_DISPATCH_CASE(_47, N) AT_DISPATCH_CASE(_48, N) AT_DISPATCH_CASE(_49, N) AT_DISPATCH_CASE(_50, N) AT_DISPATCH_CASE(_51, N) AT_DISPATCH_CASE(_52, N) AT_DISPATCH_CASE(_53, N) AT_DISPATCH_CASE(_54, N) AT_DISPATCH_CASE(_55, N) AT_DISPATCH_CASE(_56, N) AT_DISPATCH_CASE(_57, N)
#define AT_AP58(N, _1, _2, _3, _4, _5, _6, _7, _8, _9, _10, _11, _12, _13, _14, _15, _16, _17, _18, _19, _20, _21, _22, _23, _24, _25, _26, _27, _28, _29, _30, _31, _32, _33, _34, _35, _36, _37, _38, _39, _40, _41, _42, _43, _44, _45, _46, _47, _48, _49, _50, _51, _52, _53, _54, _55, _56, _57, _58) AT_DISPATCH_CASE(_1, N) AT_DISPATCH_CASE(_2, N) AT_DISPATCH_CASE(_3, N) AT_DISPATCH_CASE(_4, N) AT_DISPATCH_CASE(_5, N) AT_DISPATCH_CASE(_6, N) AT_DISPATCH_CASE(_7, N) AT_DISPATCH_CASE(_8, N) AT_DISPATCH_CASE(_9, N) AT_DISPATCH_CASE(_10, N) AT_DISPATCH_CASE(_11, N) AT_DISPATCH_CASE(_12, N) AT_DISPATCH_CASE(_13, N) AT_DISPATCH_CASE(_14, N) AT_DISPATCH_CASE(_15, N) AT_DISPATCH_CASE(_16, N) AT_DISPATCH_CASE(_17, N) AT_DISPATCH_CASE(_18, N) AT_DISPATCH_CASE(_19, N) AT_DISPATCH_CASE(_20, N) AT_DISPATCH_CASE(_21, N) AT_DISPATCH_CASE(_22, N) AT_DISPATCH_CASE(_23, N) AT_DISPATCH_CASE(_24, N) AT_DISPATCH_CASE(_25, N) AT_DISPATCH_CASE(_26, N) AT_DISPATCH_CASE(_27, N) AT_DISPATCH_CASE(_28, N) AT_DISPATCH_CASE(_29, N) AT_DISPATCH_CASE(_30, N) AT_DISPATCH_CASE(_31, N) AT_DISPATCH_CASE(_32, N) AT_DISPATCH_CASE(_33, N) AT_DISPATCH_CASE(_34, N) AT_DISPATCH_CASE(_35, N) AT_DISPATCH_CASE(_36, N) AT_DISPATCH_CASE(_37, N) AT_DISPATCH_CASE(_38, N) AT_DISPATCH_CASE(_39, N) AT_DISPATCH_CASE(_40, N) AT_DISPATCH_CASE(_41, N) AT_DISPATCH_CASE(_42, N) AT_DISPATCH_CASE(_43, N) AT_DISPATCH_CASE(_44, N) AT_DISPATCH_CASE(_45, N) AT_DISPATCH_CASE(_46, N) AT_DISPATCH_CASE(_47, N) AT_DISPATCH_CASE(_48, N) AT_DISPATCH_CASE(_49, N) AT_DISPATCH_CASE(_50, N) AT_DISPATCH_CASE(_51, N) AT_DISPATCH_CASE(_52, N) AT_DISPATCH_CASE(_53, N) AT_DISPATCH_CASE(_54, N) AT_DISPATCH_CASE(_55, N) AT_DISPATCH_CASE(_56, N) AT_DISPATCH_CASE(_57, N) AT_DISPATCH_CASE(_58, N)
#define AT_AP59(N, _1, _2, _3, _4, _5, _6, _7, _8, _9, _10, _11, _12, _13, _14, _15, _16, _17, _18, _19, _20, _21, _22, _23, _24, _25, _26, _27, _28, _29, _30, _31, _32, _33, _34, _35, _36, _37, _38, _39, _40, _41, _42, _43, _44, _45, _46, _47, _48, _49, _50, _51, _52, _53, _54, _55, _56, _57, _58, _59) AT_DISPATCH_CASE(_1, N) AT_DISPATCH_CASE(_2, N) AT_DISPATCH_CASE(_3, N) AT_DISPATCH_CASE(_4, N) AT_DISPATCH_CASE(_5, N) AT_DISPATCH_CASE(_6, N) AT_DISPATCH_CASE(_7, N) AT_DISPATCH_CASE(_8, N) AT_DISPATCH_CASE(_9, N) AT_DISPATCH_CASE(_10, N) AT_DISPATCH_CASE(_11, N) AT_DISPATCH_CASE(_12, N) AT_DISPATCH_CASE(_13, N) AT_DISPATCH_CASE(_14, N) AT_DISPATCH_CASE(_15, N) AT_DISPATCH_CASE(_16, N) AT_DISPATCH_CASE(_17, N) AT_DISPATCH_CASE(_18, N) AT_DISPATCH_CASE(_19, N) AT_DISPATCH_CASE(_20, N) AT_DISPATCH_CASE(_21, N) AT_DISPATCH_CASE(_22, N) AT_DISPATCH_CASE(_23, N) AT_DISPATCH_CASE(_24, N) AT_DISPATCH_CASE(_25, N) AT_DISPATCH_CASE(_26, N) AT_DISPATCH_CASE(_27, N) AT_DISPATCH_CASE(_28, N) AT_DISPATCH_CASE(_29, N) AT_DISPATCH_CASE(_30, N) AT_DISPATCH_CASE(_31, N) AT_DISPATCH_CASE(_32, N) AT_DISPATCH_CASE(_33, N) AT_DISPATCH_CASE(_34, N) AT_DISPATCH_CASE(_35, N) AT_DISPATCH_CASE(_36, N) AT_DISPATCH_CASE(_37, N) AT_DISPATCH_CASE(_38, N) AT_DISPATCH_CASE(_39, N) AT_DISPATCH_CASE(_40, N) AT_DISPATCH_CASE(_41, N) AT_DISPATCH_CASE(_42, N) AT_DISPATCH_CASE(_43, N) AT_DISPATCH_CASE(_44, N) AT_DISPATCH_CASE(_45, N) AT_DISPATCH_CASE(_46, N) AT_DISPATCH_CASE(_47, N) AT_DISPATCH_CASE(_48, N) AT_DISPATCH_CASE(_49, N) AT_DISPATCH_CASE(_50, N) AT_DISPATCH_CASE(_51, N) AT_DISPATCH_CASE(_52, N) AT_DISPATCH_CASE(_53, N) AT_DISPATCH_CASE(_54, N) AT_DISPATCH_CASE(_55, N) AT_DISPATCH_CASE(_56, N) AT_DISPATCH_CASE(_57, N) AT_DISPATCH_CASE(_58, N) AT_DISPATCH_CASE(_59, N)
#define AT_AP60(N, _1, _2, _3, _4, _5, _6, _7, _8, _9, _10, _11, _12, _13, _14, _15, _16, _17, _18, _19, _20, _21, _22, _23, _24, _25, _26, _27, _28, _29, _30, _31, _32, _33, _34, _35, _36, _37, _38, _39, _40, _41, _42, _43, _44, _45, _46, _47, _48, _49, _50, _51, _52, _53, _54, _55, _56, _57, _58, _59, _60) AT_DISPATCH_CASE(_1, N) AT_DISPATCH_CASE(_2, N) AT_DISPATCH_CASE(_3, N) AT_DISPATCH_CASE(_4, N) AT_DISPATCH_CASE(_5, N) AT_DISPATCH_CASE(_6, N) AT_DISPATCH_CASE(_7, N) AT_DISPATCH_CASE(_8, N) AT_DISPATCH_CASE(_9, N) AT_DISPATCH_CASE(_10, N) AT_DISPATCH_CASE(_11, N) AT_DISPATCH_CASE(_12, N) AT_DISPATCH_CASE(_13, N) AT_DISPATCH_CASE(_14, N) AT_DISPATCH_CASE(_15, N) AT_DISPATCH_CASE(_16, N) AT_DISPATCH_CASE(_17, N) AT_DISPATCH_CASE(_18, N) AT_DISPATCH_CASE(_19, N) AT_DISPATCH_CASE(_20, N) AT_DISPATCH_CASE(_21, N) AT_DISPATCH_CASE(_22, N) AT_DISPATCH_CASE(_23, N) AT_DISPATCH_CASE(_24, N) AT_DISPATCH_CASE(_25, N) AT_DISPATCH_CASE(_26, N) AT_DISPATCH_CASE(_27, N) AT_DISPATCH_CASE(_28, N) AT_DISPATCH_CASE(_29, N) AT_DISPATCH_CASE(_30, N) AT_DISPATCH_CASE(_31, N) AT_DISPATCH_CASE(_32, N) AT_DISPATCH_CASE(_33, N) AT_DISPATCH_CASE(_34, N) AT_DISPATCH_CASE(_35, N) AT_DISPATCH_CASE(_36, N) AT_DISPATCH_CASE(_37, N) AT_DISPATCH_CASE(_38, N) AT_DISPATCH_CASE(_39, N) AT_DISPATCH_CASE(_40, N) AT_DISPATCH_CASE(_41, N) AT_DISPATCH_CASE(_42, N) AT_DISPATCH_CASE(_43, N) AT_DISPATCH_CASE(_44, N) AT_DISPATCH_CASE(_45, N) AT_DISPATCH_CASE(_46, N) AT_DISPATCH_CASE(_47, N) AT_DISPATCH_CASE(_48, N) AT_DISPATCH_CASE(_49, N) AT_DISPATCH_CASE(_50, N) AT_DISPATCH_CASE(_51, N) AT_DISPATCH_CASE(_52, N) AT_DISPATCH_CASE(_53, N) AT_DISPATCH_CASE(_54, N) AT_DISPATCH_CASE(_55, N) AT_DISPATCH_CASE(_56, N) AT_DISPATCH_CASE(_57, N) AT_DISPATCH_CASE(_58, N) AT_DISPATCH_CASE(_59, N) AT_DISPATCH_CASE(_60, N)
// End generated code
// clang-format on
@ -18,8 +18,6 @@ c10::Allocator* GetCPUAllocatorMaybePinned(bool pin_memory) {
  // To properly support this, see https://github.com/pytorch/pytorch/issues/14560
  if (at::globalContext().hasCUDA()) {
    return at::detail::getCUDAHooks().getPinnedMemoryAllocator();
  } else if (at::globalContext().hasMTIA()) {
    return at::detail::getMTIAHooks().getPinnedMemoryAllocator();
  } else if (at::globalContext().hasXPU()) {
    return at::detail::getXPUHooks().getPinnedMemoryAllocator();
  } else if (at::isPrivateUse1HooksRegistered()) {
@ -420,15 +420,15 @@ inline c10::MaybeOwned<Tensor> expand_size(
inline std::vector<Tensor> expand_outplace(TensorList to_expand) {
  // expands a list of Tensors; ignores undefined (null) tensors
  bool first = true;
  SymDimVector sizes;
  DimVector sizes;
  for (const auto i : c10::irange(to_expand.size())) {
    if (!to_expand[i].defined()) {
      continue;
    } else if (first) {
      sizes = to_expand[i].sym_sizes();
      sizes = to_expand[i].sizes();
      first = false;
    } else {
      sizes = infer_size_symdimvector(sizes, to_expand[i].sym_sizes());
      sizes = infer_size_dimvector(sizes, to_expand[i].sizes());
    }
  }

@ -436,10 +436,10 @@ inline std::vector<Tensor> expand_outplace(TensorList to_expand) {
  for (const auto i : c10::irange(to_expand.size())) {
    if (!to_expand[i].defined()) {
      continue;
    } else if (to_expand[i].sym_sizes().equals(sizes)) {
    } else if (to_expand[i].sizes().equals(sizes)) {
      result[i] = to_expand[i];
    } else {
      result[i] = to_expand[i].expand_symint(sizes);
      result[i] = to_expand[i].expand(sizes);
    }
  }
  return result;
@ -209,8 +209,8 @@ void init_num_threads() {
}

void set_num_threads(int nthreads) {
#ifndef C10_MOBILE
  TORCH_CHECK(nthreads > 0, "Expected positive number of threads");
#ifndef C10_MOBILE
  int no_value = NOT_SET;
  if (!num_intraop_threads.compare_exchange_strong(no_value, nthreads)) {
    // num_intraop_threads either stores a positive integer or CONSUMED,
@ -229,9 +229,8 @@ void set_num_threads(int nthreads) {
    }
  }
#else
  caffe2::PThreadPool* const pool = caffe2::pthreadpool();
  caffe2::PThreadPool* const pool = caffe2::pthreadpool(nthreads);
  TORCH_INTERNAL_ASSERT(pool, "Invalid thread pool!");
  pool->set_thread_count(nthreads);
#endif // C10_MOBILE
}

@ -19,7 +19,7 @@ Tensor& scalar_fill(Tensor& self, const Scalar& value) {
  AT_DISPATCH_V2(
      self.scalar_type(), "fill_out", AT_WRAP([&]() {
        fill_inplace<scalar_t>(self, value);
      }), kComplexHalf, kHalf, kBool, kBFloat16, AT_EXPAND(AT_ALL_TYPES_AND_COMPLEX), AT_EXPAND(AT_FLOAT8_TYPES), AT_EXPAND(AT_BAREBONES_UNSIGNED_TYPES));
      }), kComplexHalf, kHalf, kBool, kBFloat16, AT_EXPAND(AT_ALL_TYPES_AND_COMPLEX), AT_EXPAND(AT_BAREBONES_UNSIGNED_TYPES));
  return self;
}

@ -144,8 +144,8 @@ class CheckSparseTensorInvariants {
  bool old_state;

 public:
  CheckSparseTensorInvariants(bool state)
      : old_state(at::globalContext().checkSparseTensorInvariants()) {
  CheckSparseTensorInvariants(bool state) {
    old_state = at::globalContext().checkSparseTensorInvariants();
    at::globalContext().setCheckSparseTensorInvariants(state);
  }

@ -255,9 +255,7 @@ inline Tensor applySelect(
    // the other hand, indexing wrapping is valid for all negative int64_t
    // values, as x[INT64_MIN] is the same as x[INT64_MAX]
    TORCH_CHECK_INDEX(
        size.sym_gt(-1 - index)
            .sym_and(size.sym_gt(index))
            .expect_true(__FILE__, __LINE__),
        size > -1 - index && size > index,
        "index ",
        index,
        " is out of bounds for dimension ",
@ -82,7 +82,7 @@ class TORCH_API ThreadLocalState {
    !defined(BUILD_LITE_INTERPRETER)
  // TLS for autocast dtypes
  std::array<at::ScalarType, at::COMPILE_TIME_MAX_DEVICE_TYPES>
      autocast_dtypes_{};
      autocast_dtypes_;
#endif

  friend class ThreadLocalStateGuard;

@ -13,6 +13,8 @@

#include <ATen/core/Array.h>
#include <c10/macros/Macros.h>
#include <c10/util/Exception.h>
#include <c10/util/Half.h>
#include <cmath>
#include <cstdint>

@ -306,10 +306,11 @@ struct VecConvert<float, 1, BFloat16, 1> {
      const VectorizedN<BFloat16, 1>& src) {
    VectorizedN<float, 1> result;
    uint16x8_t u16_8 = vld1q_u16(reinterpret_cast<const uint16_t*>(&src[0]));
    int32x4_t shift = vdupq_n_s32(16);
    auto u16_low1 = vget_low_u16(u16_8);
    auto u16_high1 = vget_high_u16(u16_8);
    float32x4_t f32x4_0 = vreinterpretq_f32_u32(vshlq_n_u32(vmovl_u16(u16_low1), 16));
    float32x4_t f32x4_1 = vreinterpretq_f32_u32(vshlq_n_u32(vmovl_u16(u16_high1), 16));
    float32x4_t f32x4_0 = vreinterpretq_f32_u32(vshlq_u32(vmovl_u16(u16_low1), shift));
    float32x4_t f32x4_1 = vreinterpretq_f32_u32(vshlq_u32(vmovl_u16(u16_high1), shift));
    result[0] = {f32x4_0, f32x4_1};
    return result;
  }

@ -75,7 +75,7 @@ inline __m256i pack_saturate_and_clamp<int32_t>(
    int32_t /*min_val*/,
    int32_t /*max_val*/) {
  // This function is for linkage only, will not be used
  TORCH_CHECK(false, "pack_saturate_and_clamp<int32_t> is not supported");
  AT_ERROR("pack_saturate_and_clamp<int32_t> is not supported");
}

template <>

@ -77,7 +77,7 @@ inline __m512i pack_saturate_and_clamp<int32_t>(
    int32_t min_val [[maybe_unused]],
    int32_t max_val [[maybe_unused]]) {
  // This function is for linkage only, will not be used
  TORCH_CHECK(false, "pack_saturate_and_clamp<int32_t> is not supported");
  AT_ERROR("pack_saturate_and_clamp<int32_t> is not supported");
  return __m512i{};
}

@ -125,7 +125,7 @@ void CUDAGraph::capture_begin(MempoolId_t pool/*=0*/, cudaStreamCaptureMode capt
  // due to the capture status being updated _after_ a capture had already started.
  c10::cuda::CUDACachingAllocator::beginAllocateToPool(capture_dev_, mempool_id_, [this](cudaStream_t stream) {
    cudaStreamCaptureStatus status;
    CaptureId_t stream_capture_id = 0;
    CaptureId_t stream_capture_id;
    AT_CUDA_CHECK(cudaStreamGetCaptureInfo(stream, &status, &stream_capture_id));
    return status == cudaStreamCaptureStatus::cudaStreamCaptureStatusActive && stream_capture_id == capture_id_;
  });

@ -10,7 +10,7 @@ TensorBase empty_cuda(
    ScalarType dtype,
    std::optional<Device> device_opt,
    std::optional<c10::MemoryFormat> memory_format_opt) {
  at::globalContext().lazyInitDevice(c10::DeviceType::CUDA);
  at::globalContext().lazyInitCUDA();
  const auto device = device_or_default(device_opt);
  TORCH_INTERNAL_ASSERT(device.is_cuda());
  const DeviceGuard device_guard(device);
@ -50,7 +50,7 @@ TensorBase empty_strided_cuda(
    IntArrayRef stride,
    ScalarType dtype,
    std::optional<Device> device_opt) {
  at::globalContext().lazyInitDevice(c10::DeviceType::CUDA);
  at::globalContext().lazyInitCUDA();
  const auto device = device_or_default(device_opt);
  TORCH_INTERNAL_ASSERT(device.is_cuda());
  const DeviceGuard device_guard(device);

@ -34,7 +34,7 @@ void init_p2p_access_cache(int64_t num_devices) {
} // namespace detail

bool get_p2p_access(int dev, int dev_to_access) {
  at::globalContext().lazyInitDevice(c10::DeviceType::CUDA);
  at::globalContext().lazyInitCUDA();

  TORCH_CHECK(dev >= 0 || dev < num_devices_,
      dev, " is not a device");

@ -14,7 +14,6 @@
#include <ATen/detail/CUDAHooksInterface.h>
#include <ATen/native/cuda/CuFFTPlanCache.h>
#include <c10/util/Exception.h>
#include <c10/util/env.h>
#include <c10/cuda/CUDACachingAllocator.h>
#include <c10/cuda/CUDAFunctions.h>
#include <c10/util/irange.h>
@ -80,19 +79,30 @@ struct _Initializer {
} initializer;
} // anonymous namespace

// Sets the CUDA_MODULE_LOADING environment variable
// if it's not set by the user.
void maybe_set_cuda_module_loading(const std::string &def_value) {
  auto value = std::getenv("CUDA_MODULE_LOADING");
  if (!value) {
#ifdef _WIN32
    auto env_var = "CUDA_MODULE_LOADING=" + def_value;
    _putenv(env_var.c_str());
#else
    setenv("CUDA_MODULE_LOADING", def_value.c_str(), 1);
#endif
  }
}

// NB: deleter is dynamic, because we need it to live in a separate
// compilation unit (alt is to have another method in hooks, but
// let's not if we don't need to!)
void CUDAHooks::init() const {
void CUDAHooks::initCUDA() const {
  C10_LOG_API_USAGE_ONCE("aten.init.cuda");
  // Force the update to enable unit testing. This code gets executed before unit tests
  // have a chance to enable vitals.
  at::vitals::VitalsAPI.setVital("CUDA", "used", "true", /* force = */ true);

  // Sets the CUDA_MODULE_LOADING environment variable
  // if it's not set by the user.
  c10::utils::set_env("CUDA_MODULE_LOADING", "LAZY", false);
  maybe_set_cuda_module_loading("LAZY");
  const auto num_devices = c10::cuda::device_count_ensure_non_zero();
  c10::cuda::CUDACachingAllocator::init(num_devices);
  at::cuda::detail::init_p2p_access_cache(num_devices);
@ -103,7 +113,7 @@ void CUDAHooks::init() const {
#endif
}

const Generator& CUDAHooks::getDefaultGenerator(DeviceIndex device_index) const {
const Generator& CUDAHooks::getDefaultCUDAGenerator(DeviceIndex device_index) const {
  return at::cuda::detail::getDefaultCUDAGenerator(device_index);
}

@ -231,9 +241,6 @@ DeviceIndex current_device() {
  return -1;
}

/**
 * DEPRECATED: use getCurrentDevice() instead
 */
DeviceIndex CUDAHooks::current_device() const {
  return at::cuda::detail::current_device();
}
@ -429,21 +436,10 @@ void CUDAHooks::cuFFTClearPlanCache(DeviceIndex device_index) const {
  at::native::detail::cufft_clear_plan_cache_impl(device_index);
}

/**
 * DEPRECATED: use deviceCount() instead
 */
int CUDAHooks::getNumGPUs() const {
  return at::cuda::device_count();
}

DeviceIndex CUDAHooks::deviceCount() const {
  return at::cuda::device_count();
}

DeviceIndex CUDAHooks::getCurrentDevice() const {
  return at::cuda::detail::current_device();
}

#ifdef USE_ROCM
bool CUDAHooks::isGPUArch(DeviceIndex device_index, const std::vector<std::string>& archs) const {
  hipDeviceProp_t* prop = at::cuda::getDeviceProperties(device_index);

@ -19,11 +19,10 @@ TORCH_CUDA_CPP_API void set_magma_init_fn(void (*magma_init_fn)());
// The real implementation of CUDAHooksInterface
struct CUDAHooks : public at::CUDAHooksInterface {
  CUDAHooks(at::CUDAHooksArgs) {}
  void init() const override;
  void initCUDA() const override;
  Device getDeviceFromPtr(void* data) const override;
  bool isPinnedPtr(const void* data) const override;
  const Generator& getDefaultGenerator(
      DeviceIndex device_index = -1) const override;
  const Generator& getDefaultCUDAGenerator(DeviceIndex device_index = -1) const override;
  bool hasCUDA() const override;
  bool hasMAGMA() const override;
  bool hasCuDNN() const override;
@ -50,9 +49,6 @@ struct CUDAHooks : public at::CUDAHooksInterface {
  int64_t cuFFTGetPlanCacheSize(DeviceIndex device_index) const override;
  void cuFFTClearPlanCache(DeviceIndex device_index) const override;
  int getNumGPUs() const override;
  DeviceIndex deviceCount() const override;
  DeviceIndex getCurrentDevice() const override;

#ifdef USE_ROCM
  bool isGPUArch(DeviceIndex device_index, const std::vector<std::string>& archs) const override;
#endif

@ -77,31 +77,6 @@ default, now called through TunableOp. Any call to at::cuda::blas::gemm() or ::b
when enabled. Calling gemm() for a given set of input arguments (transa, transb, m, n, k) will attempt to use the
fastest available implementation across both rocblas and hipblaslt.
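As a quick illustration of this online path, a minimal sketch that enables TunableOp through the documented environment variable and runs a single GEMM (the shapes and dtype are arbitrary example values, and a CUDA or ROCm build is assumed):

```
import os

# Must be set before the first CUDA GEMM is dispatched.
os.environ["PYTORCH_TUNABLEOP_ENABLED"] = "1"

import torch

a = torch.randn(1024, 1024, device="cuda", dtype=torch.float16)
b = torch.randn(1024, 1024, device="cuda", dtype=torch.float16)
c = a @ b  # routed through at::cuda::blas::gemm, which is now tunable
```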

## Offline Tuning

### Motivation
Offline tuning is intended for workloads with high memory utilization, where regular (online) tuning might run out of memory.

### Workflow
There are two steps:
1) Set the environment variables to collect the untuned GEMMs; this generates `tunableop_untuned?.csv` ("?" is a placeholder for the GPU ID), like:
```
PYTORCH_TUNABLEOP_ENABLED=1
PYTORCH_TUNABLEOP_TUNING=0
PYTORCH_TUNABLEOP_RECORD_UNTUNED=1
...
```
2) Run a Python script that reads the `tunableop_untuned?.csv` and generates the `tunableop_results?.csv`, like:
```
import torch.cuda.tunable as tunable
import os

os.putenv('PYTORCH_TUNABLEOP_ENABLED', '1')
os.putenv('PYTORCH_TUNABLEOP_TUNING', '1')
os.putenv('PYTORCH_TUNABLEOP_RECORD_UNTUNED', '0')
tunable.tune_gemm_in_file("tunableop_untuned?.csv")
```
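A third step is implied: later runs consume the tuned results instead of re-tuning. A minimal sketch, assuming the default filename convention and mirroring the env-var style used above:

```
import os

os.putenv('PYTORCH_TUNABLEOP_ENABLED', '1')
os.putenv('PYTORCH_TUNABLEOP_TUNING', '0')  # reuse results, do not re-tune

import torch
# GEMMs now look up entries from tunableop_results?.csv, read at
# initialization, rather than triggering tuning at runtime.
```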

## Tuning Context
The behavior of TunableOp is currently manipulated through environment variables, the C++ interface of
at::cuda::tunable::getTuningContext(), or the `torch.cuda.tunable` python interfaces. The environment variables take
@ -115,8 +90,6 @@ programmatically since the settings become fixed. Use the C++ or Python APIs ins
| -------------------- | ----------- |
| PYTORCH_TUNABLEOP_ENABLED | Default is 0. Set to 1 to enable. |
| PYTORCH_TUNABLEOP_TUNING | Default is 1. Set to 0 to disable. |
| PYTORCH_TUNABLEOP_RECORD_UNTUNED | Default is 0. Set to 1 to enable. |
| PYTORCH_TUNABLEOP_UNTUNED_FILENAME | Default is 'tunableop_untuned.csv'. |
| PYTORCH_TUNABLEOP_VERBOSE | Default is 0. Set to 1 to enable basic logging. 2 for basic tuning status. 3 for full trace. |
| PYTORCH_TUNABLEOP_VERBOSE_FILENAME | Default is "err" for stderr. Set to "out" for stdout or a filename for capturing verbose logging. |
| PYTORCH_TUNABLEOP_FILENAME | Default is 'tunableop_results.csv'. |
@ -139,8 +112,6 @@ All python APIs exist in the `torch.cuda.tunable` module.
| is_enabled() -> bool | |
| tuning_enable(val: bool = True) -> None | Default is True. |
| tuning_is_enabled() -> bool | |
| record_untuned_enable(val: bool = True) -> None | Default is True. |
| record_untuned_is_enabled() -> bool | |
| set_max_tuning_duration(duration: int) -> None | |
| get_max_tuning_duration() -> int | |
| set_max_tuning_iterations(iterations: int) -> None | |
@ -152,7 +123,6 @@ All python APIs exist in the `torch.cuda.tunable` module.
| write_file_on_exit(val: bool) -> None | Default is True. |
| write_file(filename: Optional[str] = None) -> None | If filename not given, it will call get_filename(). |
| read_file(filename: Optional[str] = None) -> None | If filename not given, it will call get_filename(). |
| tune_gemm_in_file(filename: str) -> None | read an untuned file and tune GEMMs in it. |
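Putting a few of these together, a minimal sketch of driving tuning programmatically instead of through environment variables (an `enable()` setter is assumed to accompany `is_enabled()` above; the matrix sizes and iteration cap are arbitrary example values):

```
import torch
import torch.cuda.tunable as tunable

tunable.enable(True)
tunable.tuning_enable(True)
tunable.set_max_tuning_iterations(10)  # cap per-kernel tuning work

a = torch.randn(512, 512, device="cuda")
b = torch.randn(512, 512, device="cuda")
c = a @ b  # first call for this GEMM shape triggers tuning

tunable.write_file()  # persist results; defaults to get_filename()
```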

### C++ Interface
Example:
@ -112,32 +112,6 @@ void TuningResultsManager::Add(const std::string& op_signature, const std::strin
  AddImpl(op_signature, params_signature, best, it->second);
}

void TuningResultsManager::RecordUntuned(std::ofstream& untuned_file, const std::string& op_signature, const std::string& params_signature) {
  std::scoped_lock l{lock_};
  if (!untuned_file.good()) {
    TORCH_WARN_ONCE("failed to open file for writing; untuned gemm will not be saved");
    return;
  } else {
    bool isNew = false;
    auto it = untuned_results_.find(op_signature);
    if (it == untuned_results_.end()) {
      it = untuned_results_.insert({op_signature, {}}).first;
      isNew = true;
    }

    auto it_kernel_map = it->second.find(params_signature);
    if (it_kernel_map == it->second.end()) {
      it->second.insert(params_signature);
      isNew = true;
    }

    if (isNew) {
      untuned_file << op_signature << "," << params_signature << std::endl;
      TUNABLE_LOG3("Untuned,", op_signature, ",", params_signature);
    }
  }
}

void TuningResultsManager::Delete(const std::string& op_signature, const std::string& params_signature) {
  std::scoped_lock l{lock_};

@ -385,7 +359,6 @@ TuningStatus TuningResultsValidator::ValidatePyTorchVersion(const std::string& v
TuningContext::TuningContext() :
    enable_{false},
    tuning_enable_{true},
    record_untuned_enable_{false},
    manager_initialized_{false},
    write_file_on_exit_{true},
    numerics_check_enable_{false},
@ -396,7 +369,6 @@ TuningContext::TuningContext() :
    icache_flush_{true},
    rotating_buffer_size_{-1},
    filename_{},
    untuned_file_{},
    results_count_from_input_file_{0}
{
}
@ -422,10 +394,6 @@ TuningContext::~TuningContext() {
      }
    }
  }

  if (untuned_file_.good()) {
    untuned_file_.close();
  }
}

void TuningContext::EnableTunableOp(bool value) {
|
||||
@ -456,15 +424,6 @@ void TuningContext::EnableTuning(bool value) {
|
||||
}
|
||||
}
|
||||
|
||||
void TuningContext::EnableRecordUntuned(bool value) {
|
||||
record_untuned_enable_ = value;
|
||||
if (value) {
|
||||
TUNABLE_LOG1("Enable Record Untuned for TunableOp");
|
||||
} else {
|
||||
TUNABLE_LOG1("Disable Record Untuned for TunableOp");
|
||||
}
|
||||
}
|
||||
|
||||
bool TuningContext::IsTuningEnabled() const {
|
||||
static const char *env = std::getenv("PYTORCH_TUNABLEOP_TUNING");
|
||||
if (env != nullptr && strcmp(env, "0") == 0) {
|
||||
@ -473,33 +432,6 @@ bool TuningContext::IsTuningEnabled() const {
|
||||
return tuning_enable_;
|
||||
}
|
||||
|
||||
bool TuningContext::IsRecordUntunedEnabled() const {
|
||||
static const char *env = std::getenv("PYTORCH_TUNABLEOP_RECORD_UNTUNED");
|
||||
if (env != nullptr && strcmp(env, "1") == 0) {
|
||||
return true;
|
||||
}
|
||||
return record_untuned_enable_;
|
||||
}
|
||||
|
||||
std::ofstream& TuningContext::GetUntunedFile(){
|
||||
if (!untuned_file_.is_open()) {
|
||||
const char *env = std::getenv("PYTORCH_TUNABLEOP_UNTUNED_FILENAME");
|
||||
std::string filename = (env == nullptr) ? "tunableop_untuned.csv" : env;
|
||||
|
||||
std::string device = c10::str(int(c10::cuda::current_device()));
|
||||
std::size_t found = filename.rfind(".");
|
||||
if (found != std::string::npos) {
|
||||
filename.insert(found, device);
|
||||
} else {
|
||||
// all else fails, just append
|
||||
filename.append(device);
|
||||
}
|
||||
|
||||
untuned_file_ = std::ofstream(filename, std::ios::out | std::ios::trunc);
|
||||
}
|
||||
return untuned_file_;
|
||||
}
|
||||
|
||||
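The filename mangling above inserts the device index just before the extension. A sketch of the same logic in Python (device index 3 assumed for illustration):

```python
# Mirrors GetUntunedFile()'s name mangling: "tunableop_untuned.csv" on
# device 3 becomes "tunableop_untuned3.csv"; with no extension, the
# index is appended instead.
def untuned_filename(filename: str, device: int) -> str:
    dot = filename.rfind(".")
    if dot != -1:
        return filename[:dot] + str(device) + filename[dot:]
    return filename + str(device)  # all else fails, just append

print(untuned_filename("tunableop_untuned.csv", 3))  # tunableop_untuned3.csv
```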
void TuningContext::WriteFileOnExit(bool value) {
  write_file_on_exit_ = value;
}
@ -613,7 +545,7 @@ TuningResultsManager& TuningContext::GetTuningResultsManager() {
      SetFilename(filename, true);
    }
    auto filename = GetFilename();
    if (!filename.empty() && !IsRecordUntunedEnabled()) {
    if (!filename.empty()) {
      ReadFile(filename);
      // attempt immediately to open file for writing to catch errors early
      std::ofstream file(filename, std::ios::out | std::ios::app);
@ -19,7 +19,6 @@
#include <string>
#include <type_traits>
#include <unordered_map>
#include <unordered_set>
#include <utility>
#include <vector>

@ -88,7 +87,6 @@ class TORCH_CUDA_CPP_API ResultEntry {

typedef std::unordered_map<std::string, ResultEntry> KernelMap;
typedef std::unordered_map<std::string, KernelMap> ResultsMap;
typedef std::unordered_map<std::string, std::unordered_set<std::string>> UntunedMap;

struct TORCH_CUDA_CPP_API TuningResults {
  // Validates if these results are compatible with the libraries
@ -131,12 +129,9 @@ class TORCH_CUDA_CPP_API TuningResultsManager {

    size_t GetSize();

    void RecordUntuned( std::ofstream& untuned_file, const std::string& op_signature, const std::string& params_signature);
  private:
    std::mutex lock_;
    ResultsMap results_;
    UntunedMap untuned_results_;

};

class TORCH_CUDA_CPP_API TuningResultsValidator {
@ -178,10 +173,6 @@ class TORCH_CUDA_CPP_API TuningContext {
    void EnableTuning(bool value);
    bool IsTuningEnabled() const;

    void EnableRecordUntuned(bool value);
    bool IsRecordUntunedEnabled() const;
    std::ofstream& GetUntunedFile();

    void EnableNumericsCheck(bool value);
    bool IsNumericsCheckEnabled() const;

@ -222,7 +213,6 @@ class TORCH_CUDA_CPP_API TuningContext {
  private:
    bool enable_;
    bool tuning_enable_;
    bool record_untuned_enable_;
    bool manager_initialized_;
    bool write_file_on_exit_;
    bool numerics_check_enable_;
@ -236,7 +226,6 @@ class TORCH_CUDA_CPP_API TuningContext {
    mutable c10::once_flag manager_init_once_;
    TuningResultsValidator validator_;
    std::string filename_;
    std::ofstream untuned_file_;
    size_t results_count_from_input_file_;
};

@ -54,15 +54,9 @@ class TunableOp {
      auto params_sig = params->Signature();
      result = mgr.Lookup(op_sig, params_sig);
      // If no previous tuning result is found, we do the tuning iff tuning is enabled
      if (result == ResultEntry::Null()) {
        if (ctx->IsTuningEnabled()) {
          result = FindFastest(params);
          mgr.Add(op_sig, params_sig, result);
        }
        else if (ctx->IsRecordUntunedEnabled()) {
          // or record the gemm into file
          mgr.RecordUntuned(ctx->GetUntunedFile(), op_sig, params_sig);
        }
      if (result == ResultEntry::Null() && ctx->IsTuningEnabled()) {
        result = FindFastest(params);
        mgr.Add(op_sig, params_sig, result);
      }
      }
      else {

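To summarize the lookup path in this hunk (assumption-level pseudocode mirroring the C++ above, not a PyTorch API):

```python
# On a cache miss, either tune now (tuning enabled) or append the
# signatures to the untuned CSV (record-untuned enabled); otherwise
# fall through with a null result.
def lookup_or_tune(mgr, ctx, op_sig, params_sig, params, find_fastest):
    result = mgr.lookup(op_sig, params_sig)
    if result is None:
        if ctx.is_tuning_enabled():
            result = find_fastest(params)       # benchmark candidate kernels
            mgr.add(op_sig, params_sig, result)
        elif ctx.is_record_untuned_enabled():
            mgr.record_untuned(ctx.get_untuned_file(), op_sig, params_sig)
    return result
```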
@ -1,13 +1,9 @@
#pragma once

#include <ATen/core/Generator.h>

#include <c10/core/Allocator.h>
#include <c10/core/Device.h>
#include <c10/core/Stream.h>

#include <c10/core/Allocator.h>
C10_DIAGNOSTIC_PUSH_AND_IGNORED_IF_DEFINED("-Wunused-parameter")

namespace at {

// AcceleratorHooksInterface is a shared interface provided by all
@ -23,10 +19,6 @@ struct TORCH_API AcceleratorHooksInterface {
  // Whether the device at device_index is fully initialized or not.
  virtual bool hasPrimaryContext(DeviceIndex device_index) const = 0;

  virtual void init() const {
    TORCH_CHECK(false, "Backend doesn`t support init()");
  }

  virtual DeviceIndex deviceCount() const {
    return 0;
  }
@ -58,18 +50,7 @@ struct TORCH_API AcceleratorHooksInterface {
    TORCH_CHECK(false, "Backend doesn't support getPinnedMemoryAllocator()");
    return nullptr;
  }

  virtual const Generator& getDefaultGenerator(
      C10_UNUSED DeviceIndex device_index = -1) const {
    TORCH_CHECK(false, "Backend doesn`t support getDefaultGenerator()");
  }

  virtual Generator getNewGenerator(
      C10_UNUSED DeviceIndex device_index = -1) const {
    TORCH_CHECK(false, "Backend doesn`t support getNewGenerator()");
  }
};

} // namespace at

C10_DIAGNOSTIC_POP()

@ -6,13 +6,16 @@

#include <ATen/detail/AcceleratorHooksInterface.h>

// NB: Class must live in `at` due to limitations of Registry.h.
// Forward-declares at::Generator and at::cuda::NVRTC
namespace at {

// Forward-declares at::cuda::NVRTC
struct Generator;
namespace cuda {
struct NVRTC;
} // namespace cuda
} // namespace at

// NB: Class must live in `at` due to limitations of Registry.h.
namespace at {

#ifdef _MSC_VER
constexpr const char* CUDA_HELP =
@ -62,16 +65,12 @@ struct TORCH_API CUDAHooksInterface : AcceleratorHooksInterface {
  ~CUDAHooksInterface() override = default;

  // Initialize THCState and, transitively, the CUDA state
  void init() const override {
  virtual void initCUDA() const {
    TORCH_CHECK(false, "Cannot initialize CUDA without ATen_cuda library. ", CUDA_HELP);
  }

  const Generator& getDefaultGenerator(
      C10_UNUSED DeviceIndex device_index = -1) const override {
    TORCH_CHECK(
        false,
        "Cannot get default CUDA generator without ATen_cuda library. ",
        CUDA_HELP);
  virtual const Generator& getDefaultCUDAGenerator(C10_UNUSED DeviceIndex device_index = -1) const {
    TORCH_CHECK(false, "Cannot get default CUDA generator without ATen_cuda library. ", CUDA_HELP);
  }

  virtual Device getDeviceFromPtr(void* /*data*/) const {

@ -1,13 +1,19 @@
#pragma once

#include <c10/core/Allocator.h>
#include <c10/core/GeneratorImpl.h>
#include <c10/util/Exception.h>

#include <c10/util/Registry.h>

#include <ATen/detail/AcceleratorHooksInterface.h>

#include <memory>

namespace at {
class Context;
}

// NB: Class must live in `at` due to limitations of Registry.h.
namespace at {

@ -20,13 +26,13 @@ struct TORCH_API HIPHooksInterface : AcceleratorHooksInterface {
  // squelch -Werror=non-virtual-dtor
  ~HIPHooksInterface() override = default;

  void init() const override {
    TORCH_CHECK(false, "Cannot initialize HIP without ATen_hip library.");
  // Initialize the HIP library state
  virtual void initHIP() const {
    AT_ERROR("Cannot initialize HIP without ATen_hip library.");
  }

  const Generator& getDefaultGenerator(
      C10_UNUSED DeviceIndex device_index = -1) const override {
    TORCH_CHECK(false, "Cannot initialize HIP without ATen_hip library.");
  virtual std::unique_ptr<c10::GeneratorImpl> initHIPGenerator(Context*) const {
    AT_ERROR("Cannot initialize HIP generator without ATen_hip library.");
  }

  virtual bool hasHIP() const {
@ -45,6 +51,10 @@ struct TORCH_API HIPHooksInterface : AcceleratorHooksInterface {
    AT_ERROR("Pinned memory requires HIP.");
  }

  virtual void registerHIPTypes(Context*) const {
    AT_ERROR("Cannot registerHIPTypes() without ATen_hip library.");
  }

  virtual int getNumGPUs() const {
    return 0;
  }

@ -1,33 +1,25 @@
#pragma once

#include <ATen/detail/AcceleratorHooksInterface.h>

#include <ATen/core/Generator.h>
#include <c10/core/Allocator.h>
#include <c10/util/Exception.h>
#include <c10/util/Registry.h>

namespace at {

struct TORCH_API IPUHooksInterface : AcceleratorHooksInterface {
  ~IPUHooksInterface() override = default;
struct TORCH_API IPUHooksInterface {
  virtual ~IPUHooksInterface() = default;

  void init() const override {
    TORCH_CHECK(false, "Cannot initialize IPU without ATen_ipu library.");
  virtual const Generator& getDefaultIPUGenerator(
      DeviceIndex device_index [[maybe_unused]] = -1) const {
    AT_ERROR(
        "Cannot get the default IPU generator: the IPU backend is not "
        "available.");
  }

  bool hasPrimaryContext(DeviceIndex device_index) const override {
    TORCH_CHECK(false, "Cannot initialize IPU without ATen_ipu library.");
    return false;
  }

  const Generator& getDefaultGenerator(
      C10_UNUSED DeviceIndex device_index = -1) const override {
    TORCH_CHECK(false, "Cannot initialize IPU without ATen_ipu library.");
  }

  Generator getNewGenerator(
      DeviceIndex device_index [[maybe_unused]] = -1) const override {
    TORCH_CHECK(false, "Cannot initialize IPU without ATen_ipu library.");
  virtual Generator newIPUGenerator(DeviceIndex device_index [[maybe_unused]] = -1) const {
    AT_ERROR(
        "Cannot create a new IPU generator: the IPU backend is not available.");
  }
};

@ -3,24 +3,13 @@
#include <c10/util/Exception.h>
#include <c10/util/Registry.h>

#include <ATen/detail/AcceleratorHooksInterface.h>

// NB: Class must live in `at` due to limitations of Registry.h.
namespace at {

struct TORCH_API MAIAHooksInterface : AcceleratorHooksInterface {
struct TORCH_API MAIAHooksInterface {
  // This should never actually be implemented, but it is used to
  // squelch -Werror=non-virtual-dtor
  ~MAIAHooksInterface() override = default;

  void init() const override {
    TORCH_CHECK(false, "Cannot initialize MAIA without ATen_maia library.");
  }

  bool hasPrimaryContext(DeviceIndex device_index) const override {
    TORCH_CHECK(false, "Cannot initialize MAIA without ATen_maia library.");
    return false;
  }
  virtual ~MAIAHooksInterface() = default;

  virtual std::string showConfig() const {
    TORCH_CHECK(false, "Cannot query detailed MAIA version information.");

@ -2,9 +2,9 @@

#pragma once

#include <ATen/detail/AcceleratorHooksInterface.h>

#include <c10/core/Allocator.h>
#include <ATen/core/Generator.h>
#include <ATen/detail/AcceleratorHooksInterface.h>
#include <c10/util/Exception.h>
#include <c10/util/Registry.h>

@ -22,7 +22,7 @@ struct TORCH_API MPSHooksInterface : AcceleratorHooksInterface {
  ~MPSHooksInterface() override = default;

  // Initialize the MPS library state
  void init() const override {
  virtual void initMPS() const {
    FAIL_MPSHOOKS_FUNC(__func__);
  }
  virtual bool hasMPS() const {
@ -31,8 +31,7 @@ struct TORCH_API MPSHooksInterface : AcceleratorHooksInterface {
  virtual bool isOnMacOSorNewer(unsigned major = 13, unsigned minor = 0) const {
    FAIL_MPSHOOKS_FUNC(__func__);
  }
  const Generator& getDefaultGenerator(
      C10_UNUSED DeviceIndex device_index = -1) const override {
  virtual const Generator& getDefaultMPSGenerator() const {
    FAIL_MPSHOOKS_FUNC(__func__);
  }
  virtual Allocator* getMPSDeviceAllocator() const {

@ -31,7 +31,7 @@ struct TORCH_API MTIAHooksInterface : AcceleratorHooksInterface {

  ~MTIAHooksInterface() override = default;

  void init() const override {
  virtual void initMTIA() const {
    // Avoid logging here, since MTIA needs init devices first then it will know
    // how many devices are available. Make it as no-op if mtia extension is not
    // dynamically loaded.

@ -1,20 +1,18 @@
#pragma once

#include <ATen/core/Generator.h>
#include <ATen/detail/AcceleratorHooksInterface.h>
#include <c10/core/Allocator.h>
#include <c10/core/Device.h>
#include <c10/core/Storage.h>
#include <c10/util/Exception.h>

C10_DIAGNOSTIC_PUSH_AND_IGNORED_IF_DEFINED("-Wunused-parameter")

namespace at {

struct TORCH_API PrivateUse1HooksInterface : AcceleratorHooksInterface {
  ~PrivateUse1HooksInterface() override = default;

  const at::Generator& getDefaultGenerator(
      c10::DeviceIndex device_index) const override {
  virtual const at::Generator& getDefaultGenerator(
      c10::DeviceIndex device_index) const {
    TORCH_CHECK_NOT_IMPLEMENTED(
        false,
        "You should register `PrivateUse1HooksInterface` for PrivateUse1 before call `getDefaultGenerator`.");
@ -26,23 +24,23 @@ struct TORCH_API PrivateUse1HooksInterface : AcceleratorHooksInterface {
        "You should register `PrivateUse1HooksInterface` for PrivateUse1 before call `getDeviceFromPtr`.");
  }

  bool isPinnedPtr(const void* data) const override {
  virtual bool isPinnedPtr(const void* data) const override {
    return false;
  }

  Allocator* getPinnedMemoryAllocator() const override {
  virtual Allocator* getPinnedMemoryAllocator() const override {
    TORCH_CHECK(
        false,
        "You should register `PrivateUse1HooksInterface` for PrivateUse1 before call `getPinnedMemoryAllocator`.");
  }

  bool hasPrimaryContext(DeviceIndex device_index) const override {
  virtual bool hasPrimaryContext(DeviceIndex device_index) const override {
    TORCH_CHECK_NOT_IMPLEMENTED(
        false,
        "You should register `PrivateUse1HooksInterface` for PrivateUse1 before call `hasPrimaryContext`.");
  }

  void init() const override {}
  virtual void initPrivateUse1() const {}
  virtual void resizePrivateUse1Bytes(
      const c10::Storage& storage,
      size_t newsize) const {

@ -4,6 +4,7 @@
#include <c10/util/Exception.h>
#include <c10/util/Registry.h>

#include <ATen/core/Generator.h>
#include <ATen/detail/AcceleratorHooksInterface.h>

C10_DIAGNOSTIC_PUSH_AND_IGNORED_IF_DEFINED("-Wunused-parameter")
@ -13,8 +14,10 @@ namespace at {
struct TORCH_API XPUHooksInterface : AcceleratorHooksInterface{
  ~XPUHooksInterface() override = default;

  void init() const override {
    TORCH_CHECK(false, "Cannot initialize XPU without ATen_xpu library.");
  virtual void initXPU() const {
    TORCH_CHECK(
        false,
        "Cannot initialize XPU without ATen_xpu library.");
  }

  virtual bool hasXPU() const {
@ -31,15 +34,12 @@ struct TORCH_API XPUHooksInterface : AcceleratorHooksInterface{
    TORCH_CHECK(false, "Cannot get XPU global device index without ATen_xpu library.");
  }

  const Generator& getDefaultGenerator(
      C10_UNUSED DeviceIndex device_index = -1) const override {
    TORCH_CHECK(
        false, "Cannot get default XPU generator without ATen_xpu library.");
  virtual Generator getXPUGenerator(C10_UNUSED DeviceIndex device_index = -1) const {
    TORCH_CHECK(false, "Cannot get XPU generator without ATen_xpu library.");
  }

  Generator getNewGenerator(
      C10_UNUSED DeviceIndex device_index = -1) const override {
    TORCH_CHECK(false, "Cannot get XPU generator without ATen_xpu library.");
  virtual const Generator& getDefaultXPUGenerator(C10_UNUSED DeviceIndex device_index = -1) const {
    TORCH_CHECK(false, "Cannot get default XPU generator without ATen_xpu library.");
  }

  virtual DeviceIndex getNumGPUs() const {

aten/src/ATen/function_wrapper.py: 0 changes (normal file)
@ -362,7 +362,6 @@ static std::tuple<Tensor,Tensor,Tensor> convolution_backward_plumbing(
    const Tensor& grad_output_, const Tensor& input_, const Tensor& weight_,
    const c10::OptionalArrayRef<SymInt> bias_sizes_opt,
    c10::SymIntArrayRef stride, c10::SymIntArrayRef padding, c10::SymIntArrayRef dilation, bool transposed,
    // NOLINTNEXTLINE(performance-unnecessary-value-param)
    c10::SymIntArrayRef output_padding, c10::SymInt groups, std::array<bool, 3> output_mask) {
  const auto maybe_layer = maybeCurrentDynamicLayer();
  vmap_check_escaped(maybe_layer, "convolution_backward_plumbing");

@ -458,16 +458,6 @@ inline int64_t get_bdim_size2(
  TORCH_INTERNAL_ASSERT(false);
}

inline c10::SymInt get_bdim_size2_symint(
    const Tensor& a_value, std::optional<int64_t> a_bdim,
    const Tensor& b_value, std::optional<int64_t> b_bdim) {
  if (a_bdim)
    return a_value.sym_size(*a_bdim);
  if (b_bdim)
    return b_value.sym_size(*b_bdim);
  TORCH_INTERNAL_ASSERT(false);
}

// [start, start + 1, ..., stop - 1]
inline VmapDimVector range(int64_t start, int64_t stop) {
  TORCH_INTERNAL_ASSERT(stop >= start);

@ -8,7 +8,7 @@
#include <ATen/core/dispatch/Dispatcher.h>
#include <ATen/functorch/BatchRulesHelper.h>

namespace at::functorch {
namespace at { namespace functorch {

#define OP_DECOMPOSE(op) m.impl(#op, static_cast<decltype(&ATEN_FN(op))>(native::op));
#define OP_DECOMPOSE2(op, overload) m.impl(#op"."#overload, static_cast<decltype(&ATEN_FN2(op, overload))>(native::op));
@ -20,4 +20,4 @@ TORCH_LIBRARY_IMPL(aten, FuncTorchBatched, m) {
  OP_DECOMPOSE(_unsafe_masked_index_put_accumulate);
}

}
}}

@ -226,7 +226,7 @@ static Tensor one_hot_decomposition_hack(const Tensor &self, int64_t num_classes
    if (num_classes <= 0) {
      AT_ERROR("Can not infer total number of classes from empty tensor.");
    } else {
      shape.emplace_back(num_classes);
      shape.push_back(num_classes);
      return at::empty_symint(shape, self.options());
    }
  }
@ -246,7 +246,7 @@ static Tensor one_hot_decomposition_hack(const Tensor &self, int64_t num_classes
  //   TORCH_CHECK(num_classes > self.max().item().toLong(), "Class values must be smaller than num_classes.");
  // }

  shape.emplace_back(num_classes);
  shape.push_back(num_classes);
  Tensor ret = at::zeros_symint(shape, self.options());
  return ret.scatter(-1, self.unsqueeze(-1), 1);
}

@ -213,7 +213,7 @@ static std::tuple<Tensor,Tensor> native_dropout_batching_rule(const Tensor& tens
  return std::make_tuple(output, mask);
}

static Tensor multinomial_batching_rule(const Tensor& self, const int64_t num_samples, const bool replacement, std::optional<Generator> generator) {
static Tensor multinomial_batching_rule(const Tensor& self, const int64_t num_samples, const bool replacement, const std::optional<Generator> generator) {
  c10::impl::ExcludeDispatchKeyGuard guard(DispatchKey::FuncTorchVmapMode);
  auto maybe_layer = maybeCurrentDynamicLayer();
  const auto cur_level = maybe_layer->layerId();
@ -237,7 +237,7 @@ static Tensor multinomial_batching_rule(const Tensor& self, const int64_t num_sa
    if (is_2D_case) {
      self_value = reshape_dim_into(0, 0, self_value);
    }
    auto out = multinomial(self_value, num_samples, replacement, std::move(generator));
    auto out = multinomial(self_value, num_samples, replacement, generator);
    if (is_2D_case) {
      out = reshape_dim_outof_symint(0, maybe_layer->batchSize(), out);
    }
@ -249,7 +249,7 @@ static Tensor multinomial_batching_rule(const Tensor& self, const int64_t num_sa
  // Must be same randomness with unbatched input
  // 1D case: S -> multinomial(S) -> S
  // 2D case: MS -> multinomial(MS) -> MS
  return multinomial(self_value, num_samples, replacement, std::move(generator));
  return multinomial(self_value, num_samples, replacement, generator);
}

template <typename A, A a, typename C>

@ -58,7 +58,7 @@ static int64_t get_max_index_logical_dim(
static std::vector<std::optional<Tensor>> batchIndices(
    ArrayRef<std::optional<Tensor>> indices,
    ArrayRef<std::optional<int64_t>> indices_bdims,
    const c10::SymInt& batch_size,
    int64_t batch_size,
    std::optional<int64_t> self_bdim,
    std::optional<int64_t> values_bdim = std::nullopt) {
  // There are 3 main cases:
@ -89,7 +89,7 @@ static std::vector<std::optional<Tensor>> batchIndices(

  for (size_t i = 0; i < indices.size(); i++) {
    auto index = indices[i];
    if (index.has_value() && index->sym_numel() != 0) {
    if (index.has_value() && index->numel() != 0) {
      const auto idx_bdim = indices_bdims[i];
      indices_.emplace_back(maybePadToLogicalRank(moveBatchDimToFront(index.value(), idx_bdim), idx_bdim, maxLogicalRank));
      if (index.value().dtype() == kBool && indices_bdims[i].has_value()) {
@ -346,10 +346,10 @@ namespace {
  // Code is mostly duplicated from
  // https://github.com/pytorch/pytorch/blob/fb0e27d38a8fdab4e1c14d6378c9e41cb30fd6a3
  // /aten/src/ATen/native/TensorAdvancedIndexing.cpp#L294-L312
  VmapSymDimVector compute_indexed_shape(const Tensor &src, TensorList indices_list)
  VmapDimVector compute_indexed_shape(const Tensor &src, TensorList indices_list)
  {
    int64_t dims_before = 0, dims_indexed = 0;
    SymIntArrayRef replacement_shape;
    IntArrayRef replacement_shape;
    for (const auto dim : c10::irange(indices_list.size())) {
      if (!indices_list[dim].defined()) {
        if (dims_indexed == 0) {
@ -357,7 +357,7 @@ namespace {
        }
      } else {
        dims_indexed++;
        replacement_shape = indices_list[dim].sym_sizes();
        replacement_shape = indices_list[dim].sizes();
      }
    }

@ -365,7 +365,7 @@ namespace {
    // The offset in these dimensions is computed by the kernel using the index tensor's
    // values and the stride of src. The new shape is not meaningful. It's used to make
    // the shape compatible with the result tensor.
    auto shape = VmapSymDimVector(src.sym_sizes());
    auto shape = VmapDimVector(src.sizes());
    int64_t end = dims_before + dims_indexed;
    shape.erase(shape.begin() + dims_before, shape.begin() + end);
    shape.insert(shape.begin() + dims_before, replacement_shape.begin(), replacement_shape.end());
@ -375,7 +375,7 @@ namespace {
  // Code is mostly duplicated from
  // https://github.com/pytorch/pytorch/blob/fb0e27d38a8fdab4e1c14d6378c9e41cb30fd6a3
  // /aten/src/ATen/native/TensorAdvancedIndexing.cpp#L379-L405
  VmapSymDimVector get_indexed_shape(Tensor self, const torch::List<std::optional<at::Tensor>> &orig)
  VmapDimVector get_indexed_shape(Tensor self, const torch::List<std::optional<at::Tensor>> &orig)
  {
    at::native::checkIndexTensorTypes(orig, /*allow_int*/ true);
    // first expand BoolTensor (masks) or ByteTensor (masks) into 1 or more LongTensors
@ -406,13 +406,13 @@ namespace {
      ArrayRef<std::optional<int64_t>> indices_bdims,
      const Tensor &values,
      std::optional<int64_t> values_bdim,
      std::optional<c10::SymInt> opt_batch_size = {}) {
      std::optional<int64_t> opt_batch_size = {}) {

    Tensor self_ = moveBatchDimToFront(self, self_bdim);
    Tensor values_ = moveBatchDimToFront(values, values_bdim);
    // for the inplace variants `index_put_` and `_index_put_impl_` we find the batch_size
    // here, while for `index_put` it is found outside of this function.
    const auto batch_size = opt_batch_size ? opt_batch_size.value() : self_.sym_size(0);
    const auto batch_size = opt_batch_size ? opt_batch_size.value() : self_.size(0);
    self_ = ensure_has_bdim(self_, self_bdim.has_value(), batch_size);
    values_ = ensure_has_bdim(values_, values_bdim.has_value(), batch_size);
    TORCH_INTERNAL_ASSERT(indices.size() == indices_bdims.size());
@ -431,7 +431,7 @@ namespace {

    // number of unit dims (for broadcasting value to indexed_shape)
    auto n_unit_dims = indexed_shape.size() - values_sizes.size();
    VmapSymDimVector new_values_shape(values_sizes.size() + n_unit_dims);
    VmapDimVector new_values_shape(values_sizes.size() + n_unit_dims);

    // add the batch-dim
    new_values_shape[0] = batch_size;
@ -445,7 +445,7 @@ namespace {
      // since the batch and unit dims are already filled.
      new_values_shape[idx + n_unit_dims] = values_sizes[idx];
    }
    values_ = values_.view_symint(new_values_shape);
    values_ = values_.view(new_values_shape);
  }

  return std::make_tuple(self_, indices_, values_);
@ -613,14 +613,14 @@ std::tuple<Tensor, std::optional<int64_t>> index_put_batch_rule(
  TORCH_INTERNAL_ASSERT(indices.size() == indices_bdims.size());

  // find the batch_size
  c10::SymInt batch_size = 0;
  int64_t batch_size = 0;
  if (self_bdim || values_bdim) {
    batch_size = get_bdim_size2_symint(self, self_bdim, values, values_bdim);
    batch_size = get_bdim_size2(self, self_bdim, values, values_bdim);
  } else {
    // one or more of the indices is batched.
    for (size_t i = 0; i < indices.size(); i++) {
      if (indices_bdims[i] && indices[i].has_value()) {
        batch_size = indices[i].value().sym_size(*indices_bdims[i]);
        batch_size = indices[i].value().size(*indices_bdims[i]);
        break;
      }
    }

@ -102,7 +102,7 @@ static Tensor moveDimToFrontAndExpand(Tensor tensor, std::optional<int64_t> dim,
  } else {
    tensor = tensor.unsqueeze(0);
    auto expanded_sizes = tensor.sym_sizes().vec();
    expanded_sizes[0] = std::move(size);
    expanded_sizes[0] = size;
    tensor = tensor.expand_symint(expanded_sizes);
  }
  return tensor;

@ -4,6 +4,7 @@
#include <ATen/WrapDimUtils.h>
#include <ATen/functorch/TensorWrapper.h>
#include <ATen/functorch/BatchedTensorImpl.h>
#include <ATen/ATen.h>
#include <ATen/Dispatch.h>
#include <c10/util/irange.h>
#include <ATen/NamedTensorUtils.h>

@ -12,15 +12,14 @@ namespace at::mps {
// The real implementation of MPSHooksInterface
struct MPSHooks : public at::MPSHooksInterface {
  MPSHooks(at::MPSHooksArgs) {}
  void init() const override;
  void initMPS() const override;

  // MPSDevice interface
  bool hasMPS() const override;
  bool isOnMacOSorNewer(unsigned major, unsigned minor) const override;

  // MPSGeneratorImpl interface
  const Generator& getDefaultGenerator(
      DeviceIndex device_index = -1) const override;
  const Generator& getDefaultMPSGenerator() const override;

  // MPSStream interface
  void deviceSynchronize() const override;

@ -10,7 +10,7 @@

namespace at::mps {

void MPSHooks::init() const {
void MPSHooks::initMPS() const {
  C10_LOG_API_USAGE_ONCE("aten.init.mps");
  // TODO: initialize MPS devices and streams here
}
@ -59,7 +59,7 @@ Allocator* MPSHooks::getMPSDeviceAllocator() const {
  return at::mps::GetMPSAllocator();
}

const Generator& MPSHooks::getDefaultGenerator([[maybe_unused]] DeviceIndex device_index) const {
const Generator& MPSHooks::getDefaultMPSGenerator() const {
  return at::mps::detail::getDefaultMPSGenerator();
}

aten/src/ATen/native/LegacyBridge.cpp: 0 changes (normal file)
@ -207,7 +207,6 @@ TORCH_META_FUNC(mm)(const Tensor & self, const Tensor & mat2) {

TORCH_META_FUNC(linalg_vector_norm)(const Tensor& self, const Scalar& scalar_ord, OptionalIntArrayRef opt_dim, bool keepdim, std::optional<ScalarType> opt_dtype) {
  at::native::checkFloatingOrComplex(self, "linalg.vector_norm");
  TORCH_CHECK(!at::isComplexType(scalar_ord.type()), "linalg.vector_norm: Expected a non-complex scalar as the order of norm.");

  auto dim = opt_dim.value_or(IntArrayRef{});
  // Casting a large integer to a double will just introduce an error for
@ -2893,7 +2892,6 @@ Tensor linalg_matrix_norm(
    bool keepdim,
    std::optional<ScalarType> opt_dtype) {
  // Check ord first as it will be used in the dtype check of A
  TORCH_CHECK(!at::isComplexType(scalar_ord.type()), "linalg.matrix_norm: Expected a non-complex scalar as the order of norm.");
  auto ord = scalar_ord.toDouble();
  auto abs_ord = std::abs(ord);
  TORCH_CHECK(abs_ord == 2. || abs_ord == 1. || abs_ord == INFINITY, "linalg.matrix_norm: Order ", ord, " not supported.");

@ -2291,7 +2291,7 @@ bool cpu_equal(const Tensor& self, const Tensor& other) {
        other_data += strides[1];
      }
    });
  }), kBool, kBFloat16, kHalf, AT_EXPAND(AT_ALL_TYPES_AND_COMPLEX), AT_EXPAND(AT_FLOAT8_TYPES), AT_EXPAND(AT_BAREBONES_UNSIGNED_TYPES));
  }), kBool, kBFloat16, kHalf, AT_EXPAND(AT_ALL_TYPES_AND_COMPLEX), AT_EXPAND(AT_BAREBONES_UNSIGNED_TYPES));
  return result.load();
}

@ -5,8 +5,6 @@
#include <ATen/Dispatch.h>
#include <c10/util/irange.h>

#include <limits>

#ifndef AT_PER_OPERATOR_HEADERS
#include <ATen/Functions.h>
#include <ATen/NativeFunctions.h>
@ -35,17 +33,6 @@ Tensor _bincount_cpu_template(
    AT_ERROR("bincount only supports 1-d non-negative integral inputs.");
  }

  // Ensure max_val < 2 ^ 63 - 1 (9223372036854775807)
  auto max_val = *self.max().data_ptr<input_t>();
  if (max_val >= std::numeric_limits<int64_t>::max()) {
    AT_ERROR(
        "maximum value of input overflowed, it should be < ",
        std::numeric_limits<int64_t>::max(),
        " but got ",
        max_val
    );
  }

  bool has_weights = weights.defined();
  if (has_weights && (weights.dim() != 1 || weights.size(0) != self.size(0))) {
    AT_ERROR("weights should be 1-d and have the same length as input");
@ -53,7 +40,7 @@ Tensor _bincount_cpu_template(

  Tensor output;
  int64_t self_size = self.size(0);
  int64_t nbins = static_cast<int64_t>(max_val) + 1L;
  int64_t nbins = static_cast<int64_t>(*self.max().data_ptr<input_t>()) + 1L;
  nbins = std::max(nbins, minlength); // at least minlength # of bins

  const input_t* self_p = self.const_data_ptr<input_t>();

@ -1435,8 +1435,8 @@ Tensor & index_select_out_cpu_(const Tensor & self, int64_t dim, const Tensor &
        });
      });
    } else {
      AT_DISPATCH_V2(
        self.scalar_type(), "index_select", AT_WRAP([&index_contig, &self, &result, &dim, &numel] {
      AT_DISPATCH_ALL_TYPES_AND_COMPLEX_AND4(ScalarType::ComplexHalf, ScalarType::Half, ScalarType::Bool, ScalarType::BFloat16,
        self.scalar_type(), "index_select", [&index_contig, &self, &result, &dim, &numel] {
          auto self_stride = self.dim() == 0 ? 1 : self.stride(dim);
          auto result_stride = result.dim() == 0 ? 1 : result.stride(dim);

@ -1453,7 +1453,7 @@ Tensor & index_select_out_cpu_(const Tensor & self, int64_t dim, const Tensor &
            *(result_data_ptr + i * result_stride) = *self_ip;
          }
        });
      }), AT_EXPAND(AT_ALL_TYPES_AND_COMPLEX), ScalarType::ComplexHalf, ScalarType::Half, ScalarType::Bool, ScalarType::BFloat16, AT_EXPAND(AT_FLOAT8_TYPES));
      });
    }
  }

@ -106,7 +106,7 @@ inline Tensor& fill_empty_deterministic_(Tensor& tensor) {
    AT_DISPATCH_V2(
        tensor.scalar_type(), "fill_empty_deterministic_", AT_WRAP([&]() {
          tensor.fill_(std::numeric_limits<scalar_t>::quiet_NaN());
        }), AT_EXPAND(AT_FLOATING_TYPES), AT_EXPAND(AT_COMPLEX_TYPES), AT_EXPAND(AT_FLOAT8_TYPES), kBFloat16, kHalf, kComplexHalf);
        }), AT_EXPAND(AT_FLOATING_TYPES), AT_EXPAND(AT_COMPLEX_TYPES), AT_EXPAND(AT_FLOAT8_TYPES), kBFloat16, kHalf);
  } else {
    AT_DISPATCH_V2(
        tensor.scalar_type(), "fill_empty_deterministic_", AT_WRAP([&]() {

@ -44,7 +44,7 @@ Scalar _local_scalar_dense_cuda(const Tensor& self) {
    cudaStream_t stream = at::cuda::getCurrentCUDAStream();
    at::cuda::memcpy_and_sync((void *)value.const_data_ptr<scalar_t>(), self.const_data_ptr<scalar_t>(), sizeof(scalar_t), cudaMemcpyDeviceToHost, stream);
    r = Scalar(*value.const_data_ptr<scalar_t>());
  }), AT_EXPAND(AT_ALL_TYPES_AND_COMPLEX), kComplexHalf, kHalf, kBool, kBFloat16, AT_EXPAND(AT_FLOAT8_TYPES), AT_EXPAND(AT_BAREBONES_UNSIGNED_TYPES));
  }), AT_EXPAND(AT_ALL_TYPES_AND_COMPLEX), kComplexHalf, kHalf, kBool, kBFloat16, AT_EXPAND(AT_BAREBONES_UNSIGNED_TYPES));
#if defined(USE_ROCM)
  } else {
    auto cpu_self = self.cpu();

@ -285,64 +285,44 @@ struct Copy<dst_t, c10::complex<float>> {
  }
};

#define AT_DISPATCH_SOURCE_TYPES(TYPE, NAME, ...) \
  AT_DISPATCH_SWITCH( \
      TYPE, \
      NAME, \
      AT_PRIVATE_CASE_TYPE_USING_HINT( \
          at::ScalarType::Byte, \
          src_t, \
          __VA_ARGS__) AT_PRIVATE_CASE_TYPE_USING_HINT(at::ScalarType::Char, src_t, __VA_ARGS__) \
      AT_PRIVATE_CASE_TYPE_USING_HINT( \
          at::ScalarType::Long, src_t, __VA_ARGS__) \
      AT_PRIVATE_CASE_TYPE_USING_HINT( \
          at::ScalarType::Short, src_t, __VA_ARGS__) \
      AT_PRIVATE_CASE_TYPE_USING_HINT( \
          at::ScalarType::Int, src_t, __VA_ARGS__) \
      AT_PRIVATE_CASE_TYPE_USING_HINT( \
          at::ScalarType::Double, src_t, __VA_ARGS__) \
      AT_PRIVATE_CASE_TYPE_USING_HINT( \
          at::ScalarType::Float, src_t, __VA_ARGS__) \
      AT_PRIVATE_CASE_TYPE_USING_HINT( \
          at::ScalarType::ComplexDouble, \
          src_t, \
          __VA_ARGS__) \
      AT_PRIVATE_CASE_TYPE_USING_HINT( \
          at::ScalarType::ComplexFloat, \
          src_t, \
          __VA_ARGS__) \
      AT_PRIVATE_CASE_TYPE_USING_HINT( \
          at::ScalarType::Half, \
          src_t, \
          __VA_ARGS__) \
      AT_PRIVATE_CASE_TYPE_USING_HINT( \
          at::ScalarType::BFloat16, \
          src_t, \
          __VA_ARGS__) \
      AT_PRIVATE_CASE_TYPE_USING_HINT( \
          at::ScalarType::Bool, \
          src_t, \
          __VA_ARGS__) \
      AT_PRIVATE_CASE_TYPE_USING_HINT( \
          at::ScalarType:: \
              Float8_e4m3fn, \
          src_t, \
          __VA_ARGS__) \
      AT_PRIVATE_CASE_TYPE_USING_HINT( \
          at::ScalarType:: \
              Float8_e4m3fnuz, \
          src_t, \
          __VA_ARGS__) \
      AT_PRIVATE_CASE_TYPE_USING_HINT( \
          at::ScalarType:: \
              Float8_e5m2, \
          src_t, \
          __VA_ARGS__) \
      AT_PRIVATE_CASE_TYPE_USING_HINT( \
          at::ScalarType:: \
              Float8_e5m2fnuz, \
          src_t, \
          __VA_ARGS__))
#define AT_DISPATCH_SOURCE_TYPES(TYPE, NAME, ...) \
  AT_DISPATCH_SWITCH( \
      TYPE, \
      NAME, \
      AT_PRIVATE_CASE_TYPE_USING_HINT( \
          at::ScalarType::Byte, src_t, __VA_ARGS__) \
      AT_PRIVATE_CASE_TYPE_USING_HINT( \
          at::ScalarType::Char, src_t, __VA_ARGS__) \
      AT_PRIVATE_CASE_TYPE_USING_HINT( \
          at::ScalarType::Long, src_t, __VA_ARGS__) \
      AT_PRIVATE_CASE_TYPE_USING_HINT( \
          at::ScalarType::Short, src_t, __VA_ARGS__) \
      AT_PRIVATE_CASE_TYPE_USING_HINT( \
          at::ScalarType::Int, src_t, __VA_ARGS__) \
      AT_PRIVATE_CASE_TYPE_USING_HINT( \
          at::ScalarType::Double, src_t, __VA_ARGS__) \
      AT_PRIVATE_CASE_TYPE_USING_HINT( \
          at::ScalarType::Float, src_t, __VA_ARGS__) \
      AT_PRIVATE_CASE_TYPE_USING_HINT( \
          at::ScalarType::ComplexDouble, \
          src_t, \
          __VA_ARGS__) \
      AT_PRIVATE_CASE_TYPE_USING_HINT( \
          at::ScalarType::ComplexFloat, \
          src_t, \
          __VA_ARGS__) \
      AT_PRIVATE_CASE_TYPE_USING_HINT( \
          at::ScalarType::Half, \
          src_t, \
          __VA_ARGS__) \
      AT_PRIVATE_CASE_TYPE_USING_HINT( \
          at::ScalarType::BFloat16, \
          src_t, \
          __VA_ARGS__) \
      AT_PRIVATE_CASE_TYPE_USING_HINT( \
          at::ScalarType::Bool, \
          src_t, \
          __VA_ARGS__))

namespace {

@ -430,14 +410,10 @@ void foreach_tensor_copy_list_kernel_cuda_(

  std::vector<std::vector<at::Tensor>> tensor_lists{src.vec(), self.vec()};

  AT_DISPATCH_ALL_TYPES_AND_COMPLEX_AND7(
  AT_DISPATCH_ALL_TYPES_AND_COMPLEX_AND3(
      ScalarType::Half,
      ScalarType::BFloat16,
      ScalarType::Bool,
      ScalarType::Float8_e4m3fn,
      ScalarType::Float8_e4m3fnuz,
      ScalarType::Float8_e5m2,
      ScalarType::Float8_e5m2fnuz,
      self[0].scalar_type(),
      "foreach_tensor_copy",
      [&]() {

Some files were not shown because too many files have changed in this diff.