mirror of
https://github.com/pytorch/pytorch.git
synced 2025-10-29 03:04:55 +08:00
Compare commits
54 Commits
greenconte
...
perf_ops
| Author | SHA1 | Date | |
|---|---|---|---|
| 7669445a70 | |||
| b7eae1cc34 | |||
| a710d65523 | |||
| eddf149b0c | |||
| 62a91acda9 | |||
| 45760a2f7f | |||
| 5fa2fe9539 | |||
| 2e3d0429c2 | |||
| c8a53c3383 | |||
| 682d542bfb | |||
| a60037fd72 | |||
| bfc9680175 | |||
| 5031e026fc | |||
| 195779ec3b | |||
| e934b6ab40 | |||
| ca59a71675 | |||
| 93ad3fec44 | |||
| 783f8064d1 | |||
| f47fa8d2f8 | |||
| 3635731fc2 | |||
| 98a71c71b2 | |||
| 86e3803f3b | |||
| 056bcfc333 | |||
| cc2b171704 | |||
| 7b16f72b09 | |||
| f47e539765 | |||
| 49e5e122fe | |||
| 6b8cc19597 | |||
| d683fb9ebe | |||
| 9eca494626 | |||
| 7f5b0bcec8 | |||
| 4c257bca07 | |||
| 05bb4d4fc6 | |||
| 8d0cafb8bb | |||
| 629de8d7ba | |||
| 71ae2d8280 | |||
| 2fe66701c1 | |||
| c021d0349e | |||
| c6f1a29b17 | |||
| 54c9527a81 | |||
| cf31d4b744 | |||
| 9c701f03ee | |||
| c193ed6c84 | |||
| eab7bd0d4c | |||
| 199318f978 | |||
| 9b226b2ce4 | |||
| 6357d4e05a | |||
| 162e7d3c20 | |||
| ada9c165dd | |||
| 461c7ad698 | |||
| 819159610d | |||
| d257ebf9c7 | |||
| aab478833d | |||
| ba1319f414 |
@ -31,7 +31,8 @@ pip install -r /pytorch/requirements.txt
|
|||||||
pip install auditwheel==6.2.0 wheel
|
pip install auditwheel==6.2.0 wheel
|
||||||
if [ "$DESIRED_CUDA" = "cpu" ]; then
|
if [ "$DESIRED_CUDA" = "cpu" ]; then
|
||||||
echo "BASE_CUDA_VERSION is not set. Building cpu wheel."
|
echo "BASE_CUDA_VERSION is not set. Building cpu wheel."
|
||||||
python /pytorch/.ci/aarch64_linux/aarch64_wheel_ci_build.py --enable-mkldnn
|
#USE_PRIORITIZED_TEXT_FOR_LD for enable linker script optimization https://github.com/pytorch/pytorch/pull/121975/files
|
||||||
|
USE_PRIORITIZED_TEXT_FOR_LD=1 python /pytorch/.ci/aarch64_linux/aarch64_wheel_ci_build.py --enable-mkldnn
|
||||||
else
|
else
|
||||||
echo "BASE_CUDA_VERSION is set to: $DESIRED_CUDA"
|
echo "BASE_CUDA_VERSION is set to: $DESIRED_CUDA"
|
||||||
export USE_SYSTEM_NCCL=1
|
export USE_SYSTEM_NCCL=1
|
||||||
@ -45,5 +46,6 @@ else
|
|||||||
export USE_NVIDIA_PYPI_LIBS=1
|
export USE_NVIDIA_PYPI_LIBS=1
|
||||||
fi
|
fi
|
||||||
|
|
||||||
python /pytorch/.ci/aarch64_linux/aarch64_wheel_ci_build.py --enable-mkldnn --enable-cuda
|
#USE_PRIORITIZED_TEXT_FOR_LD for enable linker script optimization https://github.com/pytorch/pytorch/pull/121975/files
|
||||||
|
USE_PRIORITIZED_TEXT_FOR_LD=1 python /pytorch/.ci/aarch64_linux/aarch64_wheel_ci_build.py --enable-mkldnn --enable-cuda
|
||||||
fi
|
fi
|
||||||
|
|||||||
@ -317,7 +317,7 @@ if __name__ == "__main__":
|
|||||||
).decode()
|
).decode()
|
||||||
|
|
||||||
print("Building PyTorch wheel")
|
print("Building PyTorch wheel")
|
||||||
build_vars = ""
|
build_vars = "CMAKE_SHARED_LINKER_FLAGS=-Wl,-z,max-page-size=0x10000 "
|
||||||
# MAX_JOB=5 is not required for CPU backend (see commit 465d98b)
|
# MAX_JOB=5 is not required for CPU backend (see commit 465d98b)
|
||||||
if enable_cuda:
|
if enable_cuda:
|
||||||
build_vars += "MAX_JOBS=5 "
|
build_vars += "MAX_JOBS=5 "
|
||||||
|
|||||||
@ -241,7 +241,7 @@ def wait_for_connection(addr, port, timeout=15, attempt_cnt=5):
|
|||||||
try:
|
try:
|
||||||
with socket.create_connection((addr, port), timeout=timeout):
|
with socket.create_connection((addr, port), timeout=timeout):
|
||||||
return
|
return
|
||||||
except (ConnectionRefusedError, TimeoutError): # noqa: PERF203
|
except (ConnectionRefusedError, socket.timeout): # noqa: PERF203
|
||||||
if i == attempt_cnt - 1:
|
if i == attempt_cnt - 1:
|
||||||
raise
|
raise
|
||||||
time.sleep(timeout)
|
time.sleep(timeout)
|
||||||
@ -1004,7 +1004,7 @@ if __name__ == "__main__":
|
|||||||
install_condaforge_python(host, args.python_version)
|
install_condaforge_python(host, args.python_version)
|
||||||
sys.exit(0)
|
sys.exit(0)
|
||||||
|
|
||||||
python_version = args.python_version if args.python_version is not None else "3.10"
|
python_version = args.python_version if args.python_version is not None else "3.9"
|
||||||
|
|
||||||
if args.use_torch_from_pypi:
|
if args.use_torch_from_pypi:
|
||||||
configure_system(host, compiler=args.compiler, python_version=python_version)
|
configure_system(host, compiler=args.compiler, python_version=python_version)
|
||||||
|
|||||||
@ -69,8 +69,7 @@ RUN bash ./install_cuda.sh 13.0
|
|||||||
ENV DESIRED_CUDA=13.0
|
ENV DESIRED_CUDA=13.0
|
||||||
|
|
||||||
FROM ${ROCM_IMAGE} as rocm
|
FROM ${ROCM_IMAGE} as rocm
|
||||||
ARG PYTORCH_ROCM_ARCH
|
ENV PYTORCH_ROCM_ARCH="gfx900;gfx906;gfx908;gfx90a;gfx942;gfx1030;gfx1100;gfx1101;gfx1102;gfx1200;gfx1201"
|
||||||
ENV PYTORCH_ROCM_ARCH ${PYTORCH_ROCM_ARCH}
|
|
||||||
ADD ./common/install_mkl.sh install_mkl.sh
|
ADD ./common/install_mkl.sh install_mkl.sh
|
||||||
RUN bash ./install_mkl.sh && rm install_mkl.sh
|
RUN bash ./install_mkl.sh && rm install_mkl.sh
|
||||||
ENV MKLROOT /opt/intel
|
ENV MKLROOT /opt/intel
|
||||||
|
|||||||
@ -36,12 +36,6 @@ case ${DOCKER_TAG_PREFIX} in
|
|||||||
;;
|
;;
|
||||||
rocm*)
|
rocm*)
|
||||||
BASE_TARGET=rocm
|
BASE_TARGET=rocm
|
||||||
PYTORCH_ROCM_ARCH="gfx900;gfx906;gfx908;gfx90a;gfx942;gfx1030;gfx1100;gfx1101;gfx1102;gfx1200;gfx1201"
|
|
||||||
# add gfx950 conditionally starting in ROCm 7.0
|
|
||||||
if [[ "$ROCM_VERSION" == *"7.0"* ]]; then
|
|
||||||
PYTORCH_ROCM_ARCH="${PYTORCH_ROCM_ARCH};gfx950"
|
|
||||||
fi
|
|
||||||
EXTRA_BUILD_ARGS="${EXTRA_BUILD_ARGS} --build-arg PYTORCH_ROCM_ARCH=${PYTORCH_ROCM_ARCH}"
|
|
||||||
;;
|
;;
|
||||||
*)
|
*)
|
||||||
echo "ERROR: Unknown docker tag ${DOCKER_TAG_PREFIX}"
|
echo "ERROR: Unknown docker tag ${DOCKER_TAG_PREFIX}"
|
||||||
|
|||||||
@ -262,10 +262,13 @@ case "$tag" in
|
|||||||
TRITON_CPU=yes
|
TRITON_CPU=yes
|
||||||
;;
|
;;
|
||||||
pytorch-linux-jammy-linter)
|
pytorch-linux-jammy-linter)
|
||||||
PYTHON_VERSION=3.10
|
# TODO: Use 3.9 here because of this issue https://github.com/python/mypy/issues/13627.
|
||||||
|
# We will need to update mypy version eventually, but that's for another day. The task
|
||||||
|
# would be to upgrade mypy to 1.0.0 with Python 3.11
|
||||||
|
PYTHON_VERSION=3.9
|
||||||
;;
|
;;
|
||||||
pytorch-linux-jammy-cuda12.8-cudnn9-py3.10-linter)
|
pytorch-linux-jammy-cuda12.8-cudnn9-py3.9-linter)
|
||||||
PYTHON_VERSION=3.10
|
PYTHON_VERSION=3.9
|
||||||
CUDA_VERSION=12.8.1
|
CUDA_VERSION=12.8.1
|
||||||
;;
|
;;
|
||||||
pytorch-linux-jammy-aarch64-py3.10-gcc11)
|
pytorch-linux-jammy-aarch64-py3.10-gcc11)
|
||||||
|
|||||||
@ -1 +1 @@
|
|||||||
e0dda9059d082537cee36be6c5e4fe3b18c880c0
|
56392aa978594cc155fa8af48cd949f5b5f1823a
|
||||||
|
|||||||
@ -1,2 +1,2 @@
|
|||||||
transformers==4.56.0
|
transformers==4.54.0
|
||||||
soxr==0.5.0
|
soxr==0.5.0
|
||||||
|
|||||||
@ -1 +1 @@
|
|||||||
v2.28.3-1
|
v2.27.5-1
|
||||||
|
|||||||
@ -1 +1 @@
|
|||||||
v2.28.3-1
|
v2.27.7-1
|
||||||
|
|||||||
@ -1 +1 @@
|
|||||||
bbb06c0334a6772b92d24bde54956e675c8c6604
|
5ae38bdb0dc066c5823e34dc9797afb9de42c866
|
||||||
|
|||||||
@ -42,27 +42,22 @@ install_pip_dependencies() {
|
|||||||
# A workaround, ExecuTorch has moved to numpy 2.0 which is not compatible with the current
|
# A workaround, ExecuTorch has moved to numpy 2.0 which is not compatible with the current
|
||||||
# numba and scipy version used in PyTorch CI
|
# numba and scipy version used in PyTorch CI
|
||||||
conda_run pip uninstall -y numba scipy
|
conda_run pip uninstall -y numba scipy
|
||||||
# Yaspin is needed for running CI test (get_benchmark_analysis_data.py)
|
|
||||||
pip_install yaspin==3.1.0
|
|
||||||
|
|
||||||
popd
|
popd
|
||||||
}
|
}
|
||||||
|
|
||||||
setup_executorch() {
|
setup_executorch() {
|
||||||
|
pushd executorch
|
||||||
|
|
||||||
export PYTHON_EXECUTABLE=python
|
export PYTHON_EXECUTABLE=python
|
||||||
export CMAKE_ARGS="-DEXECUTORCH_BUILD_PYBIND=ON -DEXECUTORCH_BUILD_XNNPACK=ON -DEXECUTORCH_BUILD_KERNELS_QUANTIZED=ON -DEXECUTORCH_BUILD_TESTS=ON"
|
export CMAKE_ARGS="-DEXECUTORCH_BUILD_PYBIND=ON -DEXECUTORCH_BUILD_XNNPACK=ON -DEXECUTORCH_BUILD_KERNELS_QUANTIZED=ON"
|
||||||
|
|
||||||
as_jenkins .ci/scripts/setup-linux.sh --build-tool cmake || true
|
as_jenkins .ci/scripts/setup-linux.sh --build-tool cmake || true
|
||||||
|
popd
|
||||||
}
|
}
|
||||||
|
|
||||||
if [ $# -eq 0 ]; then
|
clone_executorch
|
||||||
clone_executorch
|
install_buck2
|
||||||
install_buck2
|
install_conda_dependencies
|
||||||
install_conda_dependencies
|
install_pip_dependencies
|
||||||
install_pip_dependencies
|
setup_executorch
|
||||||
pushd executorch
|
|
||||||
setup_executorch
|
|
||||||
popd
|
|
||||||
else
|
|
||||||
"$@"
|
|
||||||
fi
|
|
||||||
|
|||||||
@ -12,8 +12,8 @@ function do_install() {
|
|||||||
|
|
||||||
rocm_version_nodot=${rocm_version//./}
|
rocm_version_nodot=${rocm_version//./}
|
||||||
|
|
||||||
# https://github.com/icl-utk-edu/magma/pull/65
|
# Version 2.7.2 + ROCm related updates
|
||||||
MAGMA_VERSION=d6e4117bc88e73f06d26c6c2e14f064e8fc3d1ec
|
MAGMA_VERSION=a1625ff4d9bc362906bd01f805dbbe12612953f6
|
||||||
magma_archive="magma-rocm${rocm_version_nodot}-${MAGMA_VERSION}-1.tar.bz2"
|
magma_archive="magma-rocm${rocm_version_nodot}-${MAGMA_VERSION}-1.tar.bz2"
|
||||||
|
|
||||||
rocm_dir="/opt/rocm"
|
rocm_dir="/opt/rocm"
|
||||||
|
|||||||
@ -40,16 +40,12 @@ case ${DOCKER_TAG_PREFIX} in
|
|||||||
;;
|
;;
|
||||||
rocm*)
|
rocm*)
|
||||||
# we want the patch version of 6.4 instead
|
# we want the patch version of 6.4 instead
|
||||||
if [[ "$GPU_ARCH_VERSION" == *"6.4"* ]]; then
|
if [[ $(ver $GPU_ARCH_VERSION) -eq $(ver 6.4) ]]; then
|
||||||
GPU_ARCH_VERSION="${GPU_ARCH_VERSION}.2"
|
GPU_ARCH_VERSION="${GPU_ARCH_VERSION}.2"
|
||||||
fi
|
fi
|
||||||
BASE_TARGET=rocm
|
BASE_TARGET=rocm
|
||||||
GPU_IMAGE=rocm/dev-ubuntu-22.04:${GPU_ARCH_VERSION}-complete
|
GPU_IMAGE=rocm/dev-ubuntu-22.04:${GPU_ARCH_VERSION}-complete
|
||||||
PYTORCH_ROCM_ARCH="gfx900;gfx906;gfx908;gfx90a;gfx942;gfx1030;gfx1100;gfx1101;gfx1102;gfx1200;gfx1201"
|
PYTORCH_ROCM_ARCH="gfx900;gfx906;gfx908;gfx90a;gfx942;gfx1030;gfx1100;gfx1101;gfx1102;gfx1200;gfx1201"
|
||||||
# add gfx950 conditionally starting in ROCm 7.0
|
|
||||||
if [[ "$GPU_ARCH_VERSION" == *"7.0"* ]]; then
|
|
||||||
PYTORCH_ROCM_ARCH="${PYTORCH_ROCM_ARCH};gfx950"
|
|
||||||
fi
|
|
||||||
DOCKER_GPU_BUILD_ARG="--build-arg PYTORCH_ROCM_ARCH=${PYTORCH_ROCM_ARCH} --build-arg ROCM_VERSION=${GPU_ARCH_VERSION}"
|
DOCKER_GPU_BUILD_ARG="--build-arg PYTORCH_ROCM_ARCH=${PYTORCH_ROCM_ARCH} --build-arg ROCM_VERSION=${GPU_ARCH_VERSION}"
|
||||||
;;
|
;;
|
||||||
*)
|
*)
|
||||||
|
|||||||
@ -82,7 +82,7 @@ case ${image} in
|
|||||||
;;
|
;;
|
||||||
manylinux2_28-builder:rocm*)
|
manylinux2_28-builder:rocm*)
|
||||||
# we want the patch version of 6.4 instead
|
# we want the patch version of 6.4 instead
|
||||||
if [[ "$GPU_ARCH_VERSION" == *"6.4"* ]]; then
|
if [[ $(ver $GPU_ARCH_VERSION) -eq $(ver 6.4) ]]; then
|
||||||
GPU_ARCH_VERSION="${GPU_ARCH_VERSION}.2"
|
GPU_ARCH_VERSION="${GPU_ARCH_VERSION}.2"
|
||||||
fi
|
fi
|
||||||
TARGET=rocm_final
|
TARGET=rocm_final
|
||||||
@ -90,10 +90,6 @@ case ${image} in
|
|||||||
DEVTOOLSET_VERSION="11"
|
DEVTOOLSET_VERSION="11"
|
||||||
GPU_IMAGE=rocm/dev-almalinux-8:${GPU_ARCH_VERSION}-complete
|
GPU_IMAGE=rocm/dev-almalinux-8:${GPU_ARCH_VERSION}-complete
|
||||||
PYTORCH_ROCM_ARCH="gfx900;gfx906;gfx908;gfx90a;gfx942;gfx1030;gfx1100;gfx1101;gfx1102;gfx1200;gfx1201"
|
PYTORCH_ROCM_ARCH="gfx900;gfx906;gfx908;gfx90a;gfx942;gfx1030;gfx1100;gfx1101;gfx1102;gfx1200;gfx1201"
|
||||||
# add gfx950 conditionally starting in ROCm 7.0
|
|
||||||
if [[ "$GPU_ARCH_VERSION" == *"7.0"* ]]; then
|
|
||||||
PYTORCH_ROCM_ARCH="${PYTORCH_ROCM_ARCH};gfx950"
|
|
||||||
fi
|
|
||||||
DOCKER_GPU_BUILD_ARG="--build-arg ROCM_VERSION=${GPU_ARCH_VERSION} --build-arg PYTORCH_ROCM_ARCH=${PYTORCH_ROCM_ARCH} --build-arg DEVTOOLSET_VERSION=${DEVTOOLSET_VERSION}"
|
DOCKER_GPU_BUILD_ARG="--build-arg ROCM_VERSION=${GPU_ARCH_VERSION} --build-arg PYTORCH_ROCM_ARCH=${PYTORCH_ROCM_ARCH} --build-arg DEVTOOLSET_VERSION=${DEVTOOLSET_VERSION}"
|
||||||
;;
|
;;
|
||||||
manylinux2_28-builder:xpu)
|
manylinux2_28-builder:xpu)
|
||||||
|
|||||||
@ -93,9 +93,8 @@ librosa==0.10.2 ; python_version == "3.12" and platform_machine != "s390x"
|
|||||||
#Pinned versions:
|
#Pinned versions:
|
||||||
#test that import:
|
#test that import:
|
||||||
|
|
||||||
mypy==1.16.0 ; platform_system != "Windows"
|
mypy==1.16.0
|
||||||
# Pin MyPy version because new errors are likely to appear with each release
|
# Pin MyPy version because new errors are likely to appear with each release
|
||||||
# Skip on Windows as lots of type annotations are POSIX specific
|
|
||||||
#Description: linter
|
#Description: linter
|
||||||
#Pinned versions: 1.16.0
|
#Pinned versions: 1.16.0
|
||||||
#test that import: test_typing.py, test_type_hints.py
|
#test that import: test_typing.py, test_type_hints.py
|
||||||
@ -112,6 +111,8 @@ ninja==1.11.1.3
|
|||||||
#Pinned versions: 1.11.1.3
|
#Pinned versions: 1.11.1.3
|
||||||
#test that import: run_test.py, test_cpp_extensions_aot.py,test_determination.py
|
#test that import: run_test.py, test_cpp_extensions_aot.py,test_determination.py
|
||||||
|
|
||||||
|
numba==0.49.0 ; python_version < "3.9" and platform_machine != "s390x"
|
||||||
|
numba==0.55.2 ; python_version == "3.9" and platform_machine != "s390x"
|
||||||
numba==0.55.2 ; python_version == "3.10" and platform_machine != "s390x"
|
numba==0.55.2 ; python_version == "3.10" and platform_machine != "s390x"
|
||||||
numba==0.60.0 ; python_version == "3.12" and platform_machine != "s390x"
|
numba==0.60.0 ; python_version == "3.12" and platform_machine != "s390x"
|
||||||
#Description: Just-In-Time Compiler for Numerical Functions
|
#Description: Just-In-Time Compiler for Numerical Functions
|
||||||
@ -132,7 +133,7 @@ numba==0.60.0 ; python_version == "3.12" and platform_machine != "s390x"
|
|||||||
#test_nn.py, test_namedtensor.py, test_linalg.py, test_jit_cuda_fuser.py,
|
#test_nn.py, test_namedtensor.py, test_linalg.py, test_jit_cuda_fuser.py,
|
||||||
#test_jit.py, test_indexing.py, test_datapipe.py, test_dataloader.py,
|
#test_jit.py, test_indexing.py, test_datapipe.py, test_dataloader.py,
|
||||||
#test_binary_ufuncs.py
|
#test_binary_ufuncs.py
|
||||||
numpy==1.22.4; python_version == "3.10"
|
numpy==1.22.4; python_version == "3.9" or python_version == "3.10"
|
||||||
numpy==1.26.2; python_version == "3.11" or python_version == "3.12"
|
numpy==1.26.2; python_version == "3.11" or python_version == "3.12"
|
||||||
numpy==2.1.2; python_version >= "3.13"
|
numpy==2.1.2; python_version >= "3.13"
|
||||||
|
|
||||||
@ -324,6 +325,8 @@ pywavelets==1.7.0 ; python_version >= "3.12"
|
|||||||
lxml==5.3.0
|
lxml==5.3.0
|
||||||
#Description: This is a requirement of unittest-xml-reporting
|
#Description: This is a requirement of unittest-xml-reporting
|
||||||
|
|
||||||
|
# Python-3.9 binaries
|
||||||
|
|
||||||
PyGithub==2.3.0
|
PyGithub==2.3.0
|
||||||
|
|
||||||
sympy==1.13.3
|
sympy==1.13.3
|
||||||
|
|||||||
@ -1,15 +1,8 @@
|
|||||||
sphinx==5.3.0
|
sphinx==5.3.0
|
||||||
#Description: This is used to generate PyTorch docs
|
#Description: This is used to generate PyTorch docs
|
||||||
#Pinned versions: 5.3.0
|
#Pinned versions: 5.3.0
|
||||||
|
-e git+https://github.com/pytorch/pytorch_sphinx_theme.git@1657ad2fc1acdc98aa719eebecbb0128a7c13ce4#egg=pytorch_sphinx_theme2
|
||||||
|
|
||||||
standard-imghdr==3.13.0; python_version >= "3.13"
|
|
||||||
#Description: This is needed by Sphinx, so it needs to be added here.
|
|
||||||
# The reasons are as follows:
|
|
||||||
# 1) This module has been removed from the Python standard library since Python 3.13(https://peps.python.org/pep-0594/#imghdr);
|
|
||||||
# 2) The current version of Sphinx (5.3.0) is not compatible with Python 3.13.
|
|
||||||
# Once Sphinx is upgraded to a version compatible with Python 3.13 or later, we can remove this dependency.
|
|
||||||
|
|
||||||
-e git+https://github.com/pytorch/pytorch_sphinx_theme.git@d53b0ffb9b1cda68260693ea98f3483823c88d8e#egg=pytorch_sphinx_theme2
|
|
||||||
# TODO: sphinxcontrib.katex 0.9.0 adds a local KaTeX server to speed up pre-rendering
|
# TODO: sphinxcontrib.katex 0.9.0 adds a local KaTeX server to speed up pre-rendering
|
||||||
# but it doesn't seem to work and hangs around idly. The initial thought that it is probably
|
# but it doesn't seem to work and hangs around idly. The initial thought that it is probably
|
||||||
# something related to Docker setup. We can investigate this later.
|
# something related to Docker setup. We can investigate this later.
|
||||||
|
|||||||
@ -41,6 +41,7 @@ def sample_vllm_test_library():
|
|||||||
"pytest -v -s basic_correctness/test_cumem.py",
|
"pytest -v -s basic_correctness/test_cumem.py",
|
||||||
"pytest -v -s basic_correctness/test_basic_correctness.py",
|
"pytest -v -s basic_correctness/test_basic_correctness.py",
|
||||||
"pytest -v -s basic_correctness/test_cpu_offload.py",
|
"pytest -v -s basic_correctness/test_cpu_offload.py",
|
||||||
|
"VLLM_TEST_ENABLE_ARTIFICIAL_PREEMPT=1 pytest -v -s basic_correctness/test_preemption.py",
|
||||||
],
|
],
|
||||||
},
|
},
|
||||||
"vllm_basic_models_test": {
|
"vllm_basic_models_test": {
|
||||||
@ -67,12 +68,15 @@ def sample_vllm_test_library():
|
|||||||
"-v",
|
"-v",
|
||||||
"-s",
|
"-s",
|
||||||
"entrypoints/llm",
|
"entrypoints/llm",
|
||||||
|
"--ignore=entrypoints/llm/test_lazy_outlines.py",
|
||||||
"--ignore=entrypoints/llm/test_generate.py",
|
"--ignore=entrypoints/llm/test_generate.py",
|
||||||
|
"--ignore=entrypoints/llm/test_generate_multiple_loras.py",
|
||||||
"--ignore=entrypoints/llm/test_collective_rpc.py",
|
"--ignore=entrypoints/llm/test_collective_rpc.py",
|
||||||
]
|
]
|
||||||
),
|
),
|
||||||
"pytest -v -s entrypoints/llm/test_generate.py",
|
"pytest -v -s entrypoints/llm/test_lazy_outlines.py",
|
||||||
"pytest -v -s entrypoints/offline_mode",
|
"pytest -v -s entrypoints/llm/test_generate.py ",
|
||||||
|
"VLLM_USE_V1=0 pytest -v -s entrypoints/offline_mode",
|
||||||
],
|
],
|
||||||
},
|
},
|
||||||
"vllm_regression_test": {
|
"vllm_regression_test": {
|
||||||
|
|||||||
@ -1,11 +1,11 @@
|
|||||||
SHELL=/usr/bin/env bash
|
SHELL=/usr/bin/env bash
|
||||||
|
|
||||||
DOCKER_CMD ?= docker
|
DOCKER_CMD ?= docker
|
||||||
DESIRED_ROCM ?= 7.0
|
DESIRED_ROCM ?= 6.4
|
||||||
DESIRED_ROCM_SHORT = $(subst .,,$(DESIRED_ROCM))
|
DESIRED_ROCM_SHORT = $(subst .,,$(DESIRED_ROCM))
|
||||||
PACKAGE_NAME = magma-rocm
|
PACKAGE_NAME = magma-rocm
|
||||||
# inherit this from underlying docker image, do not pass this env var to docker
|
# inherit this from underlying docker image, do not pass this env var to docker
|
||||||
#PYTORCH_ROCM_ARCH ?= gfx900;gfx906;gfx908;gfx90a;gfx942;gfx950;gfx1030;gfx1100;gfx1101;gfx1102;gfx1200;gfx1201
|
#PYTORCH_ROCM_ARCH ?= gfx900;gfx906;gfx908;gfx90a;gfx942;gfx1030;gfx1100;gfx1101;gfx1102;gfx1200;gfx1201
|
||||||
|
|
||||||
DOCKER_RUN = set -eou pipefail; ${DOCKER_CMD} run --rm -i \
|
DOCKER_RUN = set -eou pipefail; ${DOCKER_CMD} run --rm -i \
|
||||||
-v $(shell git rev-parse --show-toplevel)/.ci:/builder \
|
-v $(shell git rev-parse --show-toplevel)/.ci:/builder \
|
||||||
@ -16,7 +16,6 @@ DOCKER_RUN = set -eou pipefail; ${DOCKER_CMD} run --rm -i \
|
|||||||
magma-rocm/build_magma.sh
|
magma-rocm/build_magma.sh
|
||||||
|
|
||||||
.PHONY: all
|
.PHONY: all
|
||||||
all: magma-rocm70
|
|
||||||
all: magma-rocm64
|
all: magma-rocm64
|
||||||
all: magma-rocm63
|
all: magma-rocm63
|
||||||
|
|
||||||
@ -25,11 +24,6 @@ clean:
|
|||||||
$(RM) -r magma-*
|
$(RM) -r magma-*
|
||||||
$(RM) -r output
|
$(RM) -r output
|
||||||
|
|
||||||
.PHONY: magma-rocm70
|
|
||||||
magma-rocm70: DESIRED_ROCM := 7.0
|
|
||||||
magma-rocm70:
|
|
||||||
$(DOCKER_RUN)
|
|
||||||
|
|
||||||
.PHONY: magma-rocm64
|
.PHONY: magma-rocm64
|
||||||
magma-rocm64: DESIRED_ROCM := 6.4
|
magma-rocm64: DESIRED_ROCM := 6.4
|
||||||
magma-rocm64:
|
magma-rocm64:
|
||||||
|
|||||||
@ -6,8 +6,8 @@ set -eou pipefail
|
|||||||
# The script expects DESIRED_CUDA and PACKAGE_NAME to be set
|
# The script expects DESIRED_CUDA and PACKAGE_NAME to be set
|
||||||
ROOT_DIR="$(cd "$(dirname "${BASH_SOURCE[0]}")/.." && pwd)"
|
ROOT_DIR="$(cd "$(dirname "${BASH_SOURCE[0]}")/.." && pwd)"
|
||||||
|
|
||||||
# https://github.com/icl-utk-edu/magma/pull/65
|
# Version 2.7.2 + ROCm related updates
|
||||||
MAGMA_VERSION=d6e4117bc88e73f06d26c6c2e14f064e8fc3d1ec
|
MAGMA_VERSION=a1625ff4d9bc362906bd01f805dbbe12612953f6
|
||||||
|
|
||||||
# Folders for the build
|
# Folders for the build
|
||||||
PACKAGE_FILES=${ROOT_DIR}/magma-rocm/package_files # metadata
|
PACKAGE_FILES=${ROOT_DIR}/magma-rocm/package_files # metadata
|
||||||
@ -20,7 +20,7 @@ mkdir -p ${PACKAGE_DIR} ${PACKAGE_OUTPUT}/linux-64 ${PACKAGE_BUILD} ${PACKAGE_RE
|
|||||||
|
|
||||||
# Fetch magma sources and verify checksum
|
# Fetch magma sources and verify checksum
|
||||||
pushd ${PACKAGE_DIR}
|
pushd ${PACKAGE_DIR}
|
||||||
git clone https://github.com/jeffdaily/magma
|
git clone https://bitbucket.org/icl/magma.git
|
||||||
pushd magma
|
pushd magma
|
||||||
git checkout ${MAGMA_VERSION}
|
git checkout ${MAGMA_VERSION}
|
||||||
popd
|
popd
|
||||||
|
|||||||
@ -58,7 +58,7 @@ time python tools/setup_helpers/generate_code.py \
|
|||||||
|
|
||||||
# Build the docs
|
# Build the docs
|
||||||
pushd docs/cpp
|
pushd docs/cpp
|
||||||
time make VERBOSE=1 html
|
time make VERBOSE=1 html -j
|
||||||
|
|
||||||
popd
|
popd
|
||||||
popd
|
popd
|
||||||
|
|||||||
40
.ci/pytorch/functorch_doc_push_script.sh
Executable file
40
.ci/pytorch/functorch_doc_push_script.sh
Executable file
@ -0,0 +1,40 @@
|
|||||||
|
#!/bin/bash
|
||||||
|
|
||||||
|
# This is where the local pytorch install in the docker image is located
|
||||||
|
pt_checkout="/var/lib/jenkins/workspace"
|
||||||
|
source "$pt_checkout/.ci/pytorch/common_utils.sh"
|
||||||
|
echo "functorch_doc_push_script.sh: Invoked with $*"
|
||||||
|
|
||||||
|
set -ex -o pipefail
|
||||||
|
|
||||||
|
version=${DOCS_VERSION:-nightly}
|
||||||
|
echo "version: $version"
|
||||||
|
|
||||||
|
# Build functorch docs
|
||||||
|
pushd $pt_checkout/functorch/docs
|
||||||
|
make html
|
||||||
|
popd
|
||||||
|
|
||||||
|
git clone https://github.com/pytorch/functorch -b gh-pages --depth 1 functorch_ghpages
|
||||||
|
pushd functorch_ghpages
|
||||||
|
|
||||||
|
if [ "$version" == "main" ]; then
|
||||||
|
version=nightly
|
||||||
|
fi
|
||||||
|
|
||||||
|
git rm -rf "$version" || true
|
||||||
|
mv "$pt_checkout/functorch/docs/build/html" "$version"
|
||||||
|
|
||||||
|
git add "$version" || true
|
||||||
|
git status
|
||||||
|
git config user.email "soumith+bot@pytorch.org"
|
||||||
|
git config user.name "pytorchbot"
|
||||||
|
# If there aren't changes, don't make a commit; push is no-op
|
||||||
|
git commit -m "Generate Python docs from pytorch/pytorch@${GITHUB_SHA}" || true
|
||||||
|
git status
|
||||||
|
|
||||||
|
if [[ "${WITH_PUSH:-}" == true ]]; then
|
||||||
|
git push -u origin gh-pages
|
||||||
|
fi
|
||||||
|
|
||||||
|
popd
|
||||||
@ -35,11 +35,10 @@ fi
|
|||||||
|
|
||||||
print_cmake_info
|
print_cmake_info
|
||||||
if [[ ${BUILD_ENVIRONMENT} == *"distributed"* ]]; then
|
if [[ ${BUILD_ENVIRONMENT} == *"distributed"* ]]; then
|
||||||
# Needed for inductor benchmarks, as lots of HF networks make `torch.distribtued` calls
|
USE_OPENMP=1 WERROR=1 python setup.py bdist_wheel
|
||||||
USE_DISTRIBUTED=1 USE_OPENMP=1 WERROR=1 python setup.py bdist_wheel
|
|
||||||
else
|
else
|
||||||
# Explicitly set USE_DISTRIBUTED=0 to align with the default build config on mac. This also serves as the sole CI config that tests
|
# NB: we always build with distributed; USE_DISTRIBUTED turns off all
|
||||||
# that building with USE_DISTRIBUTED=0 works at all. See https://github.com/pytorch/pytorch/issues/86448
|
# backends (specifically the gloo backend), so test that this case works too
|
||||||
USE_DISTRIBUTED=0 USE_OPENMP=1 MACOSX_DEPLOYMENT_TARGET=11.0 WERROR=1 BUILD_TEST=OFF USE_PYTORCH_METAL=1 python setup.py bdist_wheel --plat-name macosx_11_0_arm64
|
USE_DISTRIBUTED=0 USE_OPENMP=1 MACOSX_DEPLOYMENT_TARGET=11.0 WERROR=1 BUILD_TEST=OFF USE_PYTORCH_METAL=1 python setup.py bdist_wheel --plat-name macosx_11_0_arm64
|
||||||
fi
|
fi
|
||||||
if which sccache > /dev/null; then
|
if which sccache > /dev/null; then
|
||||||
|
|||||||
@ -13,9 +13,13 @@ if [[ ! $(python -c "import torch; print(int(torch.backends.openmp.is_available(
|
|||||||
fi
|
fi
|
||||||
popd
|
popd
|
||||||
|
|
||||||
|
python -mpip install -r requirements.txt
|
||||||
|
|
||||||
# enable debug asserts in serialization
|
# enable debug asserts in serialization
|
||||||
export TORCH_SERIALIZATION_DEBUG=1
|
export TORCH_SERIALIZATION_DEBUG=1
|
||||||
|
|
||||||
|
python -mpip install --no-input -r requirements.txt
|
||||||
|
|
||||||
setup_test_python() {
|
setup_test_python() {
|
||||||
# The CircleCI worker hostname doesn't resolve to an address.
|
# The CircleCI worker hostname doesn't resolve to an address.
|
||||||
# This environment variable makes ProcessGroupGloo default to
|
# This environment variable makes ProcessGroupGloo default to
|
||||||
@ -55,7 +59,7 @@ test_python_shard() {
|
|||||||
|
|
||||||
setup_test_python
|
setup_test_python
|
||||||
|
|
||||||
time python test/run_test.py --verbose --exclude-jit-executor --exclude-distributed-tests --exclude-quantization-tests --shard "$1" "$NUM_TEST_SHARDS"
|
time python test/run_test.py --verbose --exclude-jit-executor --exclude-distributed-tests --shard "$1" "$NUM_TEST_SHARDS"
|
||||||
|
|
||||||
assert_git_not_dirty
|
assert_git_not_dirty
|
||||||
}
|
}
|
||||||
|
|||||||
@ -1,25 +0,0 @@
|
|||||||
From 6e08c9d08e9de59c7af28b720289debbbd384764 Mon Sep 17 00:00:00 2001
|
|
||||||
From: Michael Wang <13521008+isVoid@users.noreply.github.com>
|
|
||||||
Date: Tue, 1 Apr 2025 17:28:05 -0700
|
|
||||||
Subject: [PATCH] Avoid bumping certain driver API to avoid future breakage
|
|
||||||
(#185)
|
|
||||||
|
|
||||||
Co-authored-by: isVoid <isVoid@users.noreply.github.com>
|
|
||||||
---
|
|
||||||
numba_cuda/numba/cuda/cudadrv/driver.py | 3 +++
|
|
||||||
1 file changed, 3 insertions(+)
|
|
||||||
|
|
||||||
diff --git a/numba_cuda/numba/cuda/cudadrv/driver.py b/numba_cuda/numba/cuda/cudadrv/driver.py
|
|
||||||
index 1641bf77..233e9ed7 100644
|
|
||||||
--- a/numba_cuda/numba/cuda/cudadrv/driver.py
|
|
||||||
+++ b/numba_cuda/numba/cuda/cudadrv/driver.py
|
|
||||||
@@ -365,6 +365,9 @@ def _find_api(self, fname):
|
|
||||||
else:
|
|
||||||
variants = ('_v2', '')
|
|
||||||
|
|
||||||
+ if fname in ("cuCtxGetDevice", "cuCtxSynchronize"):
|
|
||||||
+ return getattr(self.lib, fname)
|
|
||||||
+
|
|
||||||
for variant in variants:
|
|
||||||
try:
|
|
||||||
return getattr(self.lib, f'{fname}{variant}')
|
|
||||||
@ -32,16 +32,6 @@ if [[ "$BUILD_ENVIRONMENT" != *rocm* && "$BUILD_ENVIRONMENT" != *s390x* && -d /v
|
|||||||
git config --global --add safe.directory /var/lib/jenkins/workspace
|
git config --global --add safe.directory /var/lib/jenkins/workspace
|
||||||
fi
|
fi
|
||||||
|
|
||||||
|
|
||||||
# Patch numba to avoid CUDA-13 crash, see https://github.com/pytorch/pytorch/issues/162878
|
|
||||||
NUMBA_CUDA_DIR=$(python -c "import os;import numba.cuda; print(os.path.dirname(numba.cuda.__file__))" 2>/dev/null || true)
|
|
||||||
if [ -n "$NUMBA_CUDA_DIR" ]; then
|
|
||||||
NUMBA_PATCH="$(dirname "$(realpath "${BASH_SOURCE[0]}")")/numba-cuda-13.patch"
|
|
||||||
pushd "$NUMBA_CUDA_DIR"
|
|
||||||
patch -p4 <"$NUMBA_PATCH"
|
|
||||||
popd
|
|
||||||
fi
|
|
||||||
|
|
||||||
echo "Environment variables:"
|
echo "Environment variables:"
|
||||||
env
|
env
|
||||||
|
|
||||||
@ -322,29 +312,23 @@ test_python_shard() {
|
|||||||
|
|
||||||
# modify LD_LIBRARY_PATH to ensure it has the conda env.
|
# modify LD_LIBRARY_PATH to ensure it has the conda env.
|
||||||
# This set of tests has been shown to be buggy without it for the split-build
|
# This set of tests has been shown to be buggy without it for the split-build
|
||||||
time python test/run_test.py --exclude-jit-executor --exclude-distributed-tests --exclude-quantization-tests $INCLUDE_CLAUSE --shard "$1" "$NUM_TEST_SHARDS" --verbose $PYTHON_TEST_EXTRA_OPTION --upload-artifacts-while-running
|
time python test/run_test.py --exclude-jit-executor --exclude-distributed-tests $INCLUDE_CLAUSE --shard "$1" "$NUM_TEST_SHARDS" --verbose $PYTHON_TEST_EXTRA_OPTION --upload-artifacts-while-running
|
||||||
|
|
||||||
assert_git_not_dirty
|
assert_git_not_dirty
|
||||||
}
|
}
|
||||||
|
|
||||||
test_python() {
|
test_python() {
|
||||||
# shellcheck disable=SC2086
|
# shellcheck disable=SC2086
|
||||||
time python test/run_test.py --exclude-jit-executor --exclude-distributed-tests --exclude-quantization-tests $INCLUDE_CLAUSE --verbose $PYTHON_TEST_EXTRA_OPTION
|
time python test/run_test.py --exclude-jit-executor --exclude-distributed-tests $INCLUDE_CLAUSE --verbose $PYTHON_TEST_EXTRA_OPTION
|
||||||
assert_git_not_dirty
|
assert_git_not_dirty
|
||||||
}
|
}
|
||||||
|
|
||||||
test_python_smoke() {
|
test_python_smoke() {
|
||||||
# Smoke tests for H100/B200
|
# Smoke tests for H100
|
||||||
time python test/run_test.py --include test_matmul_cuda inductor/test_fp8 inductor/test_max_autotune $PYTHON_TEST_EXTRA_OPTION --upload-artifacts-while-running
|
time python test/run_test.py --include test_matmul_cuda inductor/test_fp8 inductor/test_max_autotune $PYTHON_TEST_EXTRA_OPTION --upload-artifacts-while-running
|
||||||
assert_git_not_dirty
|
assert_git_not_dirty
|
||||||
}
|
}
|
||||||
|
|
||||||
test_python_smoke_b200() {
|
|
||||||
# Targeted smoke tests for B200 - staged approach to avoid too many failures
|
|
||||||
time python test/run_test.py --include test_matmul_cuda inductor/test_fp8 $PYTHON_TEST_EXTRA_OPTION --upload-artifacts-while-running
|
|
||||||
assert_git_not_dirty
|
|
||||||
}
|
|
||||||
|
|
||||||
test_h100_distributed() {
|
test_h100_distributed() {
|
||||||
# Distributed tests at H100
|
# Distributed tests at H100
|
||||||
time python test/run_test.py --include distributed/_composable/test_composability/test_pp_composability.py $PYTHON_TEST_EXTRA_OPTION --upload-artifacts-while-running
|
time python test/run_test.py --include distributed/_composable/test_composability/test_pp_composability.py $PYTHON_TEST_EXTRA_OPTION --upload-artifacts-while-running
|
||||||
@ -390,7 +374,6 @@ test_dynamo_wrapped_shard() {
|
|||||||
--exclude-distributed-tests \
|
--exclude-distributed-tests \
|
||||||
--exclude-torch-export-tests \
|
--exclude-torch-export-tests \
|
||||||
--exclude-aot-dispatch-tests \
|
--exclude-aot-dispatch-tests \
|
||||||
--exclude-quantization-tests \
|
|
||||||
--shard "$1" "$NUM_TEST_SHARDS" \
|
--shard "$1" "$NUM_TEST_SHARDS" \
|
||||||
--verbose \
|
--verbose \
|
||||||
--upload-artifacts-while-running
|
--upload-artifacts-while-running
|
||||||
@ -1163,12 +1146,6 @@ test_distributed() {
|
|||||||
fi
|
fi
|
||||||
}
|
}
|
||||||
|
|
||||||
test_quantization() {
|
|
||||||
echo "Testing quantization"
|
|
||||||
|
|
||||||
python test/test_quantization.py
|
|
||||||
}
|
|
||||||
|
|
||||||
test_rpc() {
|
test_rpc() {
|
||||||
echo "Testing RPC C++ tests"
|
echo "Testing RPC C++ tests"
|
||||||
# NB: the ending test_rpc must match the current function name for the current
|
# NB: the ending test_rpc must match the current function name for the current
|
||||||
@ -1563,10 +1540,14 @@ test_executorch() {
|
|||||||
install_torchvision
|
install_torchvision
|
||||||
install_torchaudio
|
install_torchaudio
|
||||||
|
|
||||||
INSTALL_SCRIPT="$(pwd)/.ci/docker/common/install_executorch.sh"
|
|
||||||
|
|
||||||
pushd /executorch
|
pushd /executorch
|
||||||
"${INSTALL_SCRIPT}" setup_executorch
|
|
||||||
|
export PYTHON_EXECUTABLE=python
|
||||||
|
export CMAKE_ARGS="-DEXECUTORCH_BUILD_PYBIND=ON -DEXECUTORCH_BUILD_XNNPACK=ON -DEXECUTORCH_BUILD_KERNELS_QUANTIZED=ON"
|
||||||
|
|
||||||
|
# NB: We need to rebuild ExecuTorch runner here because it depends on PyTorch
|
||||||
|
# from the PR
|
||||||
|
bash .ci/scripts/setup-linux.sh --build-tool cmake
|
||||||
|
|
||||||
echo "Run ExecuTorch unit tests"
|
echo "Run ExecuTorch unit tests"
|
||||||
pytest -v -n auto
|
pytest -v -n auto
|
||||||
@ -1580,14 +1561,17 @@ test_executorch() {
|
|||||||
|
|
||||||
popd
|
popd
|
||||||
|
|
||||||
|
# Test torchgen generated code for Executorch.
|
||||||
|
echo "Testing ExecuTorch op registration"
|
||||||
|
"$BUILD_BIN_DIR"/test_edge_op_registration
|
||||||
|
|
||||||
assert_git_not_dirty
|
assert_git_not_dirty
|
||||||
}
|
}
|
||||||
|
|
||||||
test_linux_aarch64() {
|
test_linux_aarch64() {
|
||||||
python test/run_test.py --include test_modules test_mkldnn test_mkldnn_fusion test_openmp test_torch test_dynamic_shapes \
|
python test/run_test.py --include test_modules test_mkldnn test_mkldnn_fusion test_openmp test_torch test_dynamic_shapes \
|
||||||
test_transformers test_multiprocessing test_numpy_interop test_autograd test_binary_ufuncs test_complex test_spectral_ops \
|
test_transformers test_multiprocessing test_numpy_interop test_autograd test_binary_ufuncs test_complex test_spectral_ops \
|
||||||
test_foreach test_reductions test_unary_ufuncs test_tensor_creation_ops test_ops profiler/test_memory_profiler \
|
test_foreach test_reductions test_unary_ufuncs test_tensor_creation_ops test_ops \
|
||||||
distributed/elastic/timer/api_test distributed/elastic/timer/local_timer_example distributed/elastic/timer/local_timer_test \
|
|
||||||
--shard "$SHARD_NUMBER" "$NUM_TEST_SHARDS" --verbose
|
--shard "$SHARD_NUMBER" "$NUM_TEST_SHARDS" --verbose
|
||||||
|
|
||||||
# Dynamo tests
|
# Dynamo tests
|
||||||
@ -1635,11 +1619,12 @@ test_operator_microbenchmark() {
|
|||||||
mkdir -p "$TEST_REPORTS_DIR"
|
mkdir -p "$TEST_REPORTS_DIR"
|
||||||
TEST_DIR=$(pwd)
|
TEST_DIR=$(pwd)
|
||||||
|
|
||||||
|
pip_uninstall torch torchvision torchaudio
|
||||||
|
pip_install torch==2.8.0 torchvision torchaudio ninja --force-reinstall
|
||||||
cd benchmarks/operator_benchmark/pt_extension
|
cd benchmarks/operator_benchmark/pt_extension
|
||||||
python -m pip install .
|
python -m pip install .
|
||||||
|
|
||||||
cd "${TEST_DIR}"/benchmarks/operator_benchmark
|
cd "${TEST_DIR}"/benchmarks/operator_benchmark
|
||||||
|
|
||||||
for OP_BENCHMARK_TESTS in matmul mm addmm bmm; do
|
for OP_BENCHMARK_TESTS in matmul mm addmm bmm; do
|
||||||
$TASKSET python -m pt.${OP_BENCHMARK_TESTS}_test --tag-filter long \
|
$TASKSET python -m pt.${OP_BENCHMARK_TESTS}_test --tag-filter long \
|
||||||
--output-json-for-dashboard "${TEST_REPORTS_DIR}/operator_microbenchmark_${OP_BENCHMARK_TESTS}_compile.json" \
|
--output-json-for-dashboard "${TEST_REPORTS_DIR}/operator_microbenchmark_${OP_BENCHMARK_TESTS}_compile.json" \
|
||||||
@ -1681,8 +1666,6 @@ elif [[ "${TEST_CONFIG}" == *executorch* ]]; then
|
|||||||
test_executorch
|
test_executorch
|
||||||
elif [[ "$TEST_CONFIG" == 'jit_legacy' ]]; then
|
elif [[ "$TEST_CONFIG" == 'jit_legacy' ]]; then
|
||||||
test_python_legacy_jit
|
test_python_legacy_jit
|
||||||
elif [[ "$TEST_CONFIG" == 'quantization' ]]; then
|
|
||||||
test_quantization
|
|
||||||
elif [[ "${BUILD_ENVIRONMENT}" == *libtorch* ]]; then
|
elif [[ "${BUILD_ENVIRONMENT}" == *libtorch* ]]; then
|
||||||
# TODO: run some C++ tests
|
# TODO: run some C++ tests
|
||||||
echo "no-op at the moment"
|
echo "no-op at the moment"
|
||||||
@ -1809,14 +1792,10 @@ elif [[ "${BUILD_ENVIRONMENT}" == *xpu* ]]; then
|
|||||||
test_xpu_bin
|
test_xpu_bin
|
||||||
elif [[ "${TEST_CONFIG}" == smoke ]]; then
|
elif [[ "${TEST_CONFIG}" == smoke ]]; then
|
||||||
test_python_smoke
|
test_python_smoke
|
||||||
elif [[ "${TEST_CONFIG}" == smoke_b200 ]]; then
|
|
||||||
test_python_smoke_b200
|
|
||||||
elif [[ "${TEST_CONFIG}" == h100_distributed ]]; then
|
elif [[ "${TEST_CONFIG}" == h100_distributed ]]; then
|
||||||
test_h100_distributed
|
test_h100_distributed
|
||||||
elif [[ "${TEST_CONFIG}" == "h100-symm-mem" ]]; then
|
elif [[ "${TEST_CONFIG}" == "h100-symm-mem" ]]; then
|
||||||
test_h100_symm_mem
|
test_h100_symm_mem
|
||||||
elif [[ "${TEST_CONFIG}" == "b200-symm-mem" ]]; then
|
|
||||||
test_h100_symm_mem
|
|
||||||
elif [[ "${TEST_CONFIG}" == h100_cutlass_backend ]]; then
|
elif [[ "${TEST_CONFIG}" == h100_cutlass_backend ]]; then
|
||||||
test_h100_cutlass_backend
|
test_h100_cutlass_backend
|
||||||
else
|
else
|
||||||
|
|||||||
@ -137,7 +137,7 @@ sccache --show-stats
|
|||||||
python -c "import os, glob; os.system('python -mpip install --no-index --no-deps ' + glob.glob('dist/*.whl')[0])"
|
python -c "import os, glob; os.system('python -mpip install --no-index --no-deps ' + glob.glob('dist/*.whl')[0])"
|
||||||
(
|
(
|
||||||
if "%BUILD_ENVIRONMENT%"=="" (
|
if "%BUILD_ENVIRONMENT%"=="" (
|
||||||
echo NOTE: To run `import torch`, please make sure to activate the conda environment by running `call %CONDA_ROOT_DIR%\Scripts\activate.bat %CONDA_ROOT_DIR%\envs\py_tmp` in Command Prompt before running Git Bash.
|
echo NOTE: To run `import torch`, please make sure to activate the conda environment by running `call %CONDA_PARENT_DIR%\Miniconda3\Scripts\activate.bat %CONDA_PARENT_DIR%\Miniconda3` in Command Prompt before running Git Bash.
|
||||||
) else (
|
) else (
|
||||||
copy /Y "dist\*.whl" "%PYTORCH_FINAL_PACKAGE_DIR%"
|
copy /Y "dist\*.whl" "%PYTORCH_FINAL_PACKAGE_DIR%"
|
||||||
|
|
||||||
|
|||||||
@ -3,12 +3,12 @@ if "%BUILD_ENVIRONMENT%"=="" (
|
|||||||
) else (
|
) else (
|
||||||
set CONDA_PARENT_DIR=C:\Jenkins
|
set CONDA_PARENT_DIR=C:\Jenkins
|
||||||
)
|
)
|
||||||
set CONDA_ROOT_DIR=%CONDA_PARENT_DIR%\Miniconda3
|
|
||||||
|
|
||||||
:: Be conservative here when rolling out the new AMI with conda. This will try
|
:: Be conservative here when rolling out the new AMI with conda. This will try
|
||||||
:: to install conda as before if it couldn't find the conda installation. This
|
:: to install conda as before if it couldn't find the conda installation. This
|
||||||
:: can be removed eventually after we gain enough confidence in the AMI
|
:: can be removed eventually after we gain enough confidence in the AMI
|
||||||
if not exist %CONDA_ROOT_DIR% (
|
if not exist %CONDA_PARENT_DIR%\Miniconda3 (
|
||||||
set INSTALL_FRESH_CONDA=1
|
set INSTALL_FRESH_CONDA=1
|
||||||
)
|
)
|
||||||
|
|
||||||
@ -17,14 +17,10 @@ if "%INSTALL_FRESH_CONDA%"=="1" (
|
|||||||
if errorlevel 1 exit /b
|
if errorlevel 1 exit /b
|
||||||
if not errorlevel 0 exit /b
|
if not errorlevel 0 exit /b
|
||||||
|
|
||||||
%TMP_DIR_WIN%\Miniconda3-latest-Windows-x86_64.exe /InstallationType=JustMe /RegisterPython=0 /S /AddToPath=0 /D=%CONDA_ROOT_DIR%
|
%TMP_DIR_WIN%\Miniconda3-latest-Windows-x86_64.exe /InstallationType=JustMe /RegisterPython=0 /S /AddToPath=0 /D=%CONDA_PARENT_DIR%\Miniconda3
|
||||||
if errorlevel 1 exit /b
|
if errorlevel 1 exit /b
|
||||||
if not errorlevel 0 exit /b
|
if not errorlevel 0 exit /b
|
||||||
)
|
)
|
||||||
|
|
||||||
:: Activate conda so that we can use its commands, i.e. conda, python, pip
|
:: Activate conda so that we can use its commands, i.e. conda, python, pip
|
||||||
call %CONDA_ROOT_DIR%\Scripts\activate.bat %CONDA_ROOT_DIR%
|
call %CONDA_PARENT_DIR%\Miniconda3\Scripts\activate.bat %CONDA_PARENT_DIR%\Miniconda3
|
||||||
:: Activate conda so that we can use its commands, i.e. conda, python, pip
|
|
||||||
call conda activate py_tmp
|
|
||||||
|
|
||||||
call pip install -r .ci/docker/requirements-ci.txt
|
|
||||||
|
|||||||
@ -14,7 +14,7 @@ if not errorlevel 0 exit /b
|
|||||||
:: build\torch. Rather than changing all these references, making a copy of torch folder
|
:: build\torch. Rather than changing all these references, making a copy of torch folder
|
||||||
:: from conda to the current workspace is easier. The workspace will be cleaned up after
|
:: from conda to the current workspace is easier. The workspace will be cleaned up after
|
||||||
:: the job anyway
|
:: the job anyway
|
||||||
xcopy /s %CONDA_ROOT_DIR%\envs\py_tmp\Lib\site-packages\torch %TMP_DIR_WIN%\build\torch\
|
xcopy /s %CONDA_PARENT_DIR%\Miniconda3\Lib\site-packages\torch %TMP_DIR_WIN%\build\torch\
|
||||||
|
|
||||||
pushd .
|
pushd .
|
||||||
if "%VC_VERSION%" == "" (
|
if "%VC_VERSION%" == "" (
|
||||||
|
|||||||
@ -25,7 +25,7 @@ echo Copying over test times file
|
|||||||
robocopy /E "%PYTORCH_FINAL_PACKAGE_DIR_WIN%\.additional_ci_files" "%PROJECT_DIR_WIN%\.additional_ci_files"
|
robocopy /E "%PYTORCH_FINAL_PACKAGE_DIR_WIN%\.additional_ci_files" "%PROJECT_DIR_WIN%\.additional_ci_files"
|
||||||
|
|
||||||
echo Run nn tests
|
echo Run nn tests
|
||||||
python run_test.py --exclude-jit-executor --exclude-distributed-tests --exclude-quantization-tests --shard "%SHARD_NUMBER%" "%NUM_TEST_SHARDS%" --verbose
|
python run_test.py --exclude-jit-executor --exclude-distributed-tests --shard "%SHARD_NUMBER%" "%NUM_TEST_SHARDS%" --verbose
|
||||||
if ERRORLEVEL 1 goto fail
|
if ERRORLEVEL 1 goto fail
|
||||||
|
|
||||||
popd
|
popd
|
||||||
|
|||||||
@ -38,14 +38,7 @@ if [[ "$BUILD_ENVIRONMENT" == *cuda* ]]; then
|
|||||||
fi
|
fi
|
||||||
|
|
||||||
# TODO: Move both of them to Windows AMI
|
# TODO: Move both of them to Windows AMI
|
||||||
python -m pip install tensorboard==2.13.0 protobuf==5.29.4 pytest-subtests==0.13.1
|
python -m pip install pytest-rerunfailures==10.3 pytest-cpp==2.3.0 tensorboard==2.13.0 protobuf==5.29.4 pytest-subtests==0.13.1
|
||||||
|
|
||||||
# Copied from https://github.com/pytorch/test-infra/blob/be01a40157c36cd5a48391fdf44a7bc3ebd4c7e3/aws/ami/windows/scripts/Installers/Install-Pip-Dependencies.ps1#L16 with some adjustments
|
|
||||||
# pytest-rerunfailures==10.3 as 10.2 fails with INTERNALERROR> pluggy._manager.PluginValidationError: unknown hook 'pytest_configure_node'
|
|
||||||
# scipy from 1.6.3 to 1.10
|
|
||||||
# expecttest from 0.1.3 to 0.3.0
|
|
||||||
# xdoctest from 1.0.2 to 1.3.0
|
|
||||||
python -m pip install "future==0.18.2" "hypothesis==5.35.1" "expecttest==0.3.0" "librosa>=0.6.2" "scipy==1.10.1" "psutil==5.9.1" "pynvml==11.4.1" "pillow==9.2.0" "unittest-xml-reporting<=3.2.0,>=2.0.0" "pytest==7.1.3" "pytest-xdist==2.5.0" "pytest-flakefinder==1.1.0" "pytest-rerunfailures==10.3" "pytest-shard==0.1.2" "sympy==1.11.1" "xdoctest==1.3.0" "pygments==2.12.0" "opt-einsum>=3.3" "networkx==2.8.8" "mpmath==1.2.1" "pytest-cpp==2.3.0" "boto3==1.35.42"
|
|
||||||
|
|
||||||
# Install Z3 optional dependency for Windows builds.
|
# Install Z3 optional dependency for Windows builds.
|
||||||
python -m pip install z3-solver==4.15.1.0
|
python -m pip install z3-solver==4.15.1.0
|
||||||
@ -59,6 +52,9 @@ python -m pip install parameterized==0.8.1
|
|||||||
# Install pulp for testing ilps under torch\distributed\_tools
|
# Install pulp for testing ilps under torch\distributed\_tools
|
||||||
python -m pip install pulp==2.9.0
|
python -m pip install pulp==2.9.0
|
||||||
|
|
||||||
|
# Install expecttest to merge https://github.com/pytorch/pytorch/pull/155308
|
||||||
|
python -m pip install expecttest==0.3.0
|
||||||
|
|
||||||
run_tests() {
|
run_tests() {
|
||||||
# Run nvidia-smi if available
|
# Run nvidia-smi if available
|
||||||
for path in '/c/Program Files/NVIDIA Corporation/NVSMI/nvidia-smi.exe' /c/Windows/System32/nvidia-smi.exe; do
|
for path in '/c/Program Files/NVIDIA Corporation/NVSMI/nvidia-smi.exe' /c/Windows/System32/nvidia-smi.exe; do
|
||||||
|
|||||||
@ -63,7 +63,7 @@ if errorlevel 1 exit /b 1
|
|||||||
call %CONDA_HOME%\condabin\activate.bat testenv
|
call %CONDA_HOME%\condabin\activate.bat testenv
|
||||||
if errorlevel 1 exit /b 1
|
if errorlevel 1 exit /b 1
|
||||||
|
|
||||||
call conda install -y -q -c conda-forge libuv=1.51
|
call conda install -y -q -c conda-forge libuv=1.39
|
||||||
call conda install -y -q intel-openmp
|
call conda install -y -q intel-openmp
|
||||||
|
|
||||||
echo "install and test libtorch"
|
echo "install and test libtorch"
|
||||||
|
|||||||
@ -177,7 +177,8 @@ source ~/${desired_python}-build/bin/activate
|
|||||||
retry pip install "${PINNED_PACKAGES[@]}" -r "${pytorch_rootdir}/requirements.txt"
|
retry pip install "${PINNED_PACKAGES[@]}" -r "${pytorch_rootdir}/requirements.txt"
|
||||||
retry brew install libomp
|
retry brew install libomp
|
||||||
|
|
||||||
# For USE_DISTRIBUTED=1 on macOS, need libuv, which is build as part of tensorpipe submodule
|
# For USE_DISTRIBUTED=1 on macOS, this enables gloo, which needs libuv, which
|
||||||
|
# is build as part of tensorpipe submodule
|
||||||
export USE_DISTRIBUTED=1
|
export USE_DISTRIBUTED=1
|
||||||
|
|
||||||
export USE_MKLDNN=OFF
|
export USE_MKLDNN=OFF
|
||||||
|
|||||||
47
.circleci/scripts/functorch_doc_push_script.sh
Executable file
47
.circleci/scripts/functorch_doc_push_script.sh
Executable file
@ -0,0 +1,47 @@
|
|||||||
|
#!/bin/bash
|
||||||
|
# =================== The following code **should** be executed inside Docker container ===================
|
||||||
|
|
||||||
|
# Install dependencies
|
||||||
|
sudo apt-get -y update
|
||||||
|
sudo apt-get -y install expect-dev
|
||||||
|
|
||||||
|
# This is where the local pytorch install in the docker image is located
|
||||||
|
pt_checkout="/var/lib/jenkins/workspace"
|
||||||
|
source "$pt_checkout/.ci/pytorch/common_utils.sh"
|
||||||
|
echo "functorch_doc_push_script.sh: Invoked with $*"
|
||||||
|
|
||||||
|
set -ex
|
||||||
|
|
||||||
|
version=${DOCS_VERSION:-nightly}
|
||||||
|
echo "version: $version"
|
||||||
|
|
||||||
|
# Build functorch docs
|
||||||
|
pushd $pt_checkout/functorch/docs
|
||||||
|
pip -q install -r requirements.txt
|
||||||
|
make html
|
||||||
|
popd
|
||||||
|
|
||||||
|
git clone https://github.com/pytorch/functorch -b gh-pages --depth 1 functorch_ghpages
|
||||||
|
pushd functorch_ghpages
|
||||||
|
|
||||||
|
if [ $version == "main" ]; then
|
||||||
|
version=nightly
|
||||||
|
fi
|
||||||
|
|
||||||
|
git rm -rf "$version" || true
|
||||||
|
mv "$pt_checkout/functorch/docs/build/html" "$version"
|
||||||
|
|
||||||
|
git add "$version" || true
|
||||||
|
git status
|
||||||
|
git config user.email "soumith+bot@pytorch.org"
|
||||||
|
git config user.name "pytorchbot"
|
||||||
|
# If there aren't changes, don't make a commit; push is no-op
|
||||||
|
git commit -m "Generate Python docs from pytorch/pytorch@${GITHUB_SHA}" || true
|
||||||
|
git status
|
||||||
|
|
||||||
|
if [[ "${WITH_PUSH:-}" == true ]]; then
|
||||||
|
git push -u origin gh-pages
|
||||||
|
fi
|
||||||
|
|
||||||
|
popd
|
||||||
|
# =================== The above code **should** be executed inside Docker container ===================
|
||||||
@ -69,8 +69,6 @@ readability-string-compare,
|
|||||||
'
|
'
|
||||||
HeaderFilterRegex: '^(aten/|c10/|torch/).*$'
|
HeaderFilterRegex: '^(aten/|c10/|torch/).*$'
|
||||||
WarningsAsErrors: '*'
|
WarningsAsErrors: '*'
|
||||||
LineFilter:
|
|
||||||
- name: '/usr/include/.*'
|
|
||||||
CheckOptions:
|
CheckOptions:
|
||||||
cppcoreguidelines-special-member-functions.AllowSoleDefaultDtor: true
|
cppcoreguidelines-special-member-functions.AllowSoleDefaultDtor: true
|
||||||
cppcoreguidelines-special-member-functions.AllowImplicitlyDeletedCopyOrMove: true
|
cppcoreguidelines-special-member-functions.AllowImplicitlyDeletedCopyOrMove: true
|
||||||
|
|||||||
4
.github/ISSUE_TEMPLATE/ci-sev.md
vendored
4
.github/ISSUE_TEMPLATE/ci-sev.md
vendored
@ -1,10 +1,6 @@
|
|||||||
---
|
---
|
||||||
name: "⚠️ CI SEV"
|
name: "⚠️ CI SEV"
|
||||||
about: Tracking incidents for PyTorch's CI infra.
|
about: Tracking incidents for PyTorch's CI infra.
|
||||||
title: ''
|
|
||||||
labels: ''
|
|
||||||
assignees: ''
|
|
||||||
|
|
||||||
---
|
---
|
||||||
|
|
||||||
> NOTE: Remember to label this issue with "`ci: sev`"
|
> NOTE: Remember to label this issue with "`ci: sev`"
|
||||||
|
|||||||
18
.github/ISSUE_TEMPLATE/disable-autorevert.md
vendored
18
.github/ISSUE_TEMPLATE/disable-autorevert.md
vendored
@ -1,18 +0,0 @@
|
|||||||
---
|
|
||||||
name: DISABLE AUTOREVERT
|
|
||||||
about: Disables autorevert when open
|
|
||||||
title: "❌\U0001F519 [DISABLE AUTOREVERT]"
|
|
||||||
labels: 'ci: disable-autorevert'
|
|
||||||
assignees: ''
|
|
||||||
|
|
||||||
---
|
|
||||||
|
|
||||||
This issue, while open, disables the autorevert functionality.
|
|
||||||
|
|
||||||
More details can be found [here](https://github.com/pytorch/test-infra/blob/main/aws/lambda/pytorch-auto-revert/README.md)
|
|
||||||
|
|
||||||
|
|
||||||
## Why are you disabling autorevert?
|
|
||||||
|
|
||||||
|
|
||||||
## Links to any issues/commits/errors that shows the source of problem
|
|
||||||
6
.github/ISSUE_TEMPLATE/disable-ci-jobs.md
vendored
6
.github/ISSUE_TEMPLATE/disable-ci-jobs.md
vendored
@ -1,10 +1,8 @@
|
|||||||
---
|
---
|
||||||
name: Disable CI jobs (PyTorch Dev Infra only)
|
name: Disable CI jobs (PyTorch Dev Infra only)
|
||||||
about: Use this template to disable CI jobs
|
about: Use this template to disable CI jobs
|
||||||
title: DISABLED [WORKFLOW_NAME] / [PLATFORM_NAME] / [JOB_NAME]
|
title: "DISABLED [WORKFLOW_NAME] / [PLATFORM_NAME] / [JOB_NAME]"
|
||||||
labels: 'module: ci'
|
labels: "module: ci"
|
||||||
assignees: ''
|
|
||||||
|
|
||||||
---
|
---
|
||||||
|
|
||||||
> For example, DISABLED pull / win-vs2022-cpu-py3 / test (default). Once
|
> For example, DISABLED pull / win-vs2022-cpu-py3 / test (default). Once
|
||||||
|
|||||||
3
.github/actionlint.yaml
vendored
3
.github/actionlint.yaml
vendored
@ -22,9 +22,6 @@ self-hosted-runner:
|
|||||||
- linux.arm64.m7g.4xlarge
|
- linux.arm64.m7g.4xlarge
|
||||||
- linux.arm64.m7g.4xlarge.ephemeral
|
- linux.arm64.m7g.4xlarge.ephemeral
|
||||||
- linux.arm64.r7g.12xlarge.memory
|
- linux.arm64.r7g.12xlarge.memory
|
||||||
- linux.aws.h100
|
|
||||||
- linux.aws.h100.4
|
|
||||||
- linux.aws.h100.8
|
|
||||||
- linux.4xlarge.nvidia.gpu
|
- linux.4xlarge.nvidia.gpu
|
||||||
- linux.8xlarge.nvidia.gpu
|
- linux.8xlarge.nvidia.gpu
|
||||||
- linux.16xlarge.nvidia.gpu
|
- linux.16xlarge.nvidia.gpu
|
||||||
|
|||||||
@ -264,7 +264,7 @@ def unzip_artifact_and_replace_files() -> None:
|
|||||||
change_content_to_new_version(f"artifacts/dist/{old_stem}/torch/version.py")
|
change_content_to_new_version(f"artifacts/dist/{old_stem}/torch/version.py")
|
||||||
|
|
||||||
for file in Path(f"artifacts/dist/{old_stem}").glob(
|
for file in Path(f"artifacts/dist/{old_stem}").glob(
|
||||||
"*.dist-info/*",
|
"*.dist-info/**",
|
||||||
):
|
):
|
||||||
change_content_to_new_version(file)
|
change_content_to_new_version(file)
|
||||||
|
|
||||||
|
|||||||
16
.github/actions/setup-win/action.yml
vendored
16
.github/actions/setup-win/action.yml
vendored
@ -6,12 +6,6 @@ inputs:
|
|||||||
cuda-version:
|
cuda-version:
|
||||||
description: which cuda version to install, 'cpu' for none
|
description: which cuda version to install, 'cpu' for none
|
||||||
required: true
|
required: true
|
||||||
python-version:
|
|
||||||
required: false
|
|
||||||
type: string
|
|
||||||
default: "3.10"
|
|
||||||
description: |
|
|
||||||
The python version to be used. Will be 3.10 by default
|
|
||||||
|
|
||||||
runs:
|
runs:
|
||||||
using: composite
|
using: composite
|
||||||
@ -44,24 +38,18 @@ runs:
|
|||||||
CONDA="C:\Jenkins\Miniconda3\condabin\conda.bat"
|
CONDA="C:\Jenkins\Miniconda3\condabin\conda.bat"
|
||||||
|
|
||||||
{
|
{
|
||||||
echo "CONDA=${CONDA}";
|
|
||||||
echo "CONDA_RUN=${CONDA} run --no-capture-output";
|
echo "CONDA_RUN=${CONDA} run --no-capture-output";
|
||||||
echo "CONDA_BUILD=${CONDA} run conda-build";
|
echo "CONDA_BUILD=${CONDA} run conda-build";
|
||||||
echo "CONDA_INSTALL=${CONDA} install";
|
echo "CONDA_INSTALL=${CONDA} install";
|
||||||
} >> "${GITHUB_ENV}"
|
} >> "${GITHUB_ENV}"
|
||||||
|
|
||||||
- name: Setup Python3
|
- name: Setup Python3
|
||||||
env:
|
|
||||||
PYTHON_VERSION: ${{ inputs.python-version }}
|
|
||||||
shell: bash
|
shell: bash
|
||||||
run: |
|
run: |
|
||||||
set +e
|
set +e
|
||||||
set -x
|
set -x
|
||||||
|
|
||||||
# Create new py_tmp env with python-version
|
PYTHON3=$(${CONDA_RUN} which python3)
|
||||||
${CONDA} create -y -n py_tmp python=${PYTHON_VERSION} intel-openmp libuv
|
|
||||||
|
|
||||||
PYTHON3=$(${CONDA_RUN} -n py_tmp which python3)
|
|
||||||
EXIT_CODE=$?
|
EXIT_CODE=$?
|
||||||
|
|
||||||
if [[ "${EXIT_CODE}" == "0" ]]; then
|
if [[ "${EXIT_CODE}" == "0" ]]; then
|
||||||
@ -74,7 +62,7 @@ runs:
|
|||||||
# installation, which is Python 3 based. Its Python is default to Python 3. Further, there
|
# installation, which is Python 3 based. Its Python is default to Python 3. Further, there
|
||||||
# is also the Miniconda installation that is Python 2 based, and both can be installed if
|
# is also the Miniconda installation that is Python 2 based, and both can be installed if
|
||||||
# needed. In both cases, Python binary is just called python
|
# needed. In both cases, Python binary is just called python
|
||||||
PYTHON=$(${CONDA_RUN} -n py_tmp which python)
|
PYTHON=$(${CONDA_RUN} which python)
|
||||||
EXIT_CODE=$?
|
EXIT_CODE=$?
|
||||||
|
|
||||||
if [[ "${EXIT_CODE}" == "0" ]]; then
|
if [[ "${EXIT_CODE}" == "0" ]]; then
|
||||||
|
|||||||
2
.github/ci_commit_pins/vllm.txt
vendored
2
.github/ci_commit_pins/vllm.txt
vendored
@ -1 +1 @@
|
|||||||
0307428d65acf5cf1a73a70a7722e076bbb83f22
|
5bcc153d7bf69ef34bc5788a33f60f1792cf2861
|
||||||
|
|||||||
2
.github/ci_commit_pins/xla.txt
vendored
2
.github/ci_commit_pins/xla.txt
vendored
@ -1 +1 @@
|
|||||||
0fc62aa26a30ed7ca419d285f285cb5ba02c4394
|
c77852e117bdf056c8e9a087e51d6f65cf6ba53d
|
||||||
|
|||||||
3
.github/labeler.yml
vendored
3
.github/labeler.yml
vendored
@ -130,6 +130,3 @@
|
|||||||
- torch/csrc/inductor/aoti_include/**
|
- torch/csrc/inductor/aoti_include/**
|
||||||
- torchgen/aoti/**
|
- torchgen/aoti/**
|
||||||
- torchgen/gen_aoti_c_shim.py
|
- torchgen/gen_aoti_c_shim.py
|
||||||
|
|
||||||
"ciflow/vllm":
|
|
||||||
- .github/ci_commit_pins/vllm.txt
|
|
||||||
|
|||||||
15
.github/merge_rules.yaml
vendored
15
.github/merge_rules.yaml
vendored
@ -525,21 +525,6 @@
|
|||||||
- Lint
|
- Lint
|
||||||
- pull
|
- pull
|
||||||
|
|
||||||
- name: typechecking
|
|
||||||
patterns:
|
|
||||||
- 'pyrefly.toml'
|
|
||||||
- 'mypy.ini'
|
|
||||||
- 'mypy-strict.ini'
|
|
||||||
approved_by:
|
|
||||||
- lolpack
|
|
||||||
- maggiemoss
|
|
||||||
- ndmitchell
|
|
||||||
- kinto0
|
|
||||||
mandatory_checks_name:
|
|
||||||
- EasyCLA
|
|
||||||
- Lint
|
|
||||||
- pull
|
|
||||||
|
|
||||||
- name: superuser
|
- name: superuser
|
||||||
patterns:
|
patterns:
|
||||||
- '*'
|
- '*'
|
||||||
|
|||||||
39
.github/pytorch-probot.yml
vendored
39
.github/pytorch-probot.yml
vendored
@ -1,44 +1,41 @@
|
|||||||
tracking_issue: 24422
|
tracking_issue: 24422
|
||||||
ciflow_tracking_issue: 64124
|
ciflow_tracking_issue: 64124
|
||||||
ciflow_push_tags:
|
ciflow_push_tags:
|
||||||
- ciflow/b200
|
|
||||||
- ciflow/b200-symm-mem
|
|
||||||
- ciflow/binaries
|
- ciflow/binaries
|
||||||
- ciflow/binaries_libtorch
|
- ciflow/binaries_libtorch
|
||||||
- ciflow/binaries_wheel
|
- ciflow/binaries_wheel
|
||||||
- ciflow/h100
|
- ciflow/triton_binaries
|
||||||
- ciflow/h100-cutlass-backend
|
|
||||||
- ciflow/h100-distributed
|
|
||||||
- ciflow/h100-symm-mem
|
|
||||||
- ciflow/inductor
|
- ciflow/inductor
|
||||||
- ciflow/inductor-cu126
|
|
||||||
- ciflow/inductor-micro-benchmark
|
|
||||||
- ciflow/inductor-micro-benchmark-cpu-x86
|
|
||||||
- ciflow/inductor-perf-compare
|
|
||||||
- ciflow/inductor-perf-test-nightly-rocm
|
|
||||||
- ciflow/inductor-perf-test-nightly-x86-zen
|
|
||||||
- ciflow/inductor-periodic
|
- ciflow/inductor-periodic
|
||||||
- ciflow/inductor-rocm
|
- ciflow/inductor-rocm
|
||||||
|
- ciflow/inductor-perf-test-nightly-rocm
|
||||||
|
- ciflow/inductor-perf-compare
|
||||||
|
- ciflow/inductor-micro-benchmark
|
||||||
|
- ciflow/inductor-micro-benchmark-cpu-x86
|
||||||
|
- ciflow/inductor-perf-test-nightly-x86-zen
|
||||||
|
- ciflow/inductor-cu126
|
||||||
- ciflow/linux-aarch64
|
- ciflow/linux-aarch64
|
||||||
- ciflow/mps
|
- ciflow/mps
|
||||||
- ciflow/nightly
|
- ciflow/nightly
|
||||||
- ciflow/op-benchmark
|
|
||||||
- ciflow/periodic
|
- ciflow/periodic
|
||||||
- ciflow/periodic-rocm-mi300
|
- ciflow/periodic-rocm-mi300
|
||||||
- ciflow/pull
|
|
||||||
- ciflow/quantization-periodic
|
|
||||||
- ciflow/riscv64
|
|
||||||
- ciflow/rocm
|
- ciflow/rocm
|
||||||
- ciflow/rocm-mi300
|
- ciflow/rocm-mi300
|
||||||
- ciflow/s390
|
- ciflow/s390
|
||||||
|
- ciflow/riscv64
|
||||||
- ciflow/slow
|
- ciflow/slow
|
||||||
- ciflow/torchbench
|
|
||||||
- ciflow/triton_binaries
|
|
||||||
- ciflow/trunk
|
- ciflow/trunk
|
||||||
- ciflow/unstable
|
- ciflow/unstable
|
||||||
- ciflow/vllm
|
|
||||||
- ciflow/win-arm64
|
|
||||||
- ciflow/xpu
|
- ciflow/xpu
|
||||||
|
- ciflow/vllm
|
||||||
|
- ciflow/torchbench
|
||||||
|
- ciflow/op-benchmark
|
||||||
|
- ciflow/pull
|
||||||
|
- ciflow/h100
|
||||||
|
- ciflow/h100-distributed
|
||||||
|
- ciflow/win-arm64
|
||||||
|
- ciflow/h100-symm-mem
|
||||||
|
- ciflow/h100-cutlass-backend
|
||||||
retryable_workflows:
|
retryable_workflows:
|
||||||
- pull
|
- pull
|
||||||
- trunk
|
- trunk
|
||||||
@ -47,4 +44,4 @@ retryable_workflows:
|
|||||||
- inductor-A100-perf-nightly
|
- inductor-A100-perf-nightly
|
||||||
labeler_config: labeler.yml
|
labeler_config: labeler.yml
|
||||||
label_to_label_config: label_to_label.yml
|
label_to_label_config: label_to_label.yml
|
||||||
mergebot: true
|
mergebot: True
|
||||||
|
|||||||
@ -30,7 +30,7 @@ CUDA_ARCHES_CUDNN_VERSION = {
|
|||||||
}
|
}
|
||||||
|
|
||||||
# NOTE: Please also update the ROCm sources in `PIP_SOURCES` in tools/nightly.py when changing this
|
# NOTE: Please also update the ROCm sources in `PIP_SOURCES` in tools/nightly.py when changing this
|
||||||
ROCM_ARCHES = ["6.4", "7.0"]
|
ROCM_ARCHES = ["6.3", "6.4"]
|
||||||
|
|
||||||
XPU_ARCHES = ["xpu"]
|
XPU_ARCHES = ["xpu"]
|
||||||
|
|
||||||
@ -53,7 +53,7 @@ PYTORCH_EXTRA_INSTALL_REQUIREMENTS = {
|
|||||||
"nvidia-cusolver-cu12==11.7.1.2; platform_system == 'Linux' | "
|
"nvidia-cusolver-cu12==11.7.1.2; platform_system == 'Linux' | "
|
||||||
"nvidia-cusparse-cu12==12.5.4.2; platform_system == 'Linux' | "
|
"nvidia-cusparse-cu12==12.5.4.2; platform_system == 'Linux' | "
|
||||||
"nvidia-cusparselt-cu12==0.7.1; platform_system == 'Linux' | "
|
"nvidia-cusparselt-cu12==0.7.1; platform_system == 'Linux' | "
|
||||||
"nvidia-nccl-cu12==2.28.3; platform_system == 'Linux' | "
|
"nvidia-nccl-cu12==2.27.5; platform_system == 'Linux' | "
|
||||||
"nvidia-nvshmem-cu12==3.3.24; platform_system == 'Linux' | "
|
"nvidia-nvshmem-cu12==3.3.24; platform_system == 'Linux' | "
|
||||||
"nvidia-nvtx-cu12==12.6.77; platform_system == 'Linux' | "
|
"nvidia-nvtx-cu12==12.6.77; platform_system == 'Linux' | "
|
||||||
"nvidia-nvjitlink-cu12==12.6.85; platform_system == 'Linux' | "
|
"nvidia-nvjitlink-cu12==12.6.85; platform_system == 'Linux' | "
|
||||||
@ -70,7 +70,7 @@ PYTORCH_EXTRA_INSTALL_REQUIREMENTS = {
|
|||||||
"nvidia-cusolver-cu12==11.7.3.90; platform_system == 'Linux' | "
|
"nvidia-cusolver-cu12==11.7.3.90; platform_system == 'Linux' | "
|
||||||
"nvidia-cusparse-cu12==12.5.8.93; platform_system == 'Linux' | "
|
"nvidia-cusparse-cu12==12.5.8.93; platform_system == 'Linux' | "
|
||||||
"nvidia-cusparselt-cu12==0.7.1; platform_system == 'Linux' | "
|
"nvidia-cusparselt-cu12==0.7.1; platform_system == 'Linux' | "
|
||||||
"nvidia-nccl-cu12==2.28.3; platform_system == 'Linux' | "
|
"nvidia-nccl-cu12==2.27.5; platform_system == 'Linux' | "
|
||||||
"nvidia-nvshmem-cu12==3.3.24; platform_system == 'Linux' | "
|
"nvidia-nvshmem-cu12==3.3.24; platform_system == 'Linux' | "
|
||||||
"nvidia-nvtx-cu12==12.8.90; platform_system == 'Linux' | "
|
"nvidia-nvtx-cu12==12.8.90; platform_system == 'Linux' | "
|
||||||
"nvidia-nvjitlink-cu12==12.8.93; platform_system == 'Linux' | "
|
"nvidia-nvjitlink-cu12==12.8.93; platform_system == 'Linux' | "
|
||||||
@ -87,7 +87,7 @@ PYTORCH_EXTRA_INSTALL_REQUIREMENTS = {
|
|||||||
"nvidia-cusolver==12.0.3.29; platform_system == 'Linux' | "
|
"nvidia-cusolver==12.0.3.29; platform_system == 'Linux' | "
|
||||||
"nvidia-cusparse==12.6.2.49; platform_system == 'Linux' | "
|
"nvidia-cusparse==12.6.2.49; platform_system == 'Linux' | "
|
||||||
"nvidia-cusparselt-cu13==0.8.0; platform_system == 'Linux' | "
|
"nvidia-cusparselt-cu13==0.8.0; platform_system == 'Linux' | "
|
||||||
"nvidia-nccl-cu13==2.28.3; platform_system == 'Linux' | "
|
"nvidia-nccl-cu13==2.27.7; platform_system == 'Linux' | "
|
||||||
"nvidia-nvshmem-cu13==3.3.24; platform_system == 'Linux' | "
|
"nvidia-nvshmem-cu13==3.3.24; platform_system == 'Linux' | "
|
||||||
"nvidia-nvtx==13.0.39; platform_system == 'Linux' | "
|
"nvidia-nvtx==13.0.39; platform_system == 'Linux' | "
|
||||||
"nvidia-nvjitlink==13.0.39; platform_system == 'Linux' | "
|
"nvidia-nvjitlink==13.0.39; platform_system == 'Linux' | "
|
||||||
|
|||||||
4
.github/scripts/generate_ci_workflows.py
vendored
4
.github/scripts/generate_ci_workflows.py
vendored
@ -135,7 +135,7 @@ ROCM_SMOKE_WORKFLOWS = [
|
|||||||
build_configs=generate_binary_build_matrix.generate_wheels_matrix(
|
build_configs=generate_binary_build_matrix.generate_wheels_matrix(
|
||||||
OperatingSystem.LINUX,
|
OperatingSystem.LINUX,
|
||||||
arches=["6.4"],
|
arches=["6.4"],
|
||||||
python_versions=["3.10"],
|
python_versions=["3.9"],
|
||||||
),
|
),
|
||||||
ciflow_config=CIFlowConfig(
|
ciflow_config=CIFlowConfig(
|
||||||
labels={
|
labels={
|
||||||
@ -155,7 +155,7 @@ LINUX_BINARY_SMOKE_WORKFLOWS = [
|
|||||||
package_type="manywheel",
|
package_type="manywheel",
|
||||||
build_configs=generate_binary_build_matrix.generate_wheels_matrix(
|
build_configs=generate_binary_build_matrix.generate_wheels_matrix(
|
||||||
OperatingSystem.LINUX,
|
OperatingSystem.LINUX,
|
||||||
arches=["13.0"],
|
arches=["12.8"],
|
||||||
python_versions=["3.12"],
|
python_versions=["3.12"],
|
||||||
),
|
),
|
||||||
branches="main",
|
branches="main",
|
||||||
|
|||||||
@ -71,15 +71,12 @@ jobs:
|
|||||||
with:!{{ upload.binary_env_as_input(config) }}
|
with:!{{ upload.binary_env_as_input(config) }}
|
||||||
{%- if "aarch64" in build_environment %}
|
{%- if "aarch64" in build_environment %}
|
||||||
runner_prefix: "${{ needs.get-label-type.outputs.label-type }}"
|
runner_prefix: "${{ needs.get-label-type.outputs.label-type }}"
|
||||||
runs_on: linux.arm64.r7g.12xlarge.memory
|
runs_on: linux.arm64.m7g.4xlarge.ephemeral
|
||||||
ALPINE_IMAGE: "arm64v8/alpine"
|
ALPINE_IMAGE: "arm64v8/alpine"
|
||||||
{%- elif "s390x" in build_environment %}
|
{%- elif "s390x" in build_environment %}
|
||||||
runs_on: linux.s390x
|
runs_on: linux.s390x
|
||||||
ALPINE_IMAGE: "docker.io/s390x/alpine"
|
ALPINE_IMAGE: "docker.io/s390x/alpine"
|
||||||
timeout-minutes: 420
|
timeout-minutes: 420
|
||||||
{%- elif config["gpu_arch_type"] == "rocm" %}
|
|
||||||
runner_prefix: "${{ needs.get-label-type.outputs.label-type }}"
|
|
||||||
timeout-minutes: 300
|
|
||||||
{%- elif "conda" in build_environment and config["gpu_arch_type"] == "cuda" %}
|
{%- elif "conda" in build_environment and config["gpu_arch_type"] == "cuda" %}
|
||||||
runner_prefix: "${{ needs.get-label-type.outputs.label-type }}"
|
runner_prefix: "${{ needs.get-label-type.outputs.label-type }}"
|
||||||
runs_on: linux.24xlarge.ephemeral
|
runs_on: linux.24xlarge.ephemeral
|
||||||
|
|||||||
2
.github/workflows/_binary-test-linux.yml
vendored
2
.github/workflows/_binary-test-linux.yml
vendored
@ -187,6 +187,8 @@ jobs:
|
|||||||
|
|
||||||
- name: Install nvidia driver, nvidia-docker runtime, set GPU_FLAG
|
- name: Install nvidia driver, nvidia-docker runtime, set GPU_FLAG
|
||||||
uses: pytorch/test-infra/.github/actions/setup-nvidia@main
|
uses: pytorch/test-infra/.github/actions/setup-nvidia@main
|
||||||
|
with:
|
||||||
|
driver-version: ${{ startsWith(inputs.GPU_ARCH_VERSION, '13') && '580.65.06' || '570.133.07' }}
|
||||||
if: ${{ inputs.GPU_ARCH_TYPE == 'cuda' && steps.filter.outputs.is-test-matrix-empty == 'False' }}
|
if: ${{ inputs.GPU_ARCH_TYPE == 'cuda' && steps.filter.outputs.is-test-matrix-empty == 'False' }}
|
||||||
|
|
||||||
- name: configure aws credentials
|
- name: configure aws credentials
|
||||||
|
|||||||
16
.github/workflows/_docs.yml
vendored
16
.github/workflows/_docs.yml
vendored
@ -67,7 +67,7 @@ jobs:
|
|||||||
# an OOM issue when running the job, so this upgrades the runner from 4xlarge
|
# an OOM issue when running the job, so this upgrades the runner from 4xlarge
|
||||||
# to the next available tier of 12xlarge. So much memory just to generate cpp
|
# to the next available tier of 12xlarge. So much memory just to generate cpp
|
||||||
# doc
|
# doc
|
||||||
runner: ${{ inputs.runner_prefix }}linux.12xlarge.memory
|
runner: ${{ inputs.runner_prefix }}linux.12xlarge
|
||||||
# TODO: Nightly cpp docs take longer and longer to finish (more than 3h now)
|
# TODO: Nightly cpp docs take longer and longer to finish (more than 3h now)
|
||||||
# Let's try to figure out how this can be improved
|
# Let's try to figure out how this can be improved
|
||||||
timeout-minutes: 360
|
timeout-minutes: 360
|
||||||
@ -75,6 +75,10 @@ jobs:
|
|||||||
runner: ${{ inputs.runner_prefix }}linux.2xlarge
|
runner: ${{ inputs.runner_prefix }}linux.2xlarge
|
||||||
# It takes less than 30m to finish python docs unless there are issues
|
# It takes less than 30m to finish python docs unless there are issues
|
||||||
timeout-minutes: 30
|
timeout-minutes: 30
|
||||||
|
- docs_type: functorch
|
||||||
|
runner: ${{ inputs.runner_prefix }}linux.2xlarge
|
||||||
|
# It takes less than 15m to finish functorch docs unless there are issues
|
||||||
|
timeout-minutes: 15
|
||||||
# Set a fixed name for this job instead of using the current matrix-generated name, i.e. build-docs (cpp, linux.12xlarge, 180)
|
# Set a fixed name for this job instead of using the current matrix-generated name, i.e. build-docs (cpp, linux.12xlarge, 180)
|
||||||
# The current name requires updating the database last docs push query from test-infra every time the matrix is updated
|
# The current name requires updating the database last docs push query from test-infra every time the matrix is updated
|
||||||
name: build-docs-${{ matrix.docs_type }}-${{ inputs.push }}
|
name: build-docs-${{ matrix.docs_type }}-${{ inputs.push }}
|
||||||
@ -207,6 +211,16 @@ jobs:
|
|||||||
path: cppdocs/
|
path: cppdocs/
|
||||||
s3-prefix: pytorch/pytorch/${{ github.event.pull_request.number }}/cppdocs
|
s3-prefix: pytorch/pytorch/${{ github.event.pull_request.number }}/cppdocs
|
||||||
|
|
||||||
|
- name: Upload functorch Docs Preview
|
||||||
|
uses: seemethere/upload-artifact-s3@baba72d0712b404f646cebe0730933554ebce96a # v5.1.0
|
||||||
|
if: ${{ github.event_name == 'pull_request' && matrix.docs_type == 'functorch' && steps.build-docs.outcome == 'success' }}
|
||||||
|
with:
|
||||||
|
retention-days: 14
|
||||||
|
s3-bucket: doc-previews
|
||||||
|
if-no-files-found: error
|
||||||
|
path: functorch_ghpages/nightly/
|
||||||
|
s3-prefix: pytorch/pytorch/${{ github.event.pull_request.number }}/functorchdocs
|
||||||
|
|
||||||
- name: Teardown Linux
|
- name: Teardown Linux
|
||||||
uses: pytorch/test-infra/.github/actions/teardown-linux@main
|
uses: pytorch/test-infra/.github/actions/teardown-linux@main
|
||||||
if: always()
|
if: always()
|
||||||
|
|||||||
28
.github/workflows/_get-changed-files.yml
vendored
28
.github/workflows/_get-changed-files.yml
vendored
@ -2,12 +2,6 @@ name: Get Changed Files
|
|||||||
|
|
||||||
on:
|
on:
|
||||||
workflow_call:
|
workflow_call:
|
||||||
inputs:
|
|
||||||
all_files:
|
|
||||||
description: "Whether to return all files instead of just changed files"
|
|
||||||
required: false
|
|
||||||
type: boolean
|
|
||||||
default: false
|
|
||||||
outputs:
|
outputs:
|
||||||
changed-files:
|
changed-files:
|
||||||
description: "List of changed files (space-separated) or '*' if not in a PR"
|
description: "List of changed files (space-separated) or '*' if not in a PR"
|
||||||
@ -32,23 +26,17 @@ jobs:
|
|||||||
# Get the PR number from the github context
|
# Get the PR number from the github context
|
||||||
PR_NUMBER="${{ github.event.number }}"
|
PR_NUMBER="${{ github.event.number }}"
|
||||||
|
|
||||||
# Check if all_files is requested
|
# Use gh CLI to get changed files in the PR with explicit repo
|
||||||
if [ "${{ inputs.all_files }}" = "true" ]; then
|
CHANGED_FILES=$(gh api repos/${{ github.repository }}/pulls/$PR_NUMBER/files --paginate --jq '.[] | select(.status != "removed") | .filename' | tr '\n' ' ' | sed 's/ $//')
|
||||||
echo "all_files input is true, returning all files"
|
|
||||||
echo "changed-files=*" >> "$GITHUB_OUTPUT"
|
|
||||||
else
|
|
||||||
# Use gh CLI to get changed files in the PR with explicit repo
|
|
||||||
CHANGED_FILES=$(gh api repos/${{ github.repository }}/pulls/$PR_NUMBER/files --paginate --jq '.[] | select(.status != "removed") | .filename' | tr '\n' ' ' | sed 's/ $//')
|
|
||||||
|
|
||||||
if [ -z "$CHANGED_FILES" ]; then
|
if [ -z "$CHANGED_FILES" ]; then
|
||||||
echo "No changed files found, setting to '*'"
|
echo "No changed files found, setting to '*'"
|
||||||
CHANGED_FILES="*"
|
CHANGED_FILES="*"
|
||||||
fi
|
|
||||||
|
|
||||||
echo "Changed files: $CHANGED_FILES"
|
|
||||||
echo "changed-files=$CHANGED_FILES" >> "$GITHUB_OUTPUT"
|
|
||||||
fi
|
fi
|
||||||
|
|
||||||
|
echo "Changed files: $CHANGED_FILES"
|
||||||
|
echo "changed-files=$CHANGED_FILES" >> "$GITHUB_OUTPUT"
|
||||||
|
|
||||||
else
|
else
|
||||||
echo "Not in PR context, setting changed files to '*'"
|
echo "Not in PR context, setting changed files to '*'"
|
||||||
echo "changed-files=*" >> "$GITHUB_OUTPUT"
|
echo "changed-files=*" >> "$GITHUB_OUTPUT"
|
||||||
|
|||||||
2
.github/workflows/_linux-test.yml
vendored
2
.github/workflows/_linux-test.yml
vendored
@ -169,7 +169,7 @@ jobs:
|
|||||||
id: install-nvidia-driver
|
id: install-nvidia-driver
|
||||||
uses: pytorch/test-infra/.github/actions/setup-nvidia@main
|
uses: pytorch/test-infra/.github/actions/setup-nvidia@main
|
||||||
with:
|
with:
|
||||||
driver-version: ${{ matrix.config == 'legacy_nvidia_driver' && '525.105.17' || '580.82.07' }}
|
driver-version: ${{ matrix.config == 'legacy_nvidia_driver' && '525.105.17' || '570.133.07' }}
|
||||||
if: ${{ contains(inputs.build-environment, 'cuda') && !contains(matrix.config, 'nogpu') && steps.check_container_runner.outputs.IN_CONTAINER_RUNNER == 'false' && !contains(matrix.runner, 'b200') }}
|
if: ${{ contains(inputs.build-environment, 'cuda') && !contains(matrix.config, 'nogpu') && steps.check_container_runner.outputs.IN_CONTAINER_RUNNER == 'false' && !contains(matrix.runner, 'b200') }}
|
||||||
|
|
||||||
- name: Setup GPU_FLAG for docker run
|
- name: Setup GPU_FLAG for docker run
|
||||||
|
|||||||
2
.github/workflows/_win-build.yml
vendored
2
.github/workflows/_win-build.yml
vendored
@ -151,7 +151,7 @@ jobs:
|
|||||||
BUILD_WHEEL: 1
|
BUILD_WHEEL: 1
|
||||||
MAX_JOBS: 8
|
MAX_JOBS: 8
|
||||||
CUDA_VERSION: ${{ inputs.cuda-version }}
|
CUDA_VERSION: ${{ inputs.cuda-version }}
|
||||||
PYTHON_VERSION: "3.10"
|
PYTHON_VERSION: "3.9"
|
||||||
SCCACHE_BUCKET: "ossci-compiler-cache"
|
SCCACHE_BUCKET: "ossci-compiler-cache"
|
||||||
SCCACHE_S3_KEY_PREFIX: ${{ github.workflow }}
|
SCCACHE_S3_KEY_PREFIX: ${{ github.workflow }}
|
||||||
SCCACHE_REGION: us-east-1
|
SCCACHE_REGION: us-east-1
|
||||||
|
|||||||
2
.github/workflows/_win-test.yml
vendored
2
.github/workflows/_win-test.yml
vendored
@ -184,7 +184,7 @@ jobs:
|
|||||||
env:
|
env:
|
||||||
USE_CUDA: ${{ inputs.cuda-version != 'cpu' && '1' || '0' }}
|
USE_CUDA: ${{ inputs.cuda-version != 'cpu' && '1' || '0' }}
|
||||||
INSTALL_WINDOWS_SDK: 1
|
INSTALL_WINDOWS_SDK: 1
|
||||||
PYTHON_VERSION: "3.10"
|
PYTHON_VERSION: 3.9
|
||||||
CONTINUE_THROUGH_ERROR: ${{ steps.keep-going.outputs.keep-going }}
|
CONTINUE_THROUGH_ERROR: ${{ steps.keep-going.outputs.keep-going }}
|
||||||
VERBOSE_TEST_LOGS: ${{ steps.keep-going.outputs.ci-verbose-test-logs }}
|
VERBOSE_TEST_LOGS: ${{ steps.keep-going.outputs.ci-verbose-test-logs }}
|
||||||
TEST_SHOWLOCALS: ${{ steps.keep-going.outputs.ci-test-showlocals }}
|
TEST_SHOWLOCALS: ${{ steps.keep-going.outputs.ci-test-showlocals }}
|
||||||
|
|||||||
60
.github/workflows/b200-symm-mem.yml
vendored
60
.github/workflows/b200-symm-mem.yml
vendored
@ -1,60 +0,0 @@
|
|||||||
name: Limited CI for symmetric memory tests on B200
|
|
||||||
|
|
||||||
on:
|
|
||||||
pull_request:
|
|
||||||
paths:
|
|
||||||
- .github/workflows/b200-symm-mem.yml
|
|
||||||
workflow_dispatch:
|
|
||||||
push:
|
|
||||||
tags:
|
|
||||||
- ciflow/b200-symm-mem/*
|
|
||||||
schedule:
|
|
||||||
- cron: 22 8 * * * # about 1:22am PDT
|
|
||||||
|
|
||||||
concurrency:
|
|
||||||
group: ${{ github.workflow }}-${{ github.event.pull_request.number || github.sha }}-${{ github.event_name == 'workflow_dispatch' }}-${{ github.event_name == 'schedule' }}
|
|
||||||
cancel-in-progress: true
|
|
||||||
|
|
||||||
permissions:
|
|
||||||
id-token: write
|
|
||||||
contents: read
|
|
||||||
|
|
||||||
jobs:
|
|
||||||
|
|
||||||
get-label-type:
|
|
||||||
if: github.repository_owner == 'pytorch'
|
|
||||||
name: get-label-type
|
|
||||||
uses: pytorch/pytorch/.github/workflows/_runner-determinator.yml@main
|
|
||||||
with:
|
|
||||||
triggering_actor: ${{ github.triggering_actor }}
|
|
||||||
issue_owner: ${{ github.event.pull_request.user.login || github.event.issue.user.login }}
|
|
||||||
curr_branch: ${{ github.head_ref || github.ref_name }}
|
|
||||||
curr_ref_type: ${{ github.ref_type }}
|
|
||||||
|
|
||||||
linux-jammy-cuda12_8-py3_10-gcc11-sm100-build-symm:
|
|
||||||
name: linux-jammy-cuda12.8-py3.10-gcc11-sm100-symm
|
|
||||||
uses: ./.github/workflows/_linux-build.yml
|
|
||||||
needs: get-label-type
|
|
||||||
with:
|
|
||||||
runner_prefix: "${{ needs.get-label-type.outputs.label-type }}"
|
|
||||||
runner: linux.12xlarge.memory
|
|
||||||
build-environment: linux-jammy-cuda12.8-py3.10-gcc11-sm100-symm
|
|
||||||
docker-image-name: ci-image:pytorch-linux-jammy-cuda12.8-cudnn9-py3-gcc11
|
|
||||||
cuda-arch-list: '10.0'
|
|
||||||
test-matrix: |
|
|
||||||
{ include: [
|
|
||||||
{ config: "b200-symm-mem", shard: 1, num_shards: 1, runner: "linux.dgx.b200.8" },
|
|
||||||
]}
|
|
||||||
secrets: inherit
|
|
||||||
|
|
||||||
linux-jammy-cuda12_8-py3_10-gcc11-sm100-test:
|
|
||||||
name: linux-jammy-cuda12.8-py3.10-gcc11-sm100-symm
|
|
||||||
uses: ./.github/workflows/_linux-test.yml
|
|
||||||
needs:
|
|
||||||
- linux-jammy-cuda12_8-py3_10-gcc11-sm100-build-symm
|
|
||||||
with:
|
|
||||||
build-environment: linux-jammy-cuda12.8-py3.10-gcc11-sm100-symm
|
|
||||||
docker-image: ${{ needs.linux-jammy-cuda12_8-py3_10-gcc11-sm100-build-symm.outputs.docker-image }}
|
|
||||||
test-matrix: ${{ needs.linux-jammy-cuda12_8-py3_10-gcc11-sm100-build-symm.outputs.test-matrix }}
|
|
||||||
aws-role-to-assume: arn:aws:iam::308535385114:role/gha_workflow_s3_and_ecr_read_only
|
|
||||||
secrets: inherit
|
|
||||||
2
.github/workflows/build-almalinux-images.yml
vendored
2
.github/workflows/build-almalinux-images.yml
vendored
@ -36,7 +36,7 @@ jobs:
|
|||||||
runs-on: linux.9xlarge.ephemeral
|
runs-on: linux.9xlarge.ephemeral
|
||||||
strategy:
|
strategy:
|
||||||
matrix:
|
matrix:
|
||||||
tag: ["cuda12.6", "cuda12.8", "cuda12.9", "cuda13.0", "rocm6.3", "rocm6.4", "rocm7.0", "cpu"]
|
tag: ["cuda12.6", "cuda12.8", "cuda12.9", "cuda13.0", "rocm6.3", "rocm6.4", "cpu"]
|
||||||
steps:
|
steps:
|
||||||
- name: Build docker image
|
- name: Build docker image
|
||||||
uses: pytorch/pytorch/.github/actions/binary-docker-build@main
|
uses: pytorch/pytorch/.github/actions/binary-docker-build@main
|
||||||
|
|||||||
2
.github/workflows/build-libtorch-images.yml
vendored
2
.github/workflows/build-libtorch-images.yml
vendored
@ -52,8 +52,8 @@ jobs:
|
|||||||
{ tag: "cuda12.9" },
|
{ tag: "cuda12.9" },
|
||||||
{ tag: "cuda12.8" },
|
{ tag: "cuda12.8" },
|
||||||
{ tag: "cuda12.6" },
|
{ tag: "cuda12.6" },
|
||||||
|
{ tag: "rocm6.3" },
|
||||||
{ tag: "rocm6.4" },
|
{ tag: "rocm6.4" },
|
||||||
{ tag: "rocm7.0" },
|
|
||||||
{ tag: "cpu" },
|
{ tag: "cpu" },
|
||||||
]
|
]
|
||||||
steps:
|
steps:
|
||||||
|
|||||||
2
.github/workflows/build-magma-rocm-linux.yml
vendored
2
.github/workflows/build-magma-rocm-linux.yml
vendored
@ -34,7 +34,7 @@ jobs:
|
|||||||
id-token: write
|
id-token: write
|
||||||
strategy:
|
strategy:
|
||||||
matrix:
|
matrix:
|
||||||
rocm_version: ["70", "64"]
|
rocm_version: ["64", "63"]
|
||||||
steps:
|
steps:
|
||||||
- name: Checkout PyTorch
|
- name: Checkout PyTorch
|
||||||
uses: actions/checkout@11bd71901bbe5b1630ceea73d27597364c9af683 # v4.2.2
|
uses: actions/checkout@11bd71901bbe5b1630ceea73d27597364c9af683 # v4.2.2
|
||||||
|
|||||||
2
.github/workflows/build-manywheel-images.yml
vendored
2
.github/workflows/build-manywheel-images.yml
vendored
@ -52,8 +52,8 @@ jobs:
|
|||||||
{ name: "manylinuxaarch64-builder", tag: "cuda13.0", runner: "linux.arm64.2xlarge.ephemeral" },
|
{ name: "manylinuxaarch64-builder", tag: "cuda13.0", runner: "linux.arm64.2xlarge.ephemeral" },
|
||||||
{ name: "manylinuxaarch64-builder", tag: "cuda12.8", runner: "linux.arm64.2xlarge.ephemeral" },
|
{ name: "manylinuxaarch64-builder", tag: "cuda12.8", runner: "linux.arm64.2xlarge.ephemeral" },
|
||||||
{ name: "manylinuxaarch64-builder", tag: "cuda12.6", runner: "linux.arm64.2xlarge.ephemeral" },
|
{ name: "manylinuxaarch64-builder", tag: "cuda12.6", runner: "linux.arm64.2xlarge.ephemeral" },
|
||||||
|
{ name: "manylinux2_28-builder", tag: "rocm6.3", runner: "linux.9xlarge.ephemeral" },
|
||||||
{ name: "manylinux2_28-builder", tag: "rocm6.4", runner: "linux.9xlarge.ephemeral" },
|
{ name: "manylinux2_28-builder", tag: "rocm6.4", runner: "linux.9xlarge.ephemeral" },
|
||||||
{ name: "manylinux2_28-builder", tag: "rocm7.0", runner: "linux.9xlarge.ephemeral" },
|
|
||||||
{ name: "manylinux2_28-builder", tag: "cpu", runner: "linux.9xlarge.ephemeral" },
|
{ name: "manylinux2_28-builder", tag: "cpu", runner: "linux.9xlarge.ephemeral" },
|
||||||
{ name: "manylinux2_28_aarch64-builder", tag: "cpu-aarch64", runner: "linux.arm64.2xlarge.ephemeral" },
|
{ name: "manylinux2_28_aarch64-builder", tag: "cpu-aarch64", runner: "linux.arm64.2xlarge.ephemeral" },
|
||||||
{ name: "manylinuxcxx11-abi-builder", tag: "cpu-cxx11-abi", runner: "linux.9xlarge.ephemeral" },
|
{ name: "manylinuxcxx11-abi-builder", tag: "cpu-cxx11-abi", runner: "linux.9xlarge.ephemeral" },
|
||||||
|
|||||||
9
.github/workflows/build-triton-wheel.yml
vendored
9
.github/workflows/build-triton-wheel.yml
vendored
@ -50,12 +50,12 @@ jobs:
|
|||||||
strategy:
|
strategy:
|
||||||
fail-fast: false
|
fail-fast: false
|
||||||
matrix:
|
matrix:
|
||||||
py_vers: [ "3.10", "3.11", "3.12", "3.13", "3.13t", "3.14", "3.14t" ]
|
py_vers: [ "3.9", "3.10", "3.11", "3.12", "3.13", "3.13t", "3.14", "3.14t" ]
|
||||||
device: ["cuda", "rocm", "xpu", "aarch64"]
|
device: ["cuda", "rocm", "xpu", "aarch64"]
|
||||||
docker-image: ["pytorch/manylinux2_28-builder:cpu"]
|
docker-image: ["pytorch/manylinux2_28-builder:cpu"]
|
||||||
include:
|
include:
|
||||||
- device: "rocm"
|
- device: "rocm"
|
||||||
rocm_version: "7.0"
|
rocm_version: "6.4"
|
||||||
runs_on: "${{ needs.get-label-type.outputs.label-type }}linux.4xlarge"
|
runs_on: "${{ needs.get-label-type.outputs.label-type }}linux.4xlarge"
|
||||||
- device: "cuda"
|
- device: "cuda"
|
||||||
rocm_version: ""
|
rocm_version: ""
|
||||||
@ -108,6 +108,9 @@ jobs:
|
|||||||
|
|
||||||
# Determine python executable for given version
|
# Determine python executable for given version
|
||||||
case $PY_VERS in
|
case $PY_VERS in
|
||||||
|
3.9)
|
||||||
|
PYTHON_EXECUTABLE=/opt/python/cp39-cp39/bin/python
|
||||||
|
;;
|
||||||
3.10)
|
3.10)
|
||||||
PYTHON_EXECUTABLE=/opt/python/cp310-cp310/bin/python
|
PYTHON_EXECUTABLE=/opt/python/cp310-cp310/bin/python
|
||||||
;;
|
;;
|
||||||
@ -191,7 +194,7 @@ jobs:
|
|||||||
strategy:
|
strategy:
|
||||||
fail-fast: false
|
fail-fast: false
|
||||||
matrix:
|
matrix:
|
||||||
py_vers: [ "3.10", "3.11", "3.12", "3.13", "3.13t", "3.14", "3.14t" ]
|
py_vers: [ "3.9", "3.10", "3.11", "3.12", "3.13", "3.13t", "3.14", "3.14t" ]
|
||||||
device: ["xpu"]
|
device: ["xpu"]
|
||||||
timeout-minutes: 40
|
timeout-minutes: 40
|
||||||
env:
|
env:
|
||||||
|
|||||||
2
.github/workflows/build-vllm-wheel.yml
vendored
2
.github/workflows/build-vllm-wheel.yml
vendored
@ -178,7 +178,7 @@ jobs:
|
|||||||
contents: read
|
contents: read
|
||||||
container:
|
container:
|
||||||
image: continuumio/miniconda3:4.12.0
|
image: continuumio/miniconda3:4.12.0
|
||||||
environment: ${{ ((github.event_name == 'push' && github.event.ref == 'refs/heads/main') || github.event_name == 'schedule' || github.event_name == 'workflow_dispatch') && 'nightly-wheel-upload' || '' }}
|
environment: ${{ (github.event_name == 'push' && github.event.ref == 'refs/heads/main') && 'nightly-wheel-upload' || '' }}
|
||||||
steps:
|
steps:
|
||||||
- uses: actions/checkout@11bd71901bbe5b1630ceea73d27597364c9af683 # v4.2.2
|
- uses: actions/checkout@11bd71901bbe5b1630ceea73d27597364c9af683 # v4.2.2
|
||||||
|
|
||||||
|
|||||||
59
.github/workflows/create_release.yml
vendored
59
.github/workflows/create_release.yml
vendored
@ -35,7 +35,6 @@ jobs:
|
|||||||
contents: write
|
contents: write
|
||||||
outputs:
|
outputs:
|
||||||
pt_release_name: ${{ steps.release_name.outputs.pt_release_name }}
|
pt_release_name: ${{ steps.release_name.outputs.pt_release_name }}
|
||||||
pt_pep517_release_name: ${{ steps.release_name.outputs.pt_pep517_release_name }}
|
|
||||||
steps:
|
steps:
|
||||||
- uses: actions/checkout@11bd71901bbe5b1630ceea73d27597364c9af683 # v4.2.2
|
- uses: actions/checkout@11bd71901bbe5b1630ceea73d27597364c9af683 # v4.2.2
|
||||||
with:
|
with:
|
||||||
@ -54,12 +53,8 @@ jobs:
|
|||||||
tag_or_branch="${tag_or_branch#refs/heads/}"
|
tag_or_branch="${tag_or_branch#refs/heads/}"
|
||||||
# replace directory separators with _ in branch name
|
# replace directory separators with _ in branch name
|
||||||
tag_or_branch="${tag_or_branch//\//_}"
|
tag_or_branch="${tag_or_branch//\//_}"
|
||||||
torch_version="$(python -c 'from tools.generate_torch_version import get_torch_version; print(get_torch_version())')"
|
echo "PT_RELEASE_NAME=pytorch-$tag_or_branch" >> "$GITHUB_ENV"
|
||||||
{
|
echo "PT_RELEASE_FILE=pytorch-$tag_or_branch.tar.gz" >> "$GITHUB_ENV"
|
||||||
echo "PT_RELEASE_NAME=pytorch-$tag_or_branch";
|
|
||||||
echo "PT_RELEASE_FILE=pytorch-$tag_or_branch.tar.gz";
|
|
||||||
echo "PT_PEP517_RELEASE_FILE=torch-${torch_version}.tar.gz";
|
|
||||||
} >> "$GITHUB_ENV"
|
|
||||||
- name: Checkout optional submodules
|
- name: Checkout optional submodules
|
||||||
run: python3 tools/optional_submodules.py
|
run: python3 tools/optional_submodules.py
|
||||||
- name: Copy docs requirements for inclusion
|
- name: Copy docs requirements for inclusion
|
||||||
@ -69,47 +64,30 @@ jobs:
|
|||||||
cp .ci/docker/requirements-docs.txt docs/requirements.txt
|
cp .ci/docker/requirements-docs.txt docs/requirements.txt
|
||||||
- name: Create source distribution
|
- name: Create source distribution
|
||||||
run: |
|
run: |
|
||||||
# Create new folder with specified name so extracting the archive yields that
|
# Create new folder with specified name so extracting the archive yields that
|
||||||
rm -rf "/tmp/$PT_RELEASE_NAME"
|
rm -rf "/tmp/$PT_RELEASE_NAME"
|
||||||
cp -r "$PWD" "/tmp/$PT_RELEASE_NAME"
|
cp -r "$PWD" "/tmp/$PT_RELEASE_NAME"
|
||||||
mv "/tmp/$PT_RELEASE_NAME" .
|
mv "/tmp/$PT_RELEASE_NAME" .
|
||||||
# Cleanup
|
# Cleanup
|
||||||
rm -rf "$PT_RELEASE_NAME"/{.circleci,.ci}
|
rm -rf "$PT_RELEASE_NAME"/{.circleci,.ci}
|
||||||
find "$PT_RELEASE_NAME" -name '.git*' -exec rm -rv {} \; || true
|
find "$PT_RELEASE_NAME" -name '.git*' -exec rm -rv {} \; || true
|
||||||
# Create archive
|
# Create archive
|
||||||
tar -czf "$PT_RELEASE_FILE" "$PT_RELEASE_NAME"
|
tar -czf "$PT_RELEASE_FILE" "$PT_RELEASE_NAME"
|
||||||
echo "Created source archive $PT_RELEASE_FILE with content: $(ls -a "$PT_RELEASE_NAME")"
|
echo "Created source archive $PT_RELEASE_FILE with content: $(ls -a "$PT_RELEASE_NAME")"
|
||||||
- name: Create PEP 517 compatible source distribution
|
|
||||||
run: |
|
|
||||||
pip install build==1.2.2.post1 || exit 1
|
|
||||||
python -m build --sdist || exit 1
|
|
||||||
cd dist || exit 1
|
|
||||||
- name: Upload source distribution for release
|
- name: Upload source distribution for release
|
||||||
if: ${{ github.event_name == 'release' }}
|
if: ${{ github.event_name == 'release' }}
|
||||||
uses: softprops/action-gh-release@da05d552573ad5aba039eaac05058a918a7bf631 # v2.2.2
|
uses: softprops/action-gh-release@da05d552573ad5aba039eaac05058a918a7bf631 # v2.2.2
|
||||||
with:
|
with:
|
||||||
files: |
|
files: ${{env.PT_RELEASE_FILE}}
|
||||||
${{ env.PT_RELEASE_FILE }}
|
- name: Upload source distribution to GHA artifacts for release tags
|
||||||
${{ env.PT_PEP517_RELEASE_FILE }}
|
|
||||||
- name: Upload source distribution to GHA artifacts # for release tags
|
|
||||||
if: ${{ github.event_name == 'push' && startsWith(github.ref, 'refs/tags/v') && contains(github.ref, 'rc') }}
|
if: ${{ github.event_name == 'push' && startsWith(github.ref, 'refs/tags/v') && contains(github.ref, 'rc') }}
|
||||||
uses: actions/upload-artifact@50769540e7f4bd5e21e526ee35c689e35e0d6874 # v4.4.0
|
uses: actions/upload-artifact@50769540e7f4bd5e21e526ee35c689e35e0d6874 # v4.4.0
|
||||||
with:
|
with:
|
||||||
name: ${{ env.PT_RELEASE_FILE }}
|
name: ${{ env.PT_RELEASE_FILE }}
|
||||||
path: ${{ env.PT_RELEASE_FILE }}
|
path: ${{ env.PT_RELEASE_FILE }}
|
||||||
- name: Upload PEP 517 source distribution to GHA artifacts # for release tags
|
|
||||||
if: ${{ github.event_name == 'push' && startsWith(github.ref, 'refs/tags/v') && contains(github.ref, 'rc') }}
|
|
||||||
uses: actions/upload-artifact@50769540e7f4bd5e21e526ee35c689e35e0d6874 # v4.4.0
|
|
||||||
with:
|
|
||||||
name: ${{ env.PT_PEP517_RELEASE_FILE }}
|
|
||||||
path: dist/${{ env.PT_PEP517_RELEASE_FILE }}
|
|
||||||
- name: Set output
|
- name: Set output
|
||||||
id: release_name
|
id: release_name
|
||||||
run: |
|
run: echo "pt_release_name=${{ env.PT_RELEASE_NAME }}.tar.gz" >> "${GITHUB_OUTPUT}"
|
||||||
{
|
|
||||||
echo "pt_release_name=${{ env.PT_RELEASE_FILE }}";
|
|
||||||
echo "pt_pep517_release_name=${{ env.PT_PEP517_RELEASE_FILE }}";
|
|
||||||
} >> "${GITHUB_OUTPUT}"
|
|
||||||
|
|
||||||
upload_source_code_to_s3:
|
upload_source_code_to_s3:
|
||||||
if: ${{ github.repository == 'pytorch/pytorch' && github.event_name == 'push' && startsWith(github.ref, 'refs/tags/v') && contains(github.ref, 'rc') }}
|
if: ${{ github.repository == 'pytorch/pytorch' && github.event_name == 'push' && startsWith(github.ref, 'refs/tags/v') && contains(github.ref, 'rc') }}
|
||||||
@ -125,9 +103,6 @@ jobs:
|
|||||||
- uses: actions/download-artifact@65a9edc5881444af0b9093a5e628f2fe47ea3b2e # v4.1.7
|
- uses: actions/download-artifact@65a9edc5881444af0b9093a5e628f2fe47ea3b2e # v4.1.7
|
||||||
with:
|
with:
|
||||||
name: ${{ needs.release.outputs.pt_release_name }}
|
name: ${{ needs.release.outputs.pt_release_name }}
|
||||||
- uses: actions/download-artifact@65a9edc5881444af0b9093a5e628f2fe47ea3b2e # v4.1.7
|
|
||||||
with:
|
|
||||||
name: ${{ needs.release.outputs.pt_pep517_release_name }}
|
|
||||||
- name: Configure AWS credentials(PyTorch account)
|
- name: Configure AWS credentials(PyTorch account)
|
||||||
uses: aws-actions/configure-aws-credentials@ececac1a45f3b08a01d2dd070d28d111c5fe6722 # v4.1.0
|
uses: aws-actions/configure-aws-credentials@ececac1a45f3b08a01d2dd070d28d111c5fe6722 # v4.1.0
|
||||||
with:
|
with:
|
||||||
@ -138,9 +113,7 @@ jobs:
|
|||||||
s3-bucket: pytorch
|
s3-bucket: pytorch
|
||||||
s3-prefix: source_code/test
|
s3-prefix: source_code/test
|
||||||
if-no-files-found: warn
|
if-no-files-found: warn
|
||||||
path: |
|
path: ${{ needs.release.outputs.pt_release_name }}
|
||||||
${{ needs.release.outputs.pt_release_name }}
|
|
||||||
${{ needs.release.outputs.pt_pep517_release_name }}
|
|
||||||
|
|
||||||
concurrency:
|
concurrency:
|
||||||
group: ${{ github.workflow }}-${{ github.event.pull_request.number || github.sha }}-${{ github.event_name }}
|
group: ${{ github.workflow }}-${{ github.event.pull_request.number || github.sha }}-${{ github.event_name }}
|
||||||
|
|||||||
5
.github/workflows/docker-builds.yml
vendored
5
.github/workflows/docker-builds.yml
vendored
@ -70,8 +70,9 @@ jobs:
|
|||||||
pytorch-linux-jammy-py3-clang18-asan,
|
pytorch-linux-jammy-py3-clang18-asan,
|
||||||
pytorch-linux-jammy-py3-clang12-onnx,
|
pytorch-linux-jammy-py3-clang12-onnx,
|
||||||
pytorch-linux-jammy-linter,
|
pytorch-linux-jammy-linter,
|
||||||
pytorch-linux-jammy-cuda12.8-cudnn9-py3.10-linter,
|
pytorch-linux-jammy-cuda12.8-cudnn9-py3.9-linter,
|
||||||
pytorch-linux-jammy-py3-clang12-executorch,
|
# Executorch pin needs update
|
||||||
|
# pytorch-linux-jammy-py3-clang12-executorch,
|
||||||
pytorch-linux-jammy-py3.12-triton-cpu,
|
pytorch-linux-jammy-py3.12-triton-cpu,
|
||||||
pytorch-linux-noble-riscv64-py3.12-gcc14
|
pytorch-linux-noble-riscv64-py3.12-gcc14
|
||||||
]
|
]
|
||||||
|
|||||||
98
.github/workflows/generated-linux-aarch64-binary-manywheel-nightly.yml
generated
vendored
98
.github/workflows/generated-linux-aarch64-binary-manywheel-nightly.yml
generated
vendored
@ -62,7 +62,7 @@ jobs:
|
|||||||
DOCKER_IMAGE_TAG_PREFIX: cpu-aarch64
|
DOCKER_IMAGE_TAG_PREFIX: cpu-aarch64
|
||||||
DESIRED_PYTHON: "3.10"
|
DESIRED_PYTHON: "3.10"
|
||||||
runner_prefix: "${{ needs.get-label-type.outputs.label-type }}"
|
runner_prefix: "${{ needs.get-label-type.outputs.label-type }}"
|
||||||
runs_on: linux.arm64.r7g.12xlarge.memory
|
runs_on: linux.arm64.m7g.4xlarge.ephemeral
|
||||||
ALPINE_IMAGE: "arm64v8/alpine"
|
ALPINE_IMAGE: "arm64v8/alpine"
|
||||||
build_name: manywheel-py3_10-cpu-aarch64
|
build_name: manywheel-py3_10-cpu-aarch64
|
||||||
build_environment: linux-aarch64-binary-manywheel
|
build_environment: linux-aarch64-binary-manywheel
|
||||||
@ -128,11 +128,11 @@ jobs:
|
|||||||
DOCKER_IMAGE_TAG_PREFIX: cuda12.6
|
DOCKER_IMAGE_TAG_PREFIX: cuda12.6
|
||||||
DESIRED_PYTHON: "3.10"
|
DESIRED_PYTHON: "3.10"
|
||||||
runner_prefix: "${{ needs.get-label-type.outputs.label-type }}"
|
runner_prefix: "${{ needs.get-label-type.outputs.label-type }}"
|
||||||
runs_on: linux.arm64.r7g.12xlarge.memory
|
runs_on: linux.arm64.m7g.4xlarge.ephemeral
|
||||||
ALPINE_IMAGE: "arm64v8/alpine"
|
ALPINE_IMAGE: "arm64v8/alpine"
|
||||||
build_name: manywheel-py3_10-cuda-aarch64-12_6
|
build_name: manywheel-py3_10-cuda-aarch64-12_6
|
||||||
build_environment: linux-aarch64-binary-manywheel
|
build_environment: linux-aarch64-binary-manywheel
|
||||||
PYTORCH_EXTRA_INSTALL_REQUIREMENTS: nvidia-cuda-nvrtc-cu12==12.6.77; platform_system == 'Linux' | nvidia-cuda-runtime-cu12==12.6.77; platform_system == 'Linux' | nvidia-cuda-cupti-cu12==12.6.80; platform_system == 'Linux' | nvidia-cudnn-cu12==9.10.2.21; platform_system == 'Linux' | nvidia-cublas-cu12==12.6.4.1; platform_system == 'Linux' | nvidia-cufft-cu12==11.3.0.4; platform_system == 'Linux' | nvidia-curand-cu12==10.3.7.77; platform_system == 'Linux' | nvidia-cusolver-cu12==11.7.1.2; platform_system == 'Linux' | nvidia-cusparse-cu12==12.5.4.2; platform_system == 'Linux' | nvidia-cusparselt-cu12==0.7.1; platform_system == 'Linux' | nvidia-nccl-cu12==2.28.3; platform_system == 'Linux' | nvidia-nvshmem-cu12==3.3.24; platform_system == 'Linux' | nvidia-nvtx-cu12==12.6.77; platform_system == 'Linux' | nvidia-nvjitlink-cu12==12.6.85; platform_system == 'Linux' | nvidia-cufile-cu12==1.11.1.6; platform_system == 'Linux'
|
PYTORCH_EXTRA_INSTALL_REQUIREMENTS: nvidia-cuda-nvrtc-cu12==12.6.77; platform_system == 'Linux' | nvidia-cuda-runtime-cu12==12.6.77; platform_system == 'Linux' | nvidia-cuda-cupti-cu12==12.6.80; platform_system == 'Linux' | nvidia-cudnn-cu12==9.10.2.21; platform_system == 'Linux' | nvidia-cublas-cu12==12.6.4.1; platform_system == 'Linux' | nvidia-cufft-cu12==11.3.0.4; platform_system == 'Linux' | nvidia-curand-cu12==10.3.7.77; platform_system == 'Linux' | nvidia-cusolver-cu12==11.7.1.2; platform_system == 'Linux' | nvidia-cusparse-cu12==12.5.4.2; platform_system == 'Linux' | nvidia-cusparselt-cu12==0.7.1; platform_system == 'Linux' | nvidia-nccl-cu12==2.27.5; platform_system == 'Linux' | nvidia-nvshmem-cu12==3.3.24; platform_system == 'Linux' | nvidia-nvtx-cu12==12.6.77; platform_system == 'Linux' | nvidia-nvjitlink-cu12==12.6.85; platform_system == 'Linux' | nvidia-cufile-cu12==1.11.1.6; platform_system == 'Linux'
|
||||||
timeout-minutes: 420
|
timeout-minutes: 420
|
||||||
secrets:
|
secrets:
|
||||||
github-token: ${{ secrets.GITHUB_TOKEN }}
|
github-token: ${{ secrets.GITHUB_TOKEN }}
|
||||||
@ -174,11 +174,11 @@ jobs:
|
|||||||
DOCKER_IMAGE_TAG_PREFIX: cuda12.8
|
DOCKER_IMAGE_TAG_PREFIX: cuda12.8
|
||||||
DESIRED_PYTHON: "3.10"
|
DESIRED_PYTHON: "3.10"
|
||||||
runner_prefix: "${{ needs.get-label-type.outputs.label-type }}"
|
runner_prefix: "${{ needs.get-label-type.outputs.label-type }}"
|
||||||
runs_on: linux.arm64.r7g.12xlarge.memory
|
runs_on: linux.arm64.m7g.4xlarge.ephemeral
|
||||||
ALPINE_IMAGE: "arm64v8/alpine"
|
ALPINE_IMAGE: "arm64v8/alpine"
|
||||||
build_name: manywheel-py3_10-cuda-aarch64-12_8
|
build_name: manywheel-py3_10-cuda-aarch64-12_8
|
||||||
build_environment: linux-aarch64-binary-manywheel
|
build_environment: linux-aarch64-binary-manywheel
|
||||||
PYTORCH_EXTRA_INSTALL_REQUIREMENTS: nvidia-cuda-nvrtc-cu12==12.8.93; platform_system == 'Linux' | nvidia-cuda-runtime-cu12==12.8.90; platform_system == 'Linux' | nvidia-cuda-cupti-cu12==12.8.90; platform_system == 'Linux' | nvidia-cudnn-cu12==9.10.2.21; platform_system == 'Linux' | nvidia-cublas-cu12==12.8.4.1; platform_system == 'Linux' | nvidia-cufft-cu12==11.3.3.83; platform_system == 'Linux' | nvidia-curand-cu12==10.3.9.90; platform_system == 'Linux' | nvidia-cusolver-cu12==11.7.3.90; platform_system == 'Linux' | nvidia-cusparse-cu12==12.5.8.93; platform_system == 'Linux' | nvidia-cusparselt-cu12==0.7.1; platform_system == 'Linux' | nvidia-nccl-cu12==2.28.3; platform_system == 'Linux' | nvidia-nvshmem-cu12==3.3.24; platform_system == 'Linux' | nvidia-nvtx-cu12==12.8.90; platform_system == 'Linux' | nvidia-nvjitlink-cu12==12.8.93; platform_system == 'Linux' | nvidia-cufile-cu12==1.13.1.3; platform_system == 'Linux'
|
PYTORCH_EXTRA_INSTALL_REQUIREMENTS: nvidia-cuda-nvrtc-cu12==12.8.93; platform_system == 'Linux' | nvidia-cuda-runtime-cu12==12.8.90; platform_system == 'Linux' | nvidia-cuda-cupti-cu12==12.8.90; platform_system == 'Linux' | nvidia-cudnn-cu12==9.10.2.21; platform_system == 'Linux' | nvidia-cublas-cu12==12.8.4.1; platform_system == 'Linux' | nvidia-cufft-cu12==11.3.3.83; platform_system == 'Linux' | nvidia-curand-cu12==10.3.9.90; platform_system == 'Linux' | nvidia-cusolver-cu12==11.7.3.90; platform_system == 'Linux' | nvidia-cusparse-cu12==12.5.8.93; platform_system == 'Linux' | nvidia-cusparselt-cu12==0.7.1; platform_system == 'Linux' | nvidia-nccl-cu12==2.27.5; platform_system == 'Linux' | nvidia-nvshmem-cu12==3.3.24; platform_system == 'Linux' | nvidia-nvtx-cu12==12.8.90; platform_system == 'Linux' | nvidia-nvjitlink-cu12==12.8.93; platform_system == 'Linux' | nvidia-cufile-cu12==1.13.1.3; platform_system == 'Linux'
|
||||||
timeout-minutes: 420
|
timeout-minutes: 420
|
||||||
secrets:
|
secrets:
|
||||||
github-token: ${{ secrets.GITHUB_TOKEN }}
|
github-token: ${{ secrets.GITHUB_TOKEN }}
|
||||||
@ -220,11 +220,11 @@ jobs:
|
|||||||
DOCKER_IMAGE_TAG_PREFIX: cuda13.0
|
DOCKER_IMAGE_TAG_PREFIX: cuda13.0
|
||||||
DESIRED_PYTHON: "3.10"
|
DESIRED_PYTHON: "3.10"
|
||||||
runner_prefix: "${{ needs.get-label-type.outputs.label-type }}"
|
runner_prefix: "${{ needs.get-label-type.outputs.label-type }}"
|
||||||
runs_on: linux.arm64.r7g.12xlarge.memory
|
runs_on: linux.arm64.m7g.4xlarge.ephemeral
|
||||||
ALPINE_IMAGE: "arm64v8/alpine"
|
ALPINE_IMAGE: "arm64v8/alpine"
|
||||||
build_name: manywheel-py3_10-cuda-aarch64-13_0
|
build_name: manywheel-py3_10-cuda-aarch64-13_0
|
||||||
build_environment: linux-aarch64-binary-manywheel
|
build_environment: linux-aarch64-binary-manywheel
|
||||||
PYTORCH_EXTRA_INSTALL_REQUIREMENTS: nvidia-cuda-nvrtc==13.0.48; platform_system == 'Linux' | nvidia-cuda-runtime==13.0.48; platform_system == 'Linux' | nvidia-cuda-cupti==13.0.48; platform_system == 'Linux' | nvidia-cudnn-cu13==9.13.0.50; platform_system == 'Linux' | nvidia-cublas==13.0.0.19; platform_system == 'Linux' | nvidia-cufft==12.0.0.15; platform_system == 'Linux' | nvidia-curand==10.4.0.35; platform_system == 'Linux' | nvidia-cusolver==12.0.3.29; platform_system == 'Linux' | nvidia-cusparse==12.6.2.49; platform_system == 'Linux' | nvidia-cusparselt-cu13==0.8.0; platform_system == 'Linux' | nvidia-nccl-cu13==2.28.3; platform_system == 'Linux' | nvidia-nvshmem-cu13==3.3.24; platform_system == 'Linux' | nvidia-nvtx==13.0.39; platform_system == 'Linux' | nvidia-nvjitlink==13.0.39; platform_system == 'Linux' | nvidia-cufile==1.15.0.42; platform_system == 'Linux'
|
PYTORCH_EXTRA_INSTALL_REQUIREMENTS: nvidia-cuda-nvrtc==13.0.48; platform_system == 'Linux' | nvidia-cuda-runtime==13.0.48; platform_system == 'Linux' | nvidia-cuda-cupti==13.0.48; platform_system == 'Linux' | nvidia-cudnn-cu13==9.13.0.50; platform_system == 'Linux' | nvidia-cublas==13.0.0.19; platform_system == 'Linux' | nvidia-cufft==12.0.0.15; platform_system == 'Linux' | nvidia-curand==10.4.0.35; platform_system == 'Linux' | nvidia-cusolver==12.0.3.29; platform_system == 'Linux' | nvidia-cusparse==12.6.2.49; platform_system == 'Linux' | nvidia-cusparselt-cu13==0.8.0; platform_system == 'Linux' | nvidia-nccl-cu13==2.27.7; platform_system == 'Linux' | nvidia-nvshmem-cu13==3.3.24; platform_system == 'Linux' | nvidia-nvtx==13.0.39; platform_system == 'Linux' | nvidia-nvjitlink==13.0.39; platform_system == 'Linux' | nvidia-cufile==1.15.0.42; platform_system == 'Linux'
|
||||||
timeout-minutes: 420
|
timeout-minutes: 420
|
||||||
secrets:
|
secrets:
|
||||||
github-token: ${{ secrets.GITHUB_TOKEN }}
|
github-token: ${{ secrets.GITHUB_TOKEN }}
|
||||||
@ -265,7 +265,7 @@ jobs:
|
|||||||
DOCKER_IMAGE_TAG_PREFIX: cpu-aarch64
|
DOCKER_IMAGE_TAG_PREFIX: cpu-aarch64
|
||||||
DESIRED_PYTHON: "3.11"
|
DESIRED_PYTHON: "3.11"
|
||||||
runner_prefix: "${{ needs.get-label-type.outputs.label-type }}"
|
runner_prefix: "${{ needs.get-label-type.outputs.label-type }}"
|
||||||
runs_on: linux.arm64.r7g.12xlarge.memory
|
runs_on: linux.arm64.m7g.4xlarge.ephemeral
|
||||||
ALPINE_IMAGE: "arm64v8/alpine"
|
ALPINE_IMAGE: "arm64v8/alpine"
|
||||||
build_name: manywheel-py3_11-cpu-aarch64
|
build_name: manywheel-py3_11-cpu-aarch64
|
||||||
build_environment: linux-aarch64-binary-manywheel
|
build_environment: linux-aarch64-binary-manywheel
|
||||||
@ -331,11 +331,11 @@ jobs:
|
|||||||
DOCKER_IMAGE_TAG_PREFIX: cuda12.6
|
DOCKER_IMAGE_TAG_PREFIX: cuda12.6
|
||||||
DESIRED_PYTHON: "3.11"
|
DESIRED_PYTHON: "3.11"
|
||||||
runner_prefix: "${{ needs.get-label-type.outputs.label-type }}"
|
runner_prefix: "${{ needs.get-label-type.outputs.label-type }}"
|
||||||
runs_on: linux.arm64.r7g.12xlarge.memory
|
runs_on: linux.arm64.m7g.4xlarge.ephemeral
|
||||||
ALPINE_IMAGE: "arm64v8/alpine"
|
ALPINE_IMAGE: "arm64v8/alpine"
|
||||||
build_name: manywheel-py3_11-cuda-aarch64-12_6
|
build_name: manywheel-py3_11-cuda-aarch64-12_6
|
||||||
build_environment: linux-aarch64-binary-manywheel
|
build_environment: linux-aarch64-binary-manywheel
|
||||||
PYTORCH_EXTRA_INSTALL_REQUIREMENTS: nvidia-cuda-nvrtc-cu12==12.6.77; platform_system == 'Linux' | nvidia-cuda-runtime-cu12==12.6.77; platform_system == 'Linux' | nvidia-cuda-cupti-cu12==12.6.80; platform_system == 'Linux' | nvidia-cudnn-cu12==9.10.2.21; platform_system == 'Linux' | nvidia-cublas-cu12==12.6.4.1; platform_system == 'Linux' | nvidia-cufft-cu12==11.3.0.4; platform_system == 'Linux' | nvidia-curand-cu12==10.3.7.77; platform_system == 'Linux' | nvidia-cusolver-cu12==11.7.1.2; platform_system == 'Linux' | nvidia-cusparse-cu12==12.5.4.2; platform_system == 'Linux' | nvidia-cusparselt-cu12==0.7.1; platform_system == 'Linux' | nvidia-nccl-cu12==2.28.3; platform_system == 'Linux' | nvidia-nvshmem-cu12==3.3.24; platform_system == 'Linux' | nvidia-nvtx-cu12==12.6.77; platform_system == 'Linux' | nvidia-nvjitlink-cu12==12.6.85; platform_system == 'Linux' | nvidia-cufile-cu12==1.11.1.6; platform_system == 'Linux'
|
PYTORCH_EXTRA_INSTALL_REQUIREMENTS: nvidia-cuda-nvrtc-cu12==12.6.77; platform_system == 'Linux' | nvidia-cuda-runtime-cu12==12.6.77; platform_system == 'Linux' | nvidia-cuda-cupti-cu12==12.6.80; platform_system == 'Linux' | nvidia-cudnn-cu12==9.10.2.21; platform_system == 'Linux' | nvidia-cublas-cu12==12.6.4.1; platform_system == 'Linux' | nvidia-cufft-cu12==11.3.0.4; platform_system == 'Linux' | nvidia-curand-cu12==10.3.7.77; platform_system == 'Linux' | nvidia-cusolver-cu12==11.7.1.2; platform_system == 'Linux' | nvidia-cusparse-cu12==12.5.4.2; platform_system == 'Linux' | nvidia-cusparselt-cu12==0.7.1; platform_system == 'Linux' | nvidia-nccl-cu12==2.27.5; platform_system == 'Linux' | nvidia-nvshmem-cu12==3.3.24; platform_system == 'Linux' | nvidia-nvtx-cu12==12.6.77; platform_system == 'Linux' | nvidia-nvjitlink-cu12==12.6.85; platform_system == 'Linux' | nvidia-cufile-cu12==1.11.1.6; platform_system == 'Linux'
|
||||||
timeout-minutes: 420
|
timeout-minutes: 420
|
||||||
secrets:
|
secrets:
|
||||||
github-token: ${{ secrets.GITHUB_TOKEN }}
|
github-token: ${{ secrets.GITHUB_TOKEN }}
|
||||||
@ -377,11 +377,11 @@ jobs:
|
|||||||
DOCKER_IMAGE_TAG_PREFIX: cuda12.8
|
DOCKER_IMAGE_TAG_PREFIX: cuda12.8
|
||||||
DESIRED_PYTHON: "3.11"
|
DESIRED_PYTHON: "3.11"
|
||||||
runner_prefix: "${{ needs.get-label-type.outputs.label-type }}"
|
runner_prefix: "${{ needs.get-label-type.outputs.label-type }}"
|
||||||
runs_on: linux.arm64.r7g.12xlarge.memory
|
runs_on: linux.arm64.m7g.4xlarge.ephemeral
|
||||||
ALPINE_IMAGE: "arm64v8/alpine"
|
ALPINE_IMAGE: "arm64v8/alpine"
|
||||||
build_name: manywheel-py3_11-cuda-aarch64-12_8
|
build_name: manywheel-py3_11-cuda-aarch64-12_8
|
||||||
build_environment: linux-aarch64-binary-manywheel
|
build_environment: linux-aarch64-binary-manywheel
|
||||||
PYTORCH_EXTRA_INSTALL_REQUIREMENTS: nvidia-cuda-nvrtc-cu12==12.8.93; platform_system == 'Linux' | nvidia-cuda-runtime-cu12==12.8.90; platform_system == 'Linux' | nvidia-cuda-cupti-cu12==12.8.90; platform_system == 'Linux' | nvidia-cudnn-cu12==9.10.2.21; platform_system == 'Linux' | nvidia-cublas-cu12==12.8.4.1; platform_system == 'Linux' | nvidia-cufft-cu12==11.3.3.83; platform_system == 'Linux' | nvidia-curand-cu12==10.3.9.90; platform_system == 'Linux' | nvidia-cusolver-cu12==11.7.3.90; platform_system == 'Linux' | nvidia-cusparse-cu12==12.5.8.93; platform_system == 'Linux' | nvidia-cusparselt-cu12==0.7.1; platform_system == 'Linux' | nvidia-nccl-cu12==2.28.3; platform_system == 'Linux' | nvidia-nvshmem-cu12==3.3.24; platform_system == 'Linux' | nvidia-nvtx-cu12==12.8.90; platform_system == 'Linux' | nvidia-nvjitlink-cu12==12.8.93; platform_system == 'Linux' | nvidia-cufile-cu12==1.13.1.3; platform_system == 'Linux'
|
PYTORCH_EXTRA_INSTALL_REQUIREMENTS: nvidia-cuda-nvrtc-cu12==12.8.93; platform_system == 'Linux' | nvidia-cuda-runtime-cu12==12.8.90; platform_system == 'Linux' | nvidia-cuda-cupti-cu12==12.8.90; platform_system == 'Linux' | nvidia-cudnn-cu12==9.10.2.21; platform_system == 'Linux' | nvidia-cublas-cu12==12.8.4.1; platform_system == 'Linux' | nvidia-cufft-cu12==11.3.3.83; platform_system == 'Linux' | nvidia-curand-cu12==10.3.9.90; platform_system == 'Linux' | nvidia-cusolver-cu12==11.7.3.90; platform_system == 'Linux' | nvidia-cusparse-cu12==12.5.8.93; platform_system == 'Linux' | nvidia-cusparselt-cu12==0.7.1; platform_system == 'Linux' | nvidia-nccl-cu12==2.27.5; platform_system == 'Linux' | nvidia-nvshmem-cu12==3.3.24; platform_system == 'Linux' | nvidia-nvtx-cu12==12.8.90; platform_system == 'Linux' | nvidia-nvjitlink-cu12==12.8.93; platform_system == 'Linux' | nvidia-cufile-cu12==1.13.1.3; platform_system == 'Linux'
|
||||||
timeout-minutes: 420
|
timeout-minutes: 420
|
||||||
secrets:
|
secrets:
|
||||||
github-token: ${{ secrets.GITHUB_TOKEN }}
|
github-token: ${{ secrets.GITHUB_TOKEN }}
|
||||||
@ -423,11 +423,11 @@ jobs:
|
|||||||
DOCKER_IMAGE_TAG_PREFIX: cuda13.0
|
DOCKER_IMAGE_TAG_PREFIX: cuda13.0
|
||||||
DESIRED_PYTHON: "3.11"
|
DESIRED_PYTHON: "3.11"
|
||||||
runner_prefix: "${{ needs.get-label-type.outputs.label-type }}"
|
runner_prefix: "${{ needs.get-label-type.outputs.label-type }}"
|
||||||
runs_on: linux.arm64.r7g.12xlarge.memory
|
runs_on: linux.arm64.m7g.4xlarge.ephemeral
|
||||||
ALPINE_IMAGE: "arm64v8/alpine"
|
ALPINE_IMAGE: "arm64v8/alpine"
|
||||||
build_name: manywheel-py3_11-cuda-aarch64-13_0
|
build_name: manywheel-py3_11-cuda-aarch64-13_0
|
||||||
build_environment: linux-aarch64-binary-manywheel
|
build_environment: linux-aarch64-binary-manywheel
|
||||||
PYTORCH_EXTRA_INSTALL_REQUIREMENTS: nvidia-cuda-nvrtc==13.0.48; platform_system == 'Linux' | nvidia-cuda-runtime==13.0.48; platform_system == 'Linux' | nvidia-cuda-cupti==13.0.48; platform_system == 'Linux' | nvidia-cudnn-cu13==9.13.0.50; platform_system == 'Linux' | nvidia-cublas==13.0.0.19; platform_system == 'Linux' | nvidia-cufft==12.0.0.15; platform_system == 'Linux' | nvidia-curand==10.4.0.35; platform_system == 'Linux' | nvidia-cusolver==12.0.3.29; platform_system == 'Linux' | nvidia-cusparse==12.6.2.49; platform_system == 'Linux' | nvidia-cusparselt-cu13==0.8.0; platform_system == 'Linux' | nvidia-nccl-cu13==2.28.3; platform_system == 'Linux' | nvidia-nvshmem-cu13==3.3.24; platform_system == 'Linux' | nvidia-nvtx==13.0.39; platform_system == 'Linux' | nvidia-nvjitlink==13.0.39; platform_system == 'Linux' | nvidia-cufile==1.15.0.42; platform_system == 'Linux'
|
PYTORCH_EXTRA_INSTALL_REQUIREMENTS: nvidia-cuda-nvrtc==13.0.48; platform_system == 'Linux' | nvidia-cuda-runtime==13.0.48; platform_system == 'Linux' | nvidia-cuda-cupti==13.0.48; platform_system == 'Linux' | nvidia-cudnn-cu13==9.13.0.50; platform_system == 'Linux' | nvidia-cublas==13.0.0.19; platform_system == 'Linux' | nvidia-cufft==12.0.0.15; platform_system == 'Linux' | nvidia-curand==10.4.0.35; platform_system == 'Linux' | nvidia-cusolver==12.0.3.29; platform_system == 'Linux' | nvidia-cusparse==12.6.2.49; platform_system == 'Linux' | nvidia-cusparselt-cu13==0.8.0; platform_system == 'Linux' | nvidia-nccl-cu13==2.27.7; platform_system == 'Linux' | nvidia-nvshmem-cu13==3.3.24; platform_system == 'Linux' | nvidia-nvtx==13.0.39; platform_system == 'Linux' | nvidia-nvjitlink==13.0.39; platform_system == 'Linux' | nvidia-cufile==1.15.0.42; platform_system == 'Linux'
|
||||||
timeout-minutes: 420
|
timeout-minutes: 420
|
||||||
secrets:
|
secrets:
|
||||||
github-token: ${{ secrets.GITHUB_TOKEN }}
|
github-token: ${{ secrets.GITHUB_TOKEN }}
|
||||||
@ -468,7 +468,7 @@ jobs:
|
|||||||
DOCKER_IMAGE_TAG_PREFIX: cpu-aarch64
|
DOCKER_IMAGE_TAG_PREFIX: cpu-aarch64
|
||||||
DESIRED_PYTHON: "3.12"
|
DESIRED_PYTHON: "3.12"
|
||||||
runner_prefix: "${{ needs.get-label-type.outputs.label-type }}"
|
runner_prefix: "${{ needs.get-label-type.outputs.label-type }}"
|
||||||
runs_on: linux.arm64.r7g.12xlarge.memory
|
runs_on: linux.arm64.m7g.4xlarge.ephemeral
|
||||||
ALPINE_IMAGE: "arm64v8/alpine"
|
ALPINE_IMAGE: "arm64v8/alpine"
|
||||||
build_name: manywheel-py3_12-cpu-aarch64
|
build_name: manywheel-py3_12-cpu-aarch64
|
||||||
build_environment: linux-aarch64-binary-manywheel
|
build_environment: linux-aarch64-binary-manywheel
|
||||||
@ -534,11 +534,11 @@ jobs:
|
|||||||
DOCKER_IMAGE_TAG_PREFIX: cuda12.6
|
DOCKER_IMAGE_TAG_PREFIX: cuda12.6
|
||||||
DESIRED_PYTHON: "3.12"
|
DESIRED_PYTHON: "3.12"
|
||||||
runner_prefix: "${{ needs.get-label-type.outputs.label-type }}"
|
runner_prefix: "${{ needs.get-label-type.outputs.label-type }}"
|
||||||
runs_on: linux.arm64.r7g.12xlarge.memory
|
runs_on: linux.arm64.m7g.4xlarge.ephemeral
|
||||||
ALPINE_IMAGE: "arm64v8/alpine"
|
ALPINE_IMAGE: "arm64v8/alpine"
|
||||||
build_name: manywheel-py3_12-cuda-aarch64-12_6
|
build_name: manywheel-py3_12-cuda-aarch64-12_6
|
||||||
build_environment: linux-aarch64-binary-manywheel
|
build_environment: linux-aarch64-binary-manywheel
|
||||||
PYTORCH_EXTRA_INSTALL_REQUIREMENTS: nvidia-cuda-nvrtc-cu12==12.6.77; platform_system == 'Linux' | nvidia-cuda-runtime-cu12==12.6.77; platform_system == 'Linux' | nvidia-cuda-cupti-cu12==12.6.80; platform_system == 'Linux' | nvidia-cudnn-cu12==9.10.2.21; platform_system == 'Linux' | nvidia-cublas-cu12==12.6.4.1; platform_system == 'Linux' | nvidia-cufft-cu12==11.3.0.4; platform_system == 'Linux' | nvidia-curand-cu12==10.3.7.77; platform_system == 'Linux' | nvidia-cusolver-cu12==11.7.1.2; platform_system == 'Linux' | nvidia-cusparse-cu12==12.5.4.2; platform_system == 'Linux' | nvidia-cusparselt-cu12==0.7.1; platform_system == 'Linux' | nvidia-nccl-cu12==2.28.3; platform_system == 'Linux' | nvidia-nvshmem-cu12==3.3.24; platform_system == 'Linux' | nvidia-nvtx-cu12==12.6.77; platform_system == 'Linux' | nvidia-nvjitlink-cu12==12.6.85; platform_system == 'Linux' | nvidia-cufile-cu12==1.11.1.6; platform_system == 'Linux'
|
PYTORCH_EXTRA_INSTALL_REQUIREMENTS: nvidia-cuda-nvrtc-cu12==12.6.77; platform_system == 'Linux' | nvidia-cuda-runtime-cu12==12.6.77; platform_system == 'Linux' | nvidia-cuda-cupti-cu12==12.6.80; platform_system == 'Linux' | nvidia-cudnn-cu12==9.10.2.21; platform_system == 'Linux' | nvidia-cublas-cu12==12.6.4.1; platform_system == 'Linux' | nvidia-cufft-cu12==11.3.0.4; platform_system == 'Linux' | nvidia-curand-cu12==10.3.7.77; platform_system == 'Linux' | nvidia-cusolver-cu12==11.7.1.2; platform_system == 'Linux' | nvidia-cusparse-cu12==12.5.4.2; platform_system == 'Linux' | nvidia-cusparselt-cu12==0.7.1; platform_system == 'Linux' | nvidia-nccl-cu12==2.27.5; platform_system == 'Linux' | nvidia-nvshmem-cu12==3.3.24; platform_system == 'Linux' | nvidia-nvtx-cu12==12.6.77; platform_system == 'Linux' | nvidia-nvjitlink-cu12==12.6.85; platform_system == 'Linux' | nvidia-cufile-cu12==1.11.1.6; platform_system == 'Linux'
|
||||||
timeout-minutes: 420
|
timeout-minutes: 420
|
||||||
secrets:
|
secrets:
|
||||||
github-token: ${{ secrets.GITHUB_TOKEN }}
|
github-token: ${{ secrets.GITHUB_TOKEN }}
|
||||||
@ -580,11 +580,11 @@ jobs:
|
|||||||
DOCKER_IMAGE_TAG_PREFIX: cuda12.8
|
DOCKER_IMAGE_TAG_PREFIX: cuda12.8
|
||||||
DESIRED_PYTHON: "3.12"
|
DESIRED_PYTHON: "3.12"
|
||||||
runner_prefix: "${{ needs.get-label-type.outputs.label-type }}"
|
runner_prefix: "${{ needs.get-label-type.outputs.label-type }}"
|
||||||
runs_on: linux.arm64.r7g.12xlarge.memory
|
runs_on: linux.arm64.m7g.4xlarge.ephemeral
|
||||||
ALPINE_IMAGE: "arm64v8/alpine"
|
ALPINE_IMAGE: "arm64v8/alpine"
|
||||||
build_name: manywheel-py3_12-cuda-aarch64-12_8
|
build_name: manywheel-py3_12-cuda-aarch64-12_8
|
||||||
build_environment: linux-aarch64-binary-manywheel
|
build_environment: linux-aarch64-binary-manywheel
|
||||||
PYTORCH_EXTRA_INSTALL_REQUIREMENTS: nvidia-cuda-nvrtc-cu12==12.8.93; platform_system == 'Linux' | nvidia-cuda-runtime-cu12==12.8.90; platform_system == 'Linux' | nvidia-cuda-cupti-cu12==12.8.90; platform_system == 'Linux' | nvidia-cudnn-cu12==9.10.2.21; platform_system == 'Linux' | nvidia-cublas-cu12==12.8.4.1; platform_system == 'Linux' | nvidia-cufft-cu12==11.3.3.83; platform_system == 'Linux' | nvidia-curand-cu12==10.3.9.90; platform_system == 'Linux' | nvidia-cusolver-cu12==11.7.3.90; platform_system == 'Linux' | nvidia-cusparse-cu12==12.5.8.93; platform_system == 'Linux' | nvidia-cusparselt-cu12==0.7.1; platform_system == 'Linux' | nvidia-nccl-cu12==2.28.3; platform_system == 'Linux' | nvidia-nvshmem-cu12==3.3.24; platform_system == 'Linux' | nvidia-nvtx-cu12==12.8.90; platform_system == 'Linux' | nvidia-nvjitlink-cu12==12.8.93; platform_system == 'Linux' | nvidia-cufile-cu12==1.13.1.3; platform_system == 'Linux'
|
PYTORCH_EXTRA_INSTALL_REQUIREMENTS: nvidia-cuda-nvrtc-cu12==12.8.93; platform_system == 'Linux' | nvidia-cuda-runtime-cu12==12.8.90; platform_system == 'Linux' | nvidia-cuda-cupti-cu12==12.8.90; platform_system == 'Linux' | nvidia-cudnn-cu12==9.10.2.21; platform_system == 'Linux' | nvidia-cublas-cu12==12.8.4.1; platform_system == 'Linux' | nvidia-cufft-cu12==11.3.3.83; platform_system == 'Linux' | nvidia-curand-cu12==10.3.9.90; platform_system == 'Linux' | nvidia-cusolver-cu12==11.7.3.90; platform_system == 'Linux' | nvidia-cusparse-cu12==12.5.8.93; platform_system == 'Linux' | nvidia-cusparselt-cu12==0.7.1; platform_system == 'Linux' | nvidia-nccl-cu12==2.27.5; platform_system == 'Linux' | nvidia-nvshmem-cu12==3.3.24; platform_system == 'Linux' | nvidia-nvtx-cu12==12.8.90; platform_system == 'Linux' | nvidia-nvjitlink-cu12==12.8.93; platform_system == 'Linux' | nvidia-cufile-cu12==1.13.1.3; platform_system == 'Linux'
|
||||||
timeout-minutes: 420
|
timeout-minutes: 420
|
||||||
secrets:
|
secrets:
|
||||||
github-token: ${{ secrets.GITHUB_TOKEN }}
|
github-token: ${{ secrets.GITHUB_TOKEN }}
|
||||||
@ -626,11 +626,11 @@ jobs:
|
|||||||
DOCKER_IMAGE_TAG_PREFIX: cuda13.0
|
DOCKER_IMAGE_TAG_PREFIX: cuda13.0
|
||||||
DESIRED_PYTHON: "3.12"
|
DESIRED_PYTHON: "3.12"
|
||||||
runner_prefix: "${{ needs.get-label-type.outputs.label-type }}"
|
runner_prefix: "${{ needs.get-label-type.outputs.label-type }}"
|
||||||
runs_on: linux.arm64.r7g.12xlarge.memory
|
runs_on: linux.arm64.m7g.4xlarge.ephemeral
|
||||||
ALPINE_IMAGE: "arm64v8/alpine"
|
ALPINE_IMAGE: "arm64v8/alpine"
|
||||||
build_name: manywheel-py3_12-cuda-aarch64-13_0
|
build_name: manywheel-py3_12-cuda-aarch64-13_0
|
||||||
build_environment: linux-aarch64-binary-manywheel
|
build_environment: linux-aarch64-binary-manywheel
|
||||||
PYTORCH_EXTRA_INSTALL_REQUIREMENTS: nvidia-cuda-nvrtc==13.0.48; platform_system == 'Linux' | nvidia-cuda-runtime==13.0.48; platform_system == 'Linux' | nvidia-cuda-cupti==13.0.48; platform_system == 'Linux' | nvidia-cudnn-cu13==9.13.0.50; platform_system == 'Linux' | nvidia-cublas==13.0.0.19; platform_system == 'Linux' | nvidia-cufft==12.0.0.15; platform_system == 'Linux' | nvidia-curand==10.4.0.35; platform_system == 'Linux' | nvidia-cusolver==12.0.3.29; platform_system == 'Linux' | nvidia-cusparse==12.6.2.49; platform_system == 'Linux' | nvidia-cusparselt-cu13==0.8.0; platform_system == 'Linux' | nvidia-nccl-cu13==2.28.3; platform_system == 'Linux' | nvidia-nvshmem-cu13==3.3.24; platform_system == 'Linux' | nvidia-nvtx==13.0.39; platform_system == 'Linux' | nvidia-nvjitlink==13.0.39; platform_system == 'Linux' | nvidia-cufile==1.15.0.42; platform_system == 'Linux'
|
PYTORCH_EXTRA_INSTALL_REQUIREMENTS: nvidia-cuda-nvrtc==13.0.48; platform_system == 'Linux' | nvidia-cuda-runtime==13.0.48; platform_system == 'Linux' | nvidia-cuda-cupti==13.0.48; platform_system == 'Linux' | nvidia-cudnn-cu13==9.13.0.50; platform_system == 'Linux' | nvidia-cublas==13.0.0.19; platform_system == 'Linux' | nvidia-cufft==12.0.0.15; platform_system == 'Linux' | nvidia-curand==10.4.0.35; platform_system == 'Linux' | nvidia-cusolver==12.0.3.29; platform_system == 'Linux' | nvidia-cusparse==12.6.2.49; platform_system == 'Linux' | nvidia-cusparselt-cu13==0.8.0; platform_system == 'Linux' | nvidia-nccl-cu13==2.27.7; platform_system == 'Linux' | nvidia-nvshmem-cu13==3.3.24; platform_system == 'Linux' | nvidia-nvtx==13.0.39; platform_system == 'Linux' | nvidia-nvjitlink==13.0.39; platform_system == 'Linux' | nvidia-cufile==1.15.0.42; platform_system == 'Linux'
|
||||||
timeout-minutes: 420
|
timeout-minutes: 420
|
||||||
secrets:
|
secrets:
|
||||||
github-token: ${{ secrets.GITHUB_TOKEN }}
|
github-token: ${{ secrets.GITHUB_TOKEN }}
|
||||||
@ -671,7 +671,7 @@ jobs:
|
|||||||
DOCKER_IMAGE_TAG_PREFIX: cpu-aarch64
|
DOCKER_IMAGE_TAG_PREFIX: cpu-aarch64
|
||||||
DESIRED_PYTHON: "3.13"
|
DESIRED_PYTHON: "3.13"
|
||||||
runner_prefix: "${{ needs.get-label-type.outputs.label-type }}"
|
runner_prefix: "${{ needs.get-label-type.outputs.label-type }}"
|
||||||
runs_on: linux.arm64.r7g.12xlarge.memory
|
runs_on: linux.arm64.m7g.4xlarge.ephemeral
|
||||||
ALPINE_IMAGE: "arm64v8/alpine"
|
ALPINE_IMAGE: "arm64v8/alpine"
|
||||||
build_name: manywheel-py3_13-cpu-aarch64
|
build_name: manywheel-py3_13-cpu-aarch64
|
||||||
build_environment: linux-aarch64-binary-manywheel
|
build_environment: linux-aarch64-binary-manywheel
|
||||||
@ -737,11 +737,11 @@ jobs:
|
|||||||
DOCKER_IMAGE_TAG_PREFIX: cuda12.6
|
DOCKER_IMAGE_TAG_PREFIX: cuda12.6
|
||||||
DESIRED_PYTHON: "3.13"
|
DESIRED_PYTHON: "3.13"
|
||||||
runner_prefix: "${{ needs.get-label-type.outputs.label-type }}"
|
runner_prefix: "${{ needs.get-label-type.outputs.label-type }}"
|
||||||
runs_on: linux.arm64.r7g.12xlarge.memory
|
runs_on: linux.arm64.m7g.4xlarge.ephemeral
|
||||||
ALPINE_IMAGE: "arm64v8/alpine"
|
ALPINE_IMAGE: "arm64v8/alpine"
|
||||||
build_name: manywheel-py3_13-cuda-aarch64-12_6
|
build_name: manywheel-py3_13-cuda-aarch64-12_6
|
||||||
build_environment: linux-aarch64-binary-manywheel
|
build_environment: linux-aarch64-binary-manywheel
|
||||||
PYTORCH_EXTRA_INSTALL_REQUIREMENTS: nvidia-cuda-nvrtc-cu12==12.6.77; platform_system == 'Linux' | nvidia-cuda-runtime-cu12==12.6.77; platform_system == 'Linux' | nvidia-cuda-cupti-cu12==12.6.80; platform_system == 'Linux' | nvidia-cudnn-cu12==9.10.2.21; platform_system == 'Linux' | nvidia-cublas-cu12==12.6.4.1; platform_system == 'Linux' | nvidia-cufft-cu12==11.3.0.4; platform_system == 'Linux' | nvidia-curand-cu12==10.3.7.77; platform_system == 'Linux' | nvidia-cusolver-cu12==11.7.1.2; platform_system == 'Linux' | nvidia-cusparse-cu12==12.5.4.2; platform_system == 'Linux' | nvidia-cusparselt-cu12==0.7.1; platform_system == 'Linux' | nvidia-nccl-cu12==2.28.3; platform_system == 'Linux' | nvidia-nvshmem-cu12==3.3.24; platform_system == 'Linux' | nvidia-nvtx-cu12==12.6.77; platform_system == 'Linux' | nvidia-nvjitlink-cu12==12.6.85; platform_system == 'Linux' | nvidia-cufile-cu12==1.11.1.6; platform_system == 'Linux'
|
PYTORCH_EXTRA_INSTALL_REQUIREMENTS: nvidia-cuda-nvrtc-cu12==12.6.77; platform_system == 'Linux' | nvidia-cuda-runtime-cu12==12.6.77; platform_system == 'Linux' | nvidia-cuda-cupti-cu12==12.6.80; platform_system == 'Linux' | nvidia-cudnn-cu12==9.10.2.21; platform_system == 'Linux' | nvidia-cublas-cu12==12.6.4.1; platform_system == 'Linux' | nvidia-cufft-cu12==11.3.0.4; platform_system == 'Linux' | nvidia-curand-cu12==10.3.7.77; platform_system == 'Linux' | nvidia-cusolver-cu12==11.7.1.2; platform_system == 'Linux' | nvidia-cusparse-cu12==12.5.4.2; platform_system == 'Linux' | nvidia-cusparselt-cu12==0.7.1; platform_system == 'Linux' | nvidia-nccl-cu12==2.27.5; platform_system == 'Linux' | nvidia-nvshmem-cu12==3.3.24; platform_system == 'Linux' | nvidia-nvtx-cu12==12.6.77; platform_system == 'Linux' | nvidia-nvjitlink-cu12==12.6.85; platform_system == 'Linux' | nvidia-cufile-cu12==1.11.1.6; platform_system == 'Linux'
|
||||||
timeout-minutes: 420
|
timeout-minutes: 420
|
||||||
secrets:
|
secrets:
|
||||||
github-token: ${{ secrets.GITHUB_TOKEN }}
|
github-token: ${{ secrets.GITHUB_TOKEN }}
|
||||||
@ -783,11 +783,11 @@ jobs:
|
|||||||
DOCKER_IMAGE_TAG_PREFIX: cuda12.8
|
DOCKER_IMAGE_TAG_PREFIX: cuda12.8
|
||||||
DESIRED_PYTHON: "3.13"
|
DESIRED_PYTHON: "3.13"
|
||||||
runner_prefix: "${{ needs.get-label-type.outputs.label-type }}"
|
runner_prefix: "${{ needs.get-label-type.outputs.label-type }}"
|
||||||
runs_on: linux.arm64.r7g.12xlarge.memory
|
runs_on: linux.arm64.m7g.4xlarge.ephemeral
|
||||||
ALPINE_IMAGE: "arm64v8/alpine"
|
ALPINE_IMAGE: "arm64v8/alpine"
|
||||||
build_name: manywheel-py3_13-cuda-aarch64-12_8
|
build_name: manywheel-py3_13-cuda-aarch64-12_8
|
||||||
build_environment: linux-aarch64-binary-manywheel
|
build_environment: linux-aarch64-binary-manywheel
|
||||||
PYTORCH_EXTRA_INSTALL_REQUIREMENTS: nvidia-cuda-nvrtc-cu12==12.8.93; platform_system == 'Linux' | nvidia-cuda-runtime-cu12==12.8.90; platform_system == 'Linux' | nvidia-cuda-cupti-cu12==12.8.90; platform_system == 'Linux' | nvidia-cudnn-cu12==9.10.2.21; platform_system == 'Linux' | nvidia-cublas-cu12==12.8.4.1; platform_system == 'Linux' | nvidia-cufft-cu12==11.3.3.83; platform_system == 'Linux' | nvidia-curand-cu12==10.3.9.90; platform_system == 'Linux' | nvidia-cusolver-cu12==11.7.3.90; platform_system == 'Linux' | nvidia-cusparse-cu12==12.5.8.93; platform_system == 'Linux' | nvidia-cusparselt-cu12==0.7.1; platform_system == 'Linux' | nvidia-nccl-cu12==2.28.3; platform_system == 'Linux' | nvidia-nvshmem-cu12==3.3.24; platform_system == 'Linux' | nvidia-nvtx-cu12==12.8.90; platform_system == 'Linux' | nvidia-nvjitlink-cu12==12.8.93; platform_system == 'Linux' | nvidia-cufile-cu12==1.13.1.3; platform_system == 'Linux'
|
PYTORCH_EXTRA_INSTALL_REQUIREMENTS: nvidia-cuda-nvrtc-cu12==12.8.93; platform_system == 'Linux' | nvidia-cuda-runtime-cu12==12.8.90; platform_system == 'Linux' | nvidia-cuda-cupti-cu12==12.8.90; platform_system == 'Linux' | nvidia-cudnn-cu12==9.10.2.21; platform_system == 'Linux' | nvidia-cublas-cu12==12.8.4.1; platform_system == 'Linux' | nvidia-cufft-cu12==11.3.3.83; platform_system == 'Linux' | nvidia-curand-cu12==10.3.9.90; platform_system == 'Linux' | nvidia-cusolver-cu12==11.7.3.90; platform_system == 'Linux' | nvidia-cusparse-cu12==12.5.8.93; platform_system == 'Linux' | nvidia-cusparselt-cu12==0.7.1; platform_system == 'Linux' | nvidia-nccl-cu12==2.27.5; platform_system == 'Linux' | nvidia-nvshmem-cu12==3.3.24; platform_system == 'Linux' | nvidia-nvtx-cu12==12.8.90; platform_system == 'Linux' | nvidia-nvjitlink-cu12==12.8.93; platform_system == 'Linux' | nvidia-cufile-cu12==1.13.1.3; platform_system == 'Linux'
|
||||||
timeout-minutes: 420
|
timeout-minutes: 420
|
||||||
secrets:
|
secrets:
|
||||||
github-token: ${{ secrets.GITHUB_TOKEN }}
|
github-token: ${{ secrets.GITHUB_TOKEN }}
|
||||||
@ -829,11 +829,11 @@ jobs:
|
|||||||
DOCKER_IMAGE_TAG_PREFIX: cuda13.0
|
DOCKER_IMAGE_TAG_PREFIX: cuda13.0
|
||||||
DESIRED_PYTHON: "3.13"
|
DESIRED_PYTHON: "3.13"
|
||||||
runner_prefix: "${{ needs.get-label-type.outputs.label-type }}"
|
runner_prefix: "${{ needs.get-label-type.outputs.label-type }}"
|
||||||
runs_on: linux.arm64.r7g.12xlarge.memory
|
runs_on: linux.arm64.m7g.4xlarge.ephemeral
|
||||||
ALPINE_IMAGE: "arm64v8/alpine"
|
ALPINE_IMAGE: "arm64v8/alpine"
|
||||||
build_name: manywheel-py3_13-cuda-aarch64-13_0
|
build_name: manywheel-py3_13-cuda-aarch64-13_0
|
||||||
build_environment: linux-aarch64-binary-manywheel
|
build_environment: linux-aarch64-binary-manywheel
|
||||||
PYTORCH_EXTRA_INSTALL_REQUIREMENTS: nvidia-cuda-nvrtc==13.0.48; platform_system == 'Linux' | nvidia-cuda-runtime==13.0.48; platform_system == 'Linux' | nvidia-cuda-cupti==13.0.48; platform_system == 'Linux' | nvidia-cudnn-cu13==9.13.0.50; platform_system == 'Linux' | nvidia-cublas==13.0.0.19; platform_system == 'Linux' | nvidia-cufft==12.0.0.15; platform_system == 'Linux' | nvidia-curand==10.4.0.35; platform_system == 'Linux' | nvidia-cusolver==12.0.3.29; platform_system == 'Linux' | nvidia-cusparse==12.6.2.49; platform_system == 'Linux' | nvidia-cusparselt-cu13==0.8.0; platform_system == 'Linux' | nvidia-nccl-cu13==2.28.3; platform_system == 'Linux' | nvidia-nvshmem-cu13==3.3.24; platform_system == 'Linux' | nvidia-nvtx==13.0.39; platform_system == 'Linux' | nvidia-nvjitlink==13.0.39; platform_system == 'Linux' | nvidia-cufile==1.15.0.42; platform_system == 'Linux'
|
PYTORCH_EXTRA_INSTALL_REQUIREMENTS: nvidia-cuda-nvrtc==13.0.48; platform_system == 'Linux' | nvidia-cuda-runtime==13.0.48; platform_system == 'Linux' | nvidia-cuda-cupti==13.0.48; platform_system == 'Linux' | nvidia-cudnn-cu13==9.13.0.50; platform_system == 'Linux' | nvidia-cublas==13.0.0.19; platform_system == 'Linux' | nvidia-cufft==12.0.0.15; platform_system == 'Linux' | nvidia-curand==10.4.0.35; platform_system == 'Linux' | nvidia-cusolver==12.0.3.29; platform_system == 'Linux' | nvidia-cusparse==12.6.2.49; platform_system == 'Linux' | nvidia-cusparselt-cu13==0.8.0; platform_system == 'Linux' | nvidia-nccl-cu13==2.27.7; platform_system == 'Linux' | nvidia-nvshmem-cu13==3.3.24; platform_system == 'Linux' | nvidia-nvtx==13.0.39; platform_system == 'Linux' | nvidia-nvjitlink==13.0.39; platform_system == 'Linux' | nvidia-cufile==1.15.0.42; platform_system == 'Linux'
|
||||||
timeout-minutes: 420
|
timeout-minutes: 420
|
||||||
secrets:
|
secrets:
|
||||||
github-token: ${{ secrets.GITHUB_TOKEN }}
|
github-token: ${{ secrets.GITHUB_TOKEN }}
|
||||||
@ -874,7 +874,7 @@ jobs:
|
|||||||
DOCKER_IMAGE_TAG_PREFIX: cpu-aarch64
|
DOCKER_IMAGE_TAG_PREFIX: cpu-aarch64
|
||||||
DESIRED_PYTHON: "3.13t"
|
DESIRED_PYTHON: "3.13t"
|
||||||
runner_prefix: "${{ needs.get-label-type.outputs.label-type }}"
|
runner_prefix: "${{ needs.get-label-type.outputs.label-type }}"
|
||||||
runs_on: linux.arm64.r7g.12xlarge.memory
|
runs_on: linux.arm64.m7g.4xlarge.ephemeral
|
||||||
ALPINE_IMAGE: "arm64v8/alpine"
|
ALPINE_IMAGE: "arm64v8/alpine"
|
||||||
build_name: manywheel-py3_13t-cpu-aarch64
|
build_name: manywheel-py3_13t-cpu-aarch64
|
||||||
build_environment: linux-aarch64-binary-manywheel
|
build_environment: linux-aarch64-binary-manywheel
|
||||||
@ -940,11 +940,11 @@ jobs:
|
|||||||
DOCKER_IMAGE_TAG_PREFIX: cuda12.6
|
DOCKER_IMAGE_TAG_PREFIX: cuda12.6
|
||||||
DESIRED_PYTHON: "3.13t"
|
DESIRED_PYTHON: "3.13t"
|
||||||
runner_prefix: "${{ needs.get-label-type.outputs.label-type }}"
|
runner_prefix: "${{ needs.get-label-type.outputs.label-type }}"
|
||||||
runs_on: linux.arm64.r7g.12xlarge.memory
|
runs_on: linux.arm64.m7g.4xlarge.ephemeral
|
||||||
ALPINE_IMAGE: "arm64v8/alpine"
|
ALPINE_IMAGE: "arm64v8/alpine"
|
||||||
build_name: manywheel-py3_13t-cuda-aarch64-12_6
|
build_name: manywheel-py3_13t-cuda-aarch64-12_6
|
||||||
build_environment: linux-aarch64-binary-manywheel
|
build_environment: linux-aarch64-binary-manywheel
|
||||||
PYTORCH_EXTRA_INSTALL_REQUIREMENTS: nvidia-cuda-nvrtc-cu12==12.6.77; platform_system == 'Linux' | nvidia-cuda-runtime-cu12==12.6.77; platform_system == 'Linux' | nvidia-cuda-cupti-cu12==12.6.80; platform_system == 'Linux' | nvidia-cudnn-cu12==9.10.2.21; platform_system == 'Linux' | nvidia-cublas-cu12==12.6.4.1; platform_system == 'Linux' | nvidia-cufft-cu12==11.3.0.4; platform_system == 'Linux' | nvidia-curand-cu12==10.3.7.77; platform_system == 'Linux' | nvidia-cusolver-cu12==11.7.1.2; platform_system == 'Linux' | nvidia-cusparse-cu12==12.5.4.2; platform_system == 'Linux' | nvidia-cusparselt-cu12==0.7.1; platform_system == 'Linux' | nvidia-nccl-cu12==2.28.3; platform_system == 'Linux' | nvidia-nvshmem-cu12==3.3.24; platform_system == 'Linux' | nvidia-nvtx-cu12==12.6.77; platform_system == 'Linux' | nvidia-nvjitlink-cu12==12.6.85; platform_system == 'Linux' | nvidia-cufile-cu12==1.11.1.6; platform_system == 'Linux'
|
PYTORCH_EXTRA_INSTALL_REQUIREMENTS: nvidia-cuda-nvrtc-cu12==12.6.77; platform_system == 'Linux' | nvidia-cuda-runtime-cu12==12.6.77; platform_system == 'Linux' | nvidia-cuda-cupti-cu12==12.6.80; platform_system == 'Linux' | nvidia-cudnn-cu12==9.10.2.21; platform_system == 'Linux' | nvidia-cublas-cu12==12.6.4.1; platform_system == 'Linux' | nvidia-cufft-cu12==11.3.0.4; platform_system == 'Linux' | nvidia-curand-cu12==10.3.7.77; platform_system == 'Linux' | nvidia-cusolver-cu12==11.7.1.2; platform_system == 'Linux' | nvidia-cusparse-cu12==12.5.4.2; platform_system == 'Linux' | nvidia-cusparselt-cu12==0.7.1; platform_system == 'Linux' | nvidia-nccl-cu12==2.27.5; platform_system == 'Linux' | nvidia-nvshmem-cu12==3.3.24; platform_system == 'Linux' | nvidia-nvtx-cu12==12.6.77; platform_system == 'Linux' | nvidia-nvjitlink-cu12==12.6.85; platform_system == 'Linux' | nvidia-cufile-cu12==1.11.1.6; platform_system == 'Linux'
|
||||||
timeout-minutes: 420
|
timeout-minutes: 420
|
||||||
secrets:
|
secrets:
|
||||||
github-token: ${{ secrets.GITHUB_TOKEN }}
|
github-token: ${{ secrets.GITHUB_TOKEN }}
|
||||||
@ -986,11 +986,11 @@ jobs:
|
|||||||
DOCKER_IMAGE_TAG_PREFIX: cuda12.8
|
DOCKER_IMAGE_TAG_PREFIX: cuda12.8
|
||||||
DESIRED_PYTHON: "3.13t"
|
DESIRED_PYTHON: "3.13t"
|
||||||
runner_prefix: "${{ needs.get-label-type.outputs.label-type }}"
|
runner_prefix: "${{ needs.get-label-type.outputs.label-type }}"
|
||||||
runs_on: linux.arm64.r7g.12xlarge.memory
|
runs_on: linux.arm64.m7g.4xlarge.ephemeral
|
||||||
ALPINE_IMAGE: "arm64v8/alpine"
|
ALPINE_IMAGE: "arm64v8/alpine"
|
||||||
build_name: manywheel-py3_13t-cuda-aarch64-12_8
|
build_name: manywheel-py3_13t-cuda-aarch64-12_8
|
||||||
build_environment: linux-aarch64-binary-manywheel
|
build_environment: linux-aarch64-binary-manywheel
|
||||||
PYTORCH_EXTRA_INSTALL_REQUIREMENTS: nvidia-cuda-nvrtc-cu12==12.8.93; platform_system == 'Linux' | nvidia-cuda-runtime-cu12==12.8.90; platform_system == 'Linux' | nvidia-cuda-cupti-cu12==12.8.90; platform_system == 'Linux' | nvidia-cudnn-cu12==9.10.2.21; platform_system == 'Linux' | nvidia-cublas-cu12==12.8.4.1; platform_system == 'Linux' | nvidia-cufft-cu12==11.3.3.83; platform_system == 'Linux' | nvidia-curand-cu12==10.3.9.90; platform_system == 'Linux' | nvidia-cusolver-cu12==11.7.3.90; platform_system == 'Linux' | nvidia-cusparse-cu12==12.5.8.93; platform_system == 'Linux' | nvidia-cusparselt-cu12==0.7.1; platform_system == 'Linux' | nvidia-nccl-cu12==2.28.3; platform_system == 'Linux' | nvidia-nvshmem-cu12==3.3.24; platform_system == 'Linux' | nvidia-nvtx-cu12==12.8.90; platform_system == 'Linux' | nvidia-nvjitlink-cu12==12.8.93; platform_system == 'Linux' | nvidia-cufile-cu12==1.13.1.3; platform_system == 'Linux'
|
PYTORCH_EXTRA_INSTALL_REQUIREMENTS: nvidia-cuda-nvrtc-cu12==12.8.93; platform_system == 'Linux' | nvidia-cuda-runtime-cu12==12.8.90; platform_system == 'Linux' | nvidia-cuda-cupti-cu12==12.8.90; platform_system == 'Linux' | nvidia-cudnn-cu12==9.10.2.21; platform_system == 'Linux' | nvidia-cublas-cu12==12.8.4.1; platform_system == 'Linux' | nvidia-cufft-cu12==11.3.3.83; platform_system == 'Linux' | nvidia-curand-cu12==10.3.9.90; platform_system == 'Linux' | nvidia-cusolver-cu12==11.7.3.90; platform_system == 'Linux' | nvidia-cusparse-cu12==12.5.8.93; platform_system == 'Linux' | nvidia-cusparselt-cu12==0.7.1; platform_system == 'Linux' | nvidia-nccl-cu12==2.27.5; platform_system == 'Linux' | nvidia-nvshmem-cu12==3.3.24; platform_system == 'Linux' | nvidia-nvtx-cu12==12.8.90; platform_system == 'Linux' | nvidia-nvjitlink-cu12==12.8.93; platform_system == 'Linux' | nvidia-cufile-cu12==1.13.1.3; platform_system == 'Linux'
|
||||||
timeout-minutes: 420
|
timeout-minutes: 420
|
||||||
secrets:
|
secrets:
|
||||||
github-token: ${{ secrets.GITHUB_TOKEN }}
|
github-token: ${{ secrets.GITHUB_TOKEN }}
|
||||||
@ -1032,11 +1032,11 @@ jobs:
|
|||||||
DOCKER_IMAGE_TAG_PREFIX: cuda13.0
|
DOCKER_IMAGE_TAG_PREFIX: cuda13.0
|
||||||
DESIRED_PYTHON: "3.13t"
|
DESIRED_PYTHON: "3.13t"
|
||||||
runner_prefix: "${{ needs.get-label-type.outputs.label-type }}"
|
runner_prefix: "${{ needs.get-label-type.outputs.label-type }}"
|
||||||
runs_on: linux.arm64.r7g.12xlarge.memory
|
runs_on: linux.arm64.m7g.4xlarge.ephemeral
|
||||||
ALPINE_IMAGE: "arm64v8/alpine"
|
ALPINE_IMAGE: "arm64v8/alpine"
|
||||||
build_name: manywheel-py3_13t-cuda-aarch64-13_0
|
build_name: manywheel-py3_13t-cuda-aarch64-13_0
|
||||||
build_environment: linux-aarch64-binary-manywheel
|
build_environment: linux-aarch64-binary-manywheel
|
||||||
PYTORCH_EXTRA_INSTALL_REQUIREMENTS: nvidia-cuda-nvrtc==13.0.48; platform_system == 'Linux' | nvidia-cuda-runtime==13.0.48; platform_system == 'Linux' | nvidia-cuda-cupti==13.0.48; platform_system == 'Linux' | nvidia-cudnn-cu13==9.13.0.50; platform_system == 'Linux' | nvidia-cublas==13.0.0.19; platform_system == 'Linux' | nvidia-cufft==12.0.0.15; platform_system == 'Linux' | nvidia-curand==10.4.0.35; platform_system == 'Linux' | nvidia-cusolver==12.0.3.29; platform_system == 'Linux' | nvidia-cusparse==12.6.2.49; platform_system == 'Linux' | nvidia-cusparselt-cu13==0.8.0; platform_system == 'Linux' | nvidia-nccl-cu13==2.28.3; platform_system == 'Linux' | nvidia-nvshmem-cu13==3.3.24; platform_system == 'Linux' | nvidia-nvtx==13.0.39; platform_system == 'Linux' | nvidia-nvjitlink==13.0.39; platform_system == 'Linux' | nvidia-cufile==1.15.0.42; platform_system == 'Linux'
|
PYTORCH_EXTRA_INSTALL_REQUIREMENTS: nvidia-cuda-nvrtc==13.0.48; platform_system == 'Linux' | nvidia-cuda-runtime==13.0.48; platform_system == 'Linux' | nvidia-cuda-cupti==13.0.48; platform_system == 'Linux' | nvidia-cudnn-cu13==9.13.0.50; platform_system == 'Linux' | nvidia-cublas==13.0.0.19; platform_system == 'Linux' | nvidia-cufft==12.0.0.15; platform_system == 'Linux' | nvidia-curand==10.4.0.35; platform_system == 'Linux' | nvidia-cusolver==12.0.3.29; platform_system == 'Linux' | nvidia-cusparse==12.6.2.49; platform_system == 'Linux' | nvidia-cusparselt-cu13==0.8.0; platform_system == 'Linux' | nvidia-nccl-cu13==2.27.7; platform_system == 'Linux' | nvidia-nvshmem-cu13==3.3.24; platform_system == 'Linux' | nvidia-nvtx==13.0.39; platform_system == 'Linux' | nvidia-nvjitlink==13.0.39; platform_system == 'Linux' | nvidia-cufile==1.15.0.42; platform_system == 'Linux'
|
||||||
timeout-minutes: 420
|
timeout-minutes: 420
|
||||||
secrets:
|
secrets:
|
||||||
github-token: ${{ secrets.GITHUB_TOKEN }}
|
github-token: ${{ secrets.GITHUB_TOKEN }}
|
||||||
@ -1077,7 +1077,7 @@ jobs:
|
|||||||
DOCKER_IMAGE_TAG_PREFIX: cpu-aarch64
|
DOCKER_IMAGE_TAG_PREFIX: cpu-aarch64
|
||||||
DESIRED_PYTHON: "3.14"
|
DESIRED_PYTHON: "3.14"
|
||||||
runner_prefix: "${{ needs.get-label-type.outputs.label-type }}"
|
runner_prefix: "${{ needs.get-label-type.outputs.label-type }}"
|
||||||
runs_on: linux.arm64.r7g.12xlarge.memory
|
runs_on: linux.arm64.m7g.4xlarge.ephemeral
|
||||||
ALPINE_IMAGE: "arm64v8/alpine"
|
ALPINE_IMAGE: "arm64v8/alpine"
|
||||||
build_name: manywheel-py3_14-cpu-aarch64
|
build_name: manywheel-py3_14-cpu-aarch64
|
||||||
build_environment: linux-aarch64-binary-manywheel
|
build_environment: linux-aarch64-binary-manywheel
|
||||||
@ -1143,11 +1143,11 @@ jobs:
|
|||||||
DOCKER_IMAGE_TAG_PREFIX: cuda12.6
|
DOCKER_IMAGE_TAG_PREFIX: cuda12.6
|
||||||
DESIRED_PYTHON: "3.14"
|
DESIRED_PYTHON: "3.14"
|
||||||
runner_prefix: "${{ needs.get-label-type.outputs.label-type }}"
|
runner_prefix: "${{ needs.get-label-type.outputs.label-type }}"
|
||||||
runs_on: linux.arm64.r7g.12xlarge.memory
|
runs_on: linux.arm64.m7g.4xlarge.ephemeral
|
||||||
ALPINE_IMAGE: "arm64v8/alpine"
|
ALPINE_IMAGE: "arm64v8/alpine"
|
||||||
build_name: manywheel-py3_14-cuda-aarch64-12_6
|
build_name: manywheel-py3_14-cuda-aarch64-12_6
|
||||||
build_environment: linux-aarch64-binary-manywheel
|
build_environment: linux-aarch64-binary-manywheel
|
||||||
PYTORCH_EXTRA_INSTALL_REQUIREMENTS: nvidia-cuda-nvrtc-cu12==12.6.77; platform_system == 'Linux' | nvidia-cuda-runtime-cu12==12.6.77; platform_system == 'Linux' | nvidia-cuda-cupti-cu12==12.6.80; platform_system == 'Linux' | nvidia-cudnn-cu12==9.10.2.21; platform_system == 'Linux' | nvidia-cublas-cu12==12.6.4.1; platform_system == 'Linux' | nvidia-cufft-cu12==11.3.0.4; platform_system == 'Linux' | nvidia-curand-cu12==10.3.7.77; platform_system == 'Linux' | nvidia-cusolver-cu12==11.7.1.2; platform_system == 'Linux' | nvidia-cusparse-cu12==12.5.4.2; platform_system == 'Linux' | nvidia-cusparselt-cu12==0.7.1; platform_system == 'Linux' | nvidia-nccl-cu12==2.28.3; platform_system == 'Linux' | nvidia-nvshmem-cu12==3.3.24; platform_system == 'Linux' | nvidia-nvtx-cu12==12.6.77; platform_system == 'Linux' | nvidia-nvjitlink-cu12==12.6.85; platform_system == 'Linux' | nvidia-cufile-cu12==1.11.1.6; platform_system == 'Linux'
|
PYTORCH_EXTRA_INSTALL_REQUIREMENTS: nvidia-cuda-nvrtc-cu12==12.6.77; platform_system == 'Linux' | nvidia-cuda-runtime-cu12==12.6.77; platform_system == 'Linux' | nvidia-cuda-cupti-cu12==12.6.80; platform_system == 'Linux' | nvidia-cudnn-cu12==9.10.2.21; platform_system == 'Linux' | nvidia-cublas-cu12==12.6.4.1; platform_system == 'Linux' | nvidia-cufft-cu12==11.3.0.4; platform_system == 'Linux' | nvidia-curand-cu12==10.3.7.77; platform_system == 'Linux' | nvidia-cusolver-cu12==11.7.1.2; platform_system == 'Linux' | nvidia-cusparse-cu12==12.5.4.2; platform_system == 'Linux' | nvidia-cusparselt-cu12==0.7.1; platform_system == 'Linux' | nvidia-nccl-cu12==2.27.5; platform_system == 'Linux' | nvidia-nvshmem-cu12==3.3.24; platform_system == 'Linux' | nvidia-nvtx-cu12==12.6.77; platform_system == 'Linux' | nvidia-nvjitlink-cu12==12.6.85; platform_system == 'Linux' | nvidia-cufile-cu12==1.11.1.6; platform_system == 'Linux'
|
||||||
timeout-minutes: 420
|
timeout-minutes: 420
|
||||||
secrets:
|
secrets:
|
||||||
github-token: ${{ secrets.GITHUB_TOKEN }}
|
github-token: ${{ secrets.GITHUB_TOKEN }}
|
||||||
@ -1189,11 +1189,11 @@ jobs:
|
|||||||
DOCKER_IMAGE_TAG_PREFIX: cuda12.8
|
DOCKER_IMAGE_TAG_PREFIX: cuda12.8
|
||||||
DESIRED_PYTHON: "3.14"
|
DESIRED_PYTHON: "3.14"
|
||||||
runner_prefix: "${{ needs.get-label-type.outputs.label-type }}"
|
runner_prefix: "${{ needs.get-label-type.outputs.label-type }}"
|
||||||
runs_on: linux.arm64.r7g.12xlarge.memory
|
runs_on: linux.arm64.m7g.4xlarge.ephemeral
|
||||||
ALPINE_IMAGE: "arm64v8/alpine"
|
ALPINE_IMAGE: "arm64v8/alpine"
|
||||||
build_name: manywheel-py3_14-cuda-aarch64-12_8
|
build_name: manywheel-py3_14-cuda-aarch64-12_8
|
||||||
build_environment: linux-aarch64-binary-manywheel
|
build_environment: linux-aarch64-binary-manywheel
|
||||||
PYTORCH_EXTRA_INSTALL_REQUIREMENTS: nvidia-cuda-nvrtc-cu12==12.8.93; platform_system == 'Linux' | nvidia-cuda-runtime-cu12==12.8.90; platform_system == 'Linux' | nvidia-cuda-cupti-cu12==12.8.90; platform_system == 'Linux' | nvidia-cudnn-cu12==9.10.2.21; platform_system == 'Linux' | nvidia-cublas-cu12==12.8.4.1; platform_system == 'Linux' | nvidia-cufft-cu12==11.3.3.83; platform_system == 'Linux' | nvidia-curand-cu12==10.3.9.90; platform_system == 'Linux' | nvidia-cusolver-cu12==11.7.3.90; platform_system == 'Linux' | nvidia-cusparse-cu12==12.5.8.93; platform_system == 'Linux' | nvidia-cusparselt-cu12==0.7.1; platform_system == 'Linux' | nvidia-nccl-cu12==2.28.3; platform_system == 'Linux' | nvidia-nvshmem-cu12==3.3.24; platform_system == 'Linux' | nvidia-nvtx-cu12==12.8.90; platform_system == 'Linux' | nvidia-nvjitlink-cu12==12.8.93; platform_system == 'Linux' | nvidia-cufile-cu12==1.13.1.3; platform_system == 'Linux'
|
PYTORCH_EXTRA_INSTALL_REQUIREMENTS: nvidia-cuda-nvrtc-cu12==12.8.93; platform_system == 'Linux' | nvidia-cuda-runtime-cu12==12.8.90; platform_system == 'Linux' | nvidia-cuda-cupti-cu12==12.8.90; platform_system == 'Linux' | nvidia-cudnn-cu12==9.10.2.21; platform_system == 'Linux' | nvidia-cublas-cu12==12.8.4.1; platform_system == 'Linux' | nvidia-cufft-cu12==11.3.3.83; platform_system == 'Linux' | nvidia-curand-cu12==10.3.9.90; platform_system == 'Linux' | nvidia-cusolver-cu12==11.7.3.90; platform_system == 'Linux' | nvidia-cusparse-cu12==12.5.8.93; platform_system == 'Linux' | nvidia-cusparselt-cu12==0.7.1; platform_system == 'Linux' | nvidia-nccl-cu12==2.27.5; platform_system == 'Linux' | nvidia-nvshmem-cu12==3.3.24; platform_system == 'Linux' | nvidia-nvtx-cu12==12.8.90; platform_system == 'Linux' | nvidia-nvjitlink-cu12==12.8.93; platform_system == 'Linux' | nvidia-cufile-cu12==1.13.1.3; platform_system == 'Linux'
|
||||||
timeout-minutes: 420
|
timeout-minutes: 420
|
||||||
secrets:
|
secrets:
|
||||||
github-token: ${{ secrets.GITHUB_TOKEN }}
|
github-token: ${{ secrets.GITHUB_TOKEN }}
|
||||||
@ -1235,11 +1235,11 @@ jobs:
|
|||||||
DOCKER_IMAGE_TAG_PREFIX: cuda13.0
|
DOCKER_IMAGE_TAG_PREFIX: cuda13.0
|
||||||
DESIRED_PYTHON: "3.14"
|
DESIRED_PYTHON: "3.14"
|
||||||
runner_prefix: "${{ needs.get-label-type.outputs.label-type }}"
|
runner_prefix: "${{ needs.get-label-type.outputs.label-type }}"
|
||||||
runs_on: linux.arm64.r7g.12xlarge.memory
|
runs_on: linux.arm64.m7g.4xlarge.ephemeral
|
||||||
ALPINE_IMAGE: "arm64v8/alpine"
|
ALPINE_IMAGE: "arm64v8/alpine"
|
||||||
build_name: manywheel-py3_14-cuda-aarch64-13_0
|
build_name: manywheel-py3_14-cuda-aarch64-13_0
|
||||||
build_environment: linux-aarch64-binary-manywheel
|
build_environment: linux-aarch64-binary-manywheel
|
||||||
PYTORCH_EXTRA_INSTALL_REQUIREMENTS: nvidia-cuda-nvrtc==13.0.48; platform_system == 'Linux' | nvidia-cuda-runtime==13.0.48; platform_system == 'Linux' | nvidia-cuda-cupti==13.0.48; platform_system == 'Linux' | nvidia-cudnn-cu13==9.13.0.50; platform_system == 'Linux' | nvidia-cublas==13.0.0.19; platform_system == 'Linux' | nvidia-cufft==12.0.0.15; platform_system == 'Linux' | nvidia-curand==10.4.0.35; platform_system == 'Linux' | nvidia-cusolver==12.0.3.29; platform_system == 'Linux' | nvidia-cusparse==12.6.2.49; platform_system == 'Linux' | nvidia-cusparselt-cu13==0.8.0; platform_system == 'Linux' | nvidia-nccl-cu13==2.28.3; platform_system == 'Linux' | nvidia-nvshmem-cu13==3.3.24; platform_system == 'Linux' | nvidia-nvtx==13.0.39; platform_system == 'Linux' | nvidia-nvjitlink==13.0.39; platform_system == 'Linux' | nvidia-cufile==1.15.0.42; platform_system == 'Linux'
|
PYTORCH_EXTRA_INSTALL_REQUIREMENTS: nvidia-cuda-nvrtc==13.0.48; platform_system == 'Linux' | nvidia-cuda-runtime==13.0.48; platform_system == 'Linux' | nvidia-cuda-cupti==13.0.48; platform_system == 'Linux' | nvidia-cudnn-cu13==9.13.0.50; platform_system == 'Linux' | nvidia-cublas==13.0.0.19; platform_system == 'Linux' | nvidia-cufft==12.0.0.15; platform_system == 'Linux' | nvidia-curand==10.4.0.35; platform_system == 'Linux' | nvidia-cusolver==12.0.3.29; platform_system == 'Linux' | nvidia-cusparse==12.6.2.49; platform_system == 'Linux' | nvidia-cusparselt-cu13==0.8.0; platform_system == 'Linux' | nvidia-nccl-cu13==2.27.7; platform_system == 'Linux' | nvidia-nvshmem-cu13==3.3.24; platform_system == 'Linux' | nvidia-nvtx==13.0.39; platform_system == 'Linux' | nvidia-nvjitlink==13.0.39; platform_system == 'Linux' | nvidia-cufile==1.15.0.42; platform_system == 'Linux'
|
||||||
timeout-minutes: 420
|
timeout-minutes: 420
|
||||||
secrets:
|
secrets:
|
||||||
github-token: ${{ secrets.GITHUB_TOKEN }}
|
github-token: ${{ secrets.GITHUB_TOKEN }}
|
||||||
@ -1280,7 +1280,7 @@ jobs:
|
|||||||
DOCKER_IMAGE_TAG_PREFIX: cpu-aarch64
|
DOCKER_IMAGE_TAG_PREFIX: cpu-aarch64
|
||||||
DESIRED_PYTHON: "3.14t"
|
DESIRED_PYTHON: "3.14t"
|
||||||
runner_prefix: "${{ needs.get-label-type.outputs.label-type }}"
|
runner_prefix: "${{ needs.get-label-type.outputs.label-type }}"
|
||||||
runs_on: linux.arm64.r7g.12xlarge.memory
|
runs_on: linux.arm64.m7g.4xlarge.ephemeral
|
||||||
ALPINE_IMAGE: "arm64v8/alpine"
|
ALPINE_IMAGE: "arm64v8/alpine"
|
||||||
build_name: manywheel-py3_14t-cpu-aarch64
|
build_name: manywheel-py3_14t-cpu-aarch64
|
||||||
build_environment: linux-aarch64-binary-manywheel
|
build_environment: linux-aarch64-binary-manywheel
|
||||||
@ -1346,11 +1346,11 @@ jobs:
|
|||||||
DOCKER_IMAGE_TAG_PREFIX: cuda12.6
|
DOCKER_IMAGE_TAG_PREFIX: cuda12.6
|
||||||
DESIRED_PYTHON: "3.14t"
|
DESIRED_PYTHON: "3.14t"
|
||||||
runner_prefix: "${{ needs.get-label-type.outputs.label-type }}"
|
runner_prefix: "${{ needs.get-label-type.outputs.label-type }}"
|
||||||
runs_on: linux.arm64.r7g.12xlarge.memory
|
runs_on: linux.arm64.m7g.4xlarge.ephemeral
|
||||||
ALPINE_IMAGE: "arm64v8/alpine"
|
ALPINE_IMAGE: "arm64v8/alpine"
|
||||||
build_name: manywheel-py3_14t-cuda-aarch64-12_6
|
build_name: manywheel-py3_14t-cuda-aarch64-12_6
|
||||||
build_environment: linux-aarch64-binary-manywheel
|
build_environment: linux-aarch64-binary-manywheel
|
||||||
PYTORCH_EXTRA_INSTALL_REQUIREMENTS: nvidia-cuda-nvrtc-cu12==12.6.77; platform_system == 'Linux' | nvidia-cuda-runtime-cu12==12.6.77; platform_system == 'Linux' | nvidia-cuda-cupti-cu12==12.6.80; platform_system == 'Linux' | nvidia-cudnn-cu12==9.10.2.21; platform_system == 'Linux' | nvidia-cublas-cu12==12.6.4.1; platform_system == 'Linux' | nvidia-cufft-cu12==11.3.0.4; platform_system == 'Linux' | nvidia-curand-cu12==10.3.7.77; platform_system == 'Linux' | nvidia-cusolver-cu12==11.7.1.2; platform_system == 'Linux' | nvidia-cusparse-cu12==12.5.4.2; platform_system == 'Linux' | nvidia-cusparselt-cu12==0.7.1; platform_system == 'Linux' | nvidia-nccl-cu12==2.28.3; platform_system == 'Linux' | nvidia-nvshmem-cu12==3.3.24; platform_system == 'Linux' | nvidia-nvtx-cu12==12.6.77; platform_system == 'Linux' | nvidia-nvjitlink-cu12==12.6.85; platform_system == 'Linux' | nvidia-cufile-cu12==1.11.1.6; platform_system == 'Linux'
|
PYTORCH_EXTRA_INSTALL_REQUIREMENTS: nvidia-cuda-nvrtc-cu12==12.6.77; platform_system == 'Linux' | nvidia-cuda-runtime-cu12==12.6.77; platform_system == 'Linux' | nvidia-cuda-cupti-cu12==12.6.80; platform_system == 'Linux' | nvidia-cudnn-cu12==9.10.2.21; platform_system == 'Linux' | nvidia-cublas-cu12==12.6.4.1; platform_system == 'Linux' | nvidia-cufft-cu12==11.3.0.4; platform_system == 'Linux' | nvidia-curand-cu12==10.3.7.77; platform_system == 'Linux' | nvidia-cusolver-cu12==11.7.1.2; platform_system == 'Linux' | nvidia-cusparse-cu12==12.5.4.2; platform_system == 'Linux' | nvidia-cusparselt-cu12==0.7.1; platform_system == 'Linux' | nvidia-nccl-cu12==2.27.5; platform_system == 'Linux' | nvidia-nvshmem-cu12==3.3.24; platform_system == 'Linux' | nvidia-nvtx-cu12==12.6.77; platform_system == 'Linux' | nvidia-nvjitlink-cu12==12.6.85; platform_system == 'Linux' | nvidia-cufile-cu12==1.11.1.6; platform_system == 'Linux'
|
||||||
timeout-minutes: 420
|
timeout-minutes: 420
|
||||||
secrets:
|
secrets:
|
||||||
github-token: ${{ secrets.GITHUB_TOKEN }}
|
github-token: ${{ secrets.GITHUB_TOKEN }}
|
||||||
@ -1392,11 +1392,11 @@ jobs:
|
|||||||
DOCKER_IMAGE_TAG_PREFIX: cuda12.8
|
DOCKER_IMAGE_TAG_PREFIX: cuda12.8
|
||||||
DESIRED_PYTHON: "3.14t"
|
DESIRED_PYTHON: "3.14t"
|
||||||
runner_prefix: "${{ needs.get-label-type.outputs.label-type }}"
|
runner_prefix: "${{ needs.get-label-type.outputs.label-type }}"
|
||||||
runs_on: linux.arm64.r7g.12xlarge.memory
|
runs_on: linux.arm64.m7g.4xlarge.ephemeral
|
||||||
ALPINE_IMAGE: "arm64v8/alpine"
|
ALPINE_IMAGE: "arm64v8/alpine"
|
||||||
build_name: manywheel-py3_14t-cuda-aarch64-12_8
|
build_name: manywheel-py3_14t-cuda-aarch64-12_8
|
||||||
build_environment: linux-aarch64-binary-manywheel
|
build_environment: linux-aarch64-binary-manywheel
|
||||||
PYTORCH_EXTRA_INSTALL_REQUIREMENTS: nvidia-cuda-nvrtc-cu12==12.8.93; platform_system == 'Linux' | nvidia-cuda-runtime-cu12==12.8.90; platform_system == 'Linux' | nvidia-cuda-cupti-cu12==12.8.90; platform_system == 'Linux' | nvidia-cudnn-cu12==9.10.2.21; platform_system == 'Linux' | nvidia-cublas-cu12==12.8.4.1; platform_system == 'Linux' | nvidia-cufft-cu12==11.3.3.83; platform_system == 'Linux' | nvidia-curand-cu12==10.3.9.90; platform_system == 'Linux' | nvidia-cusolver-cu12==11.7.3.90; platform_system == 'Linux' | nvidia-cusparse-cu12==12.5.8.93; platform_system == 'Linux' | nvidia-cusparselt-cu12==0.7.1; platform_system == 'Linux' | nvidia-nccl-cu12==2.28.3; platform_system == 'Linux' | nvidia-nvshmem-cu12==3.3.24; platform_system == 'Linux' | nvidia-nvtx-cu12==12.8.90; platform_system == 'Linux' | nvidia-nvjitlink-cu12==12.8.93; platform_system == 'Linux' | nvidia-cufile-cu12==1.13.1.3; platform_system == 'Linux'
|
PYTORCH_EXTRA_INSTALL_REQUIREMENTS: nvidia-cuda-nvrtc-cu12==12.8.93; platform_system == 'Linux' | nvidia-cuda-runtime-cu12==12.8.90; platform_system == 'Linux' | nvidia-cuda-cupti-cu12==12.8.90; platform_system == 'Linux' | nvidia-cudnn-cu12==9.10.2.21; platform_system == 'Linux' | nvidia-cublas-cu12==12.8.4.1; platform_system == 'Linux' | nvidia-cufft-cu12==11.3.3.83; platform_system == 'Linux' | nvidia-curand-cu12==10.3.9.90; platform_system == 'Linux' | nvidia-cusolver-cu12==11.7.3.90; platform_system == 'Linux' | nvidia-cusparse-cu12==12.5.8.93; platform_system == 'Linux' | nvidia-cusparselt-cu12==0.7.1; platform_system == 'Linux' | nvidia-nccl-cu12==2.27.5; platform_system == 'Linux' | nvidia-nvshmem-cu12==3.3.24; platform_system == 'Linux' | nvidia-nvtx-cu12==12.8.90; platform_system == 'Linux' | nvidia-nvjitlink-cu12==12.8.93; platform_system == 'Linux' | nvidia-cufile-cu12==1.13.1.3; platform_system == 'Linux'
|
||||||
timeout-minutes: 420
|
timeout-minutes: 420
|
||||||
secrets:
|
secrets:
|
||||||
github-token: ${{ secrets.GITHUB_TOKEN }}
|
github-token: ${{ secrets.GITHUB_TOKEN }}
|
||||||
@ -1438,11 +1438,11 @@ jobs:
|
|||||||
DOCKER_IMAGE_TAG_PREFIX: cuda13.0
|
DOCKER_IMAGE_TAG_PREFIX: cuda13.0
|
||||||
DESIRED_PYTHON: "3.14t"
|
DESIRED_PYTHON: "3.14t"
|
||||||
runner_prefix: "${{ needs.get-label-type.outputs.label-type }}"
|
runner_prefix: "${{ needs.get-label-type.outputs.label-type }}"
|
||||||
runs_on: linux.arm64.r7g.12xlarge.memory
|
runs_on: linux.arm64.m7g.4xlarge.ephemeral
|
||||||
ALPINE_IMAGE: "arm64v8/alpine"
|
ALPINE_IMAGE: "arm64v8/alpine"
|
||||||
build_name: manywheel-py3_14t-cuda-aarch64-13_0
|
build_name: manywheel-py3_14t-cuda-aarch64-13_0
|
||||||
build_environment: linux-aarch64-binary-manywheel
|
build_environment: linux-aarch64-binary-manywheel
|
||||||
PYTORCH_EXTRA_INSTALL_REQUIREMENTS: nvidia-cuda-nvrtc==13.0.48; platform_system == 'Linux' | nvidia-cuda-runtime==13.0.48; platform_system == 'Linux' | nvidia-cuda-cupti==13.0.48; platform_system == 'Linux' | nvidia-cudnn-cu13==9.13.0.50; platform_system == 'Linux' | nvidia-cublas==13.0.0.19; platform_system == 'Linux' | nvidia-cufft==12.0.0.15; platform_system == 'Linux' | nvidia-curand==10.4.0.35; platform_system == 'Linux' | nvidia-cusolver==12.0.3.29; platform_system == 'Linux' | nvidia-cusparse==12.6.2.49; platform_system == 'Linux' | nvidia-cusparselt-cu13==0.8.0; platform_system == 'Linux' | nvidia-nccl-cu13==2.28.3; platform_system == 'Linux' | nvidia-nvshmem-cu13==3.3.24; platform_system == 'Linux' | nvidia-nvtx==13.0.39; platform_system == 'Linux' | nvidia-nvjitlink==13.0.39; platform_system == 'Linux' | nvidia-cufile==1.15.0.42; platform_system == 'Linux'
|
PYTORCH_EXTRA_INSTALL_REQUIREMENTS: nvidia-cuda-nvrtc==13.0.48; platform_system == 'Linux' | nvidia-cuda-runtime==13.0.48; platform_system == 'Linux' | nvidia-cuda-cupti==13.0.48; platform_system == 'Linux' | nvidia-cudnn-cu13==9.13.0.50; platform_system == 'Linux' | nvidia-cublas==13.0.0.19; platform_system == 'Linux' | nvidia-cufft==12.0.0.15; platform_system == 'Linux' | nvidia-curand==10.4.0.35; platform_system == 'Linux' | nvidia-cusolver==12.0.3.29; platform_system == 'Linux' | nvidia-cusparse==12.6.2.49; platform_system == 'Linux' | nvidia-cusparselt-cu13==0.8.0; platform_system == 'Linux' | nvidia-nccl-cu13==2.27.7; platform_system == 'Linux' | nvidia-nvshmem-cu13==3.3.24; platform_system == 'Linux' | nvidia-nvtx==13.0.39; platform_system == 'Linux' | nvidia-nvjitlink==13.0.39; platform_system == 'Linux' | nvidia-cufile==1.15.0.42; platform_system == 'Linux'
|
||||||
timeout-minutes: 420
|
timeout-minutes: 420
|
||||||
secrets:
|
secrets:
|
||||||
github-token: ${{ secrets.GITHUB_TOKEN }}
|
github-token: ${{ secrets.GITHUB_TOKEN }}
|
||||||
|
|||||||
230
.github/workflows/generated-linux-binary-libtorch-nightly.yml
generated
vendored
230
.github/workflows/generated-linux-binary-libtorch-nightly.yml
generated
vendored
@ -316,6 +316,120 @@ jobs:
|
|||||||
github-token: ${{ secrets.GITHUB_TOKEN }}
|
github-token: ${{ secrets.GITHUB_TOKEN }}
|
||||||
uses: ./.github/workflows/_binary-upload.yml
|
uses: ./.github/workflows/_binary-upload.yml
|
||||||
|
|
||||||
|
libtorch-rocm6_3-shared-with-deps-release-build:
|
||||||
|
if: ${{ github.repository_owner == 'pytorch' }}
|
||||||
|
uses: ./.github/workflows/_binary-build-linux.yml
|
||||||
|
needs: get-label-type
|
||||||
|
with:
|
||||||
|
PYTORCH_ROOT: /pytorch
|
||||||
|
PACKAGE_TYPE: libtorch
|
||||||
|
# TODO: This is a legacy variable that we eventually want to get rid of in
|
||||||
|
# favor of GPU_ARCH_VERSION
|
||||||
|
DESIRED_CUDA: rocm6.3
|
||||||
|
GPU_ARCH_VERSION: "6.3"
|
||||||
|
GPU_ARCH_TYPE: rocm
|
||||||
|
DOCKER_IMAGE: libtorch-cxx11-builder
|
||||||
|
DOCKER_IMAGE_TAG_PREFIX: rocm6.3
|
||||||
|
LIBTORCH_CONFIG: release
|
||||||
|
LIBTORCH_VARIANT: shared-with-deps
|
||||||
|
runner_prefix: "${{ needs.get-label-type.outputs.label-type }}"
|
||||||
|
build_name: libtorch-rocm6_3-shared-with-deps-release
|
||||||
|
build_environment: linux-binary-libtorch
|
||||||
|
secrets:
|
||||||
|
github-token: ${{ secrets.GITHUB_TOKEN }}
|
||||||
|
libtorch-rocm6_3-shared-with-deps-release-test: # Testing
|
||||||
|
if: ${{ github.repository_owner == 'pytorch' }}
|
||||||
|
needs:
|
||||||
|
- libtorch-rocm6_3-shared-with-deps-release-build
|
||||||
|
- get-label-type
|
||||||
|
runs-on: linux.rocm.gpu.mi250
|
||||||
|
timeout-minutes: 240
|
||||||
|
env:
|
||||||
|
PYTORCH_ROOT: /pytorch
|
||||||
|
PACKAGE_TYPE: libtorch
|
||||||
|
# TODO: This is a legacy variable that we eventually want to get rid of in
|
||||||
|
# favor of GPU_ARCH_VERSION
|
||||||
|
DESIRED_CUDA: rocm6.3
|
||||||
|
GPU_ARCH_VERSION: "6.3"
|
||||||
|
GPU_ARCH_TYPE: rocm
|
||||||
|
SKIP_ALL_TESTS: 1
|
||||||
|
DOCKER_IMAGE: libtorch-cxx11-builder
|
||||||
|
DOCKER_IMAGE_TAG_PREFIX: rocm6.3
|
||||||
|
LIBTORCH_CONFIG: release
|
||||||
|
LIBTORCH_VARIANT: shared-with-deps
|
||||||
|
steps:
|
||||||
|
- name: Setup ROCm
|
||||||
|
uses: ./.github/actions/setup-rocm
|
||||||
|
- uses: actions/download-artifact@v4.1.7
|
||||||
|
name: Download Build Artifacts
|
||||||
|
with:
|
||||||
|
name: libtorch-rocm6_3-shared-with-deps-release
|
||||||
|
path: "${{ runner.temp }}/artifacts/"
|
||||||
|
- name: Checkout PyTorch
|
||||||
|
uses: actions/checkout@v4
|
||||||
|
with:
|
||||||
|
ref: ${{ github.event_name == 'pull_request' && github.event.pull_request.head.sha || github.sha }}
|
||||||
|
submodules: recursive
|
||||||
|
path: pytorch
|
||||||
|
show-progress: false
|
||||||
|
- name: Clean PyTorch checkout
|
||||||
|
run: |
|
||||||
|
# Remove any artifacts from the previous checkouts
|
||||||
|
git clean -fxd
|
||||||
|
working-directory: pytorch
|
||||||
|
- name: ROCm set GPU_FLAG
|
||||||
|
run: |
|
||||||
|
echo "GPU_FLAG=--device=/dev/mem --device=/dev/kfd --device=/dev/dri --group-add video --group-add daemon" >> "${GITHUB_ENV}"
|
||||||
|
- name: configure aws credentials
|
||||||
|
id: aws_creds
|
||||||
|
if: ${{ startsWith(github.event.ref, 'refs/tags/ciflow/') }}
|
||||||
|
uses: aws-actions/configure-aws-credentials@v4
|
||||||
|
with:
|
||||||
|
role-to-assume: arn:aws:iam::308535385114:role/gha_workflow_s3_and_ecr_read_only
|
||||||
|
aws-region: us-east-1
|
||||||
|
role-duration-seconds: 18000
|
||||||
|
- name: Calculate docker image
|
||||||
|
id: calculate-docker-image
|
||||||
|
uses: pytorch/test-infra/.github/actions/calculate-docker-image@main
|
||||||
|
with:
|
||||||
|
docker-registry: ${{ startsWith(github.event.ref, 'refs/tags/ciflow/') && '308535385114.dkr.ecr.us-east-1.amazonaws.com' || 'docker.io' }}
|
||||||
|
docker-image-name: libtorch-cxx11-builder
|
||||||
|
custom-tag-prefix: rocm6.3
|
||||||
|
docker-build-dir: .ci/docker
|
||||||
|
working-directory: pytorch
|
||||||
|
- name: Pull Docker image
|
||||||
|
uses: pytorch/test-infra/.github/actions/pull-docker-image@main
|
||||||
|
with:
|
||||||
|
docker-image: ${{ steps.calculate-docker-image.outputs.docker-image }}
|
||||||
|
- name: Test Pytorch binary
|
||||||
|
uses: ./pytorch/.github/actions/test-pytorch-binary
|
||||||
|
env:
|
||||||
|
DOCKER_IMAGE: ${{ steps.calculate-docker-image.outputs.docker-image }}
|
||||||
|
- name: Teardown ROCm
|
||||||
|
uses: ./.github/actions/teardown-rocm
|
||||||
|
libtorch-rocm6_3-shared-with-deps-release-upload: # Uploading
|
||||||
|
if: ${{ github.repository_owner == 'pytorch' }}
|
||||||
|
permissions:
|
||||||
|
id-token: write
|
||||||
|
contents: read
|
||||||
|
needs: libtorch-rocm6_3-shared-with-deps-release-test
|
||||||
|
with:
|
||||||
|
PYTORCH_ROOT: /pytorch
|
||||||
|
PACKAGE_TYPE: libtorch
|
||||||
|
# TODO: This is a legacy variable that we eventually want to get rid of in
|
||||||
|
# favor of GPU_ARCH_VERSION
|
||||||
|
DESIRED_CUDA: rocm6.3
|
||||||
|
GPU_ARCH_VERSION: "6.3"
|
||||||
|
GPU_ARCH_TYPE: rocm
|
||||||
|
DOCKER_IMAGE: libtorch-cxx11-builder
|
||||||
|
DOCKER_IMAGE_TAG_PREFIX: rocm6.3
|
||||||
|
LIBTORCH_CONFIG: release
|
||||||
|
LIBTORCH_VARIANT: shared-with-deps
|
||||||
|
build_name: libtorch-rocm6_3-shared-with-deps-release
|
||||||
|
secrets:
|
||||||
|
github-token: ${{ secrets.GITHUB_TOKEN }}
|
||||||
|
uses: ./.github/workflows/_binary-upload.yml
|
||||||
|
|
||||||
libtorch-rocm6_4-shared-with-deps-release-build:
|
libtorch-rocm6_4-shared-with-deps-release-build:
|
||||||
if: ${{ github.repository_owner == 'pytorch' }}
|
if: ${{ github.repository_owner == 'pytorch' }}
|
||||||
uses: ./.github/workflows/_binary-build-linux.yml
|
uses: ./.github/workflows/_binary-build-linux.yml
|
||||||
@ -333,7 +447,6 @@ jobs:
|
|||||||
LIBTORCH_CONFIG: release
|
LIBTORCH_CONFIG: release
|
||||||
LIBTORCH_VARIANT: shared-with-deps
|
LIBTORCH_VARIANT: shared-with-deps
|
||||||
runner_prefix: "${{ needs.get-label-type.outputs.label-type }}"
|
runner_prefix: "${{ needs.get-label-type.outputs.label-type }}"
|
||||||
timeout-minutes: 300
|
|
||||||
build_name: libtorch-rocm6_4-shared-with-deps-release
|
build_name: libtorch-rocm6_4-shared-with-deps-release
|
||||||
build_environment: linux-binary-libtorch
|
build_environment: linux-binary-libtorch
|
||||||
secrets:
|
secrets:
|
||||||
@ -430,118 +543,3 @@ jobs:
|
|||||||
secrets:
|
secrets:
|
||||||
github-token: ${{ secrets.GITHUB_TOKEN }}
|
github-token: ${{ secrets.GITHUB_TOKEN }}
|
||||||
uses: ./.github/workflows/_binary-upload.yml
|
uses: ./.github/workflows/_binary-upload.yml
|
||||||
|
|
||||||
libtorch-rocm7_0-shared-with-deps-release-build:
|
|
||||||
if: ${{ github.repository_owner == 'pytorch' }}
|
|
||||||
uses: ./.github/workflows/_binary-build-linux.yml
|
|
||||||
needs: get-label-type
|
|
||||||
with:
|
|
||||||
PYTORCH_ROOT: /pytorch
|
|
||||||
PACKAGE_TYPE: libtorch
|
|
||||||
# TODO: This is a legacy variable that we eventually want to get rid of in
|
|
||||||
# favor of GPU_ARCH_VERSION
|
|
||||||
DESIRED_CUDA: rocm7.0
|
|
||||||
GPU_ARCH_VERSION: "7.0"
|
|
||||||
GPU_ARCH_TYPE: rocm
|
|
||||||
DOCKER_IMAGE: libtorch-cxx11-builder
|
|
||||||
DOCKER_IMAGE_TAG_PREFIX: rocm7.0
|
|
||||||
LIBTORCH_CONFIG: release
|
|
||||||
LIBTORCH_VARIANT: shared-with-deps
|
|
||||||
runner_prefix: "${{ needs.get-label-type.outputs.label-type }}"
|
|
||||||
timeout-minutes: 300
|
|
||||||
build_name: libtorch-rocm7_0-shared-with-deps-release
|
|
||||||
build_environment: linux-binary-libtorch
|
|
||||||
secrets:
|
|
||||||
github-token: ${{ secrets.GITHUB_TOKEN }}
|
|
||||||
libtorch-rocm7_0-shared-with-deps-release-test: # Testing
|
|
||||||
if: ${{ github.repository_owner == 'pytorch' }}
|
|
||||||
needs:
|
|
||||||
- libtorch-rocm7_0-shared-with-deps-release-build
|
|
||||||
- get-label-type
|
|
||||||
runs-on: linux.rocm.gpu.mi250
|
|
||||||
timeout-minutes: 240
|
|
||||||
env:
|
|
||||||
PYTORCH_ROOT: /pytorch
|
|
||||||
PACKAGE_TYPE: libtorch
|
|
||||||
# TODO: This is a legacy variable that we eventually want to get rid of in
|
|
||||||
# favor of GPU_ARCH_VERSION
|
|
||||||
DESIRED_CUDA: rocm7.0
|
|
||||||
GPU_ARCH_VERSION: "7.0"
|
|
||||||
GPU_ARCH_TYPE: rocm
|
|
||||||
SKIP_ALL_TESTS: 1
|
|
||||||
DOCKER_IMAGE: libtorch-cxx11-builder
|
|
||||||
DOCKER_IMAGE_TAG_PREFIX: rocm7.0
|
|
||||||
LIBTORCH_CONFIG: release
|
|
||||||
LIBTORCH_VARIANT: shared-with-deps
|
|
||||||
steps:
|
|
||||||
- name: Setup ROCm
|
|
||||||
uses: ./.github/actions/setup-rocm
|
|
||||||
- uses: actions/download-artifact@v4.1.7
|
|
||||||
name: Download Build Artifacts
|
|
||||||
with:
|
|
||||||
name: libtorch-rocm7_0-shared-with-deps-release
|
|
||||||
path: "${{ runner.temp }}/artifacts/"
|
|
||||||
- name: Checkout PyTorch
|
|
||||||
uses: actions/checkout@v4
|
|
||||||
with:
|
|
||||||
ref: ${{ github.event_name == 'pull_request' && github.event.pull_request.head.sha || github.sha }}
|
|
||||||
submodules: recursive
|
|
||||||
path: pytorch
|
|
||||||
show-progress: false
|
|
||||||
- name: Clean PyTorch checkout
|
|
||||||
run: |
|
|
||||||
# Remove any artifacts from the previous checkouts
|
|
||||||
git clean -fxd
|
|
||||||
working-directory: pytorch
|
|
||||||
- name: ROCm set GPU_FLAG
|
|
||||||
run: |
|
|
||||||
echo "GPU_FLAG=--device=/dev/mem --device=/dev/kfd --device=/dev/dri --group-add video --group-add daemon" >> "${GITHUB_ENV}"
|
|
||||||
- name: configure aws credentials
|
|
||||||
id: aws_creds
|
|
||||||
if: ${{ startsWith(github.event.ref, 'refs/tags/ciflow/') }}
|
|
||||||
uses: aws-actions/configure-aws-credentials@v4
|
|
||||||
with:
|
|
||||||
role-to-assume: arn:aws:iam::308535385114:role/gha_workflow_s3_and_ecr_read_only
|
|
||||||
aws-region: us-east-1
|
|
||||||
role-duration-seconds: 18000
|
|
||||||
- name: Calculate docker image
|
|
||||||
id: calculate-docker-image
|
|
||||||
uses: pytorch/test-infra/.github/actions/calculate-docker-image@main
|
|
||||||
with:
|
|
||||||
docker-registry: ${{ startsWith(github.event.ref, 'refs/tags/ciflow/') && '308535385114.dkr.ecr.us-east-1.amazonaws.com' || 'docker.io' }}
|
|
||||||
docker-image-name: libtorch-cxx11-builder
|
|
||||||
custom-tag-prefix: rocm7.0
|
|
||||||
docker-build-dir: .ci/docker
|
|
||||||
working-directory: pytorch
|
|
||||||
- name: Pull Docker image
|
|
||||||
uses: pytorch/test-infra/.github/actions/pull-docker-image@main
|
|
||||||
with:
|
|
||||||
docker-image: ${{ steps.calculate-docker-image.outputs.docker-image }}
|
|
||||||
- name: Test Pytorch binary
|
|
||||||
uses: ./pytorch/.github/actions/test-pytorch-binary
|
|
||||||
env:
|
|
||||||
DOCKER_IMAGE: ${{ steps.calculate-docker-image.outputs.docker-image }}
|
|
||||||
- name: Teardown ROCm
|
|
||||||
uses: ./.github/actions/teardown-rocm
|
|
||||||
libtorch-rocm7_0-shared-with-deps-release-upload: # Uploading
|
|
||||||
if: ${{ github.repository_owner == 'pytorch' }}
|
|
||||||
permissions:
|
|
||||||
id-token: write
|
|
||||||
contents: read
|
|
||||||
needs: libtorch-rocm7_0-shared-with-deps-release-test
|
|
||||||
with:
|
|
||||||
PYTORCH_ROOT: /pytorch
|
|
||||||
PACKAGE_TYPE: libtorch
|
|
||||||
# TODO: This is a legacy variable that we eventually want to get rid of in
|
|
||||||
# favor of GPU_ARCH_VERSION
|
|
||||||
DESIRED_CUDA: rocm7.0
|
|
||||||
GPU_ARCH_VERSION: "7.0"
|
|
||||||
GPU_ARCH_TYPE: rocm
|
|
||||||
DOCKER_IMAGE: libtorch-cxx11-builder
|
|
||||||
DOCKER_IMAGE_TAG_PREFIX: rocm7.0
|
|
||||||
LIBTORCH_CONFIG: release
|
|
||||||
LIBTORCH_VARIANT: shared-with-deps
|
|
||||||
build_name: libtorch-rocm7_0-shared-with-deps-release
|
|
||||||
secrets:
|
|
||||||
github-token: ${{ secrets.GITHUB_TOKEN }}
|
|
||||||
uses: ./.github/workflows/_binary-upload.yml
|
|
||||||
|
|||||||
24
.github/workflows/generated-linux-binary-manywheel-main.yml
generated
vendored
24
.github/workflows/generated-linux-binary-manywheel-main.yml
generated
vendored
@ -42,7 +42,7 @@ jobs:
|
|||||||
issue_owner: ${{ github.event.pull_request.user.login || github.event.issue.user.login }}
|
issue_owner: ${{ github.event.pull_request.user.login || github.event.issue.user.login }}
|
||||||
curr_branch: ${{ github.head_ref || github.ref_name }}
|
curr_branch: ${{ github.head_ref || github.ref_name }}
|
||||||
curr_ref_type: ${{ github.ref_type }}
|
curr_ref_type: ${{ github.ref_type }}
|
||||||
manywheel-py3_12-cuda13_0-build:
|
manywheel-py3_12-cuda12_8-build:
|
||||||
if: ${{ github.repository_owner == 'pytorch' }}
|
if: ${{ github.repository_owner == 'pytorch' }}
|
||||||
uses: ./.github/workflows/_binary-build-linux.yml
|
uses: ./.github/workflows/_binary-build-linux.yml
|
||||||
needs: get-label-type
|
needs: get-label-type
|
||||||
@ -51,22 +51,22 @@ jobs:
|
|||||||
PACKAGE_TYPE: manywheel
|
PACKAGE_TYPE: manywheel
|
||||||
# TODO: This is a legacy variable that we eventually want to get rid of in
|
# TODO: This is a legacy variable that we eventually want to get rid of in
|
||||||
# favor of GPU_ARCH_VERSION
|
# favor of GPU_ARCH_VERSION
|
||||||
DESIRED_CUDA: cu130
|
DESIRED_CUDA: cu128
|
||||||
GPU_ARCH_VERSION: "13.0"
|
GPU_ARCH_VERSION: "12.8"
|
||||||
GPU_ARCH_TYPE: cuda
|
GPU_ARCH_TYPE: cuda
|
||||||
DOCKER_IMAGE: manylinux2_28-builder
|
DOCKER_IMAGE: manylinux2_28-builder
|
||||||
DOCKER_IMAGE_TAG_PREFIX: cuda13.0
|
DOCKER_IMAGE_TAG_PREFIX: cuda12.8
|
||||||
DESIRED_PYTHON: "3.12"
|
DESIRED_PYTHON: "3.12"
|
||||||
runner_prefix: "${{ needs.get-label-type.outputs.label-type }}"
|
runner_prefix: "${{ needs.get-label-type.outputs.label-type }}"
|
||||||
build_name: manywheel-py3_12-cuda13_0
|
build_name: manywheel-py3_12-cuda12_8
|
||||||
build_environment: linux-binary-manywheel
|
build_environment: linux-binary-manywheel
|
||||||
PYTORCH_EXTRA_INSTALL_REQUIREMENTS: nvidia-cuda-nvrtc==13.0.48; platform_system == 'Linux' | nvidia-cuda-runtime==13.0.48; platform_system == 'Linux' | nvidia-cuda-cupti==13.0.48; platform_system == 'Linux' | nvidia-cudnn-cu13==9.13.0.50; platform_system == 'Linux' | nvidia-cublas==13.0.0.19; platform_system == 'Linux' | nvidia-cufft==12.0.0.15; platform_system == 'Linux' | nvidia-curand==10.4.0.35; platform_system == 'Linux' | nvidia-cusolver==12.0.3.29; platform_system == 'Linux' | nvidia-cusparse==12.6.2.49; platform_system == 'Linux' | nvidia-cusparselt-cu13==0.8.0; platform_system == 'Linux' | nvidia-nccl-cu13==2.28.3; platform_system == 'Linux' | nvidia-nvshmem-cu13==3.3.24; platform_system == 'Linux' | nvidia-nvtx==13.0.39; platform_system == 'Linux' | nvidia-nvjitlink==13.0.39; platform_system == 'Linux' | nvidia-cufile==1.15.0.42; platform_system == 'Linux'
|
PYTORCH_EXTRA_INSTALL_REQUIREMENTS: nvidia-cuda-nvrtc-cu12==12.8.93; platform_system == 'Linux' | nvidia-cuda-runtime-cu12==12.8.90; platform_system == 'Linux' | nvidia-cuda-cupti-cu12==12.8.90; platform_system == 'Linux' | nvidia-cudnn-cu12==9.10.2.21; platform_system == 'Linux' | nvidia-cublas-cu12==12.8.4.1; platform_system == 'Linux' | nvidia-cufft-cu12==11.3.3.83; platform_system == 'Linux' | nvidia-curand-cu12==10.3.9.90; platform_system == 'Linux' | nvidia-cusolver-cu12==11.7.3.90; platform_system == 'Linux' | nvidia-cusparse-cu12==12.5.8.93; platform_system == 'Linux' | nvidia-cusparselt-cu12==0.7.1; platform_system == 'Linux' | nvidia-nccl-cu12==2.27.5; platform_system == 'Linux' | nvidia-nvshmem-cu12==3.3.24; platform_system == 'Linux' | nvidia-nvtx-cu12==12.8.90; platform_system == 'Linux' | nvidia-nvjitlink-cu12==12.8.93; platform_system == 'Linux' | nvidia-cufile-cu12==1.13.1.3; platform_system == 'Linux'
|
||||||
secrets:
|
secrets:
|
||||||
github-token: ${{ secrets.GITHUB_TOKEN }}
|
github-token: ${{ secrets.GITHUB_TOKEN }}
|
||||||
manywheel-py3_12-cuda13_0-test: # Testing
|
manywheel-py3_12-cuda12_8-test: # Testing
|
||||||
if: ${{ github.repository_owner == 'pytorch' }}
|
if: ${{ github.repository_owner == 'pytorch' }}
|
||||||
needs:
|
needs:
|
||||||
- manywheel-py3_12-cuda13_0-build
|
- manywheel-py3_12-cuda12_8-build
|
||||||
- get-label-type
|
- get-label-type
|
||||||
uses: ./.github/workflows/_binary-test-linux.yml
|
uses: ./.github/workflows/_binary-test-linux.yml
|
||||||
with:
|
with:
|
||||||
@ -74,13 +74,13 @@ jobs:
|
|||||||
PACKAGE_TYPE: manywheel
|
PACKAGE_TYPE: manywheel
|
||||||
# TODO: This is a legacy variable that we eventually want to get rid of in
|
# TODO: This is a legacy variable that we eventually want to get rid of in
|
||||||
# favor of GPU_ARCH_VERSION
|
# favor of GPU_ARCH_VERSION
|
||||||
DESIRED_CUDA: cu130
|
DESIRED_CUDA: cu128
|
||||||
GPU_ARCH_VERSION: "13.0"
|
GPU_ARCH_VERSION: "12.8"
|
||||||
GPU_ARCH_TYPE: cuda
|
GPU_ARCH_TYPE: cuda
|
||||||
DOCKER_IMAGE: manylinux2_28-builder
|
DOCKER_IMAGE: manylinux2_28-builder
|
||||||
DOCKER_IMAGE_TAG_PREFIX: cuda13.0
|
DOCKER_IMAGE_TAG_PREFIX: cuda12.8
|
||||||
DESIRED_PYTHON: "3.12"
|
DESIRED_PYTHON: "3.12"
|
||||||
build_name: manywheel-py3_12-cuda13_0
|
build_name: manywheel-py3_12-cuda12_8
|
||||||
build_environment: linux-binary-manywheel
|
build_environment: linux-binary-manywheel
|
||||||
runner_prefix: "${{ needs.get-label-type.outputs.label-type }}"
|
runner_prefix: "${{ needs.get-label-type.outputs.label-type }}"
|
||||||
runs_on: linux.g4dn.4xlarge.nvidia.gpu # 12.8+ builds need sm_70+ runner
|
runs_on: linux.g4dn.4xlarge.nvidia.gpu # 12.8+ builds need sm_70+ runner
|
||||||
|
|||||||
1610
.github/workflows/generated-linux-binary-manywheel-nightly.yml
generated
vendored
1610
.github/workflows/generated-linux-binary-manywheel-nightly.yml
generated
vendored
File diff suppressed because it is too large
Load Diff
15
.github/workflows/generated-linux-binary-manywheel-rocm-main.yml
generated
vendored
15
.github/workflows/generated-linux-binary-manywheel-rocm-main.yml
generated
vendored
@ -44,7 +44,7 @@ jobs:
|
|||||||
issue_owner: ${{ github.event.pull_request.user.login || github.event.issue.user.login }}
|
issue_owner: ${{ github.event.pull_request.user.login || github.event.issue.user.login }}
|
||||||
curr_branch: ${{ github.head_ref || github.ref_name }}
|
curr_branch: ${{ github.head_ref || github.ref_name }}
|
||||||
curr_ref_type: ${{ github.ref_type }}
|
curr_ref_type: ${{ github.ref_type }}
|
||||||
manywheel-py3_10-rocm6_4-build:
|
manywheel-py3_9-rocm6_4-build:
|
||||||
if: ${{ github.repository_owner == 'pytorch' }}
|
if: ${{ github.repository_owner == 'pytorch' }}
|
||||||
uses: ./.github/workflows/_binary-build-linux.yml
|
uses: ./.github/workflows/_binary-build-linux.yml
|
||||||
needs: get-label-type
|
needs: get-label-type
|
||||||
@ -58,17 +58,16 @@ jobs:
|
|||||||
GPU_ARCH_TYPE: rocm
|
GPU_ARCH_TYPE: rocm
|
||||||
DOCKER_IMAGE: manylinux2_28-builder
|
DOCKER_IMAGE: manylinux2_28-builder
|
||||||
DOCKER_IMAGE_TAG_PREFIX: rocm6.4
|
DOCKER_IMAGE_TAG_PREFIX: rocm6.4
|
||||||
DESIRED_PYTHON: "3.10"
|
DESIRED_PYTHON: "3.9"
|
||||||
runner_prefix: "${{ needs.get-label-type.outputs.label-type }}"
|
runner_prefix: "${{ needs.get-label-type.outputs.label-type }}"
|
||||||
timeout-minutes: 300
|
build_name: manywheel-py3_9-rocm6_4
|
||||||
build_name: manywheel-py3_10-rocm6_4
|
|
||||||
build_environment: linux-binary-manywheel-rocm
|
build_environment: linux-binary-manywheel-rocm
|
||||||
secrets:
|
secrets:
|
||||||
github-token: ${{ secrets.GITHUB_TOKEN }}
|
github-token: ${{ secrets.GITHUB_TOKEN }}
|
||||||
manywheel-py3_10-rocm6_4-test: # Testing
|
manywheel-py3_9-rocm6_4-test: # Testing
|
||||||
if: ${{ github.repository_owner == 'pytorch' }}
|
if: ${{ github.repository_owner == 'pytorch' }}
|
||||||
needs:
|
needs:
|
||||||
- manywheel-py3_10-rocm6_4-build
|
- manywheel-py3_9-rocm6_4-build
|
||||||
- get-label-type
|
- get-label-type
|
||||||
runs-on: linux.rocm.gpu.mi250
|
runs-on: linux.rocm.gpu.mi250
|
||||||
timeout-minutes: 240
|
timeout-minutes: 240
|
||||||
@ -83,14 +82,14 @@ jobs:
|
|||||||
SKIP_ALL_TESTS: 1
|
SKIP_ALL_TESTS: 1
|
||||||
DOCKER_IMAGE: manylinux2_28-builder
|
DOCKER_IMAGE: manylinux2_28-builder
|
||||||
DOCKER_IMAGE_TAG_PREFIX: rocm6.4
|
DOCKER_IMAGE_TAG_PREFIX: rocm6.4
|
||||||
DESIRED_PYTHON: "3.10"
|
DESIRED_PYTHON: "3.9"
|
||||||
steps:
|
steps:
|
||||||
- name: Setup ROCm
|
- name: Setup ROCm
|
||||||
uses: ./.github/actions/setup-rocm
|
uses: ./.github/actions/setup-rocm
|
||||||
- uses: actions/download-artifact@v4.1.7
|
- uses: actions/download-artifact@v4.1.7
|
||||||
name: Download Build Artifacts
|
name: Download Build Artifacts
|
||||||
with:
|
with:
|
||||||
name: manywheel-py3_10-rocm6_4
|
name: manywheel-py3_9-rocm6_4
|
||||||
path: "${{ runner.temp }}/artifacts/"
|
path: "${{ runner.temp }}/artifacts/"
|
||||||
- name: Checkout PyTorch
|
- name: Checkout PyTorch
|
||||||
uses: actions/checkout@v4
|
uses: actions/checkout@v4
|
||||||
|
|||||||
8
.github/workflows/lint.yml
vendored
8
.github/workflows/lint.yml
vendored
@ -31,8 +31,6 @@ jobs:
|
|||||||
if: github.repository_owner == 'pytorch'
|
if: github.repository_owner == 'pytorch'
|
||||||
name: Get changed files
|
name: Get changed files
|
||||||
uses: ./.github/workflows/_get-changed-files.yml
|
uses: ./.github/workflows/_get-changed-files.yml
|
||||||
with:
|
|
||||||
all_files: ${{ contains(github.event.pull_request.labels.*.name, 'lint-all-files') || contains(github.event.pull_request.labels.*.name, 'Reverted') }}
|
|
||||||
|
|
||||||
lintrunner-clang:
|
lintrunner-clang:
|
||||||
uses: pytorch/test-infra/.github/workflows/linux_job_v2.yml@main
|
uses: pytorch/test-infra/.github/workflows/linux_job_v2.yml@main
|
||||||
@ -55,7 +53,7 @@ jobs:
|
|||||||
with:
|
with:
|
||||||
timeout: 120
|
timeout: 120
|
||||||
runner: "${{ needs.get-label-type.outputs.label-type }}linux.2xlarge"
|
runner: "${{ needs.get-label-type.outputs.label-type }}linux.2xlarge"
|
||||||
docker-image: ci-image:pytorch-linux-jammy-cuda12.8-cudnn9-py3.10-linter
|
docker-image: ci-image:pytorch-linux-jammy-cuda12.8-cudnn9-py3.9-linter
|
||||||
# NB: A shallow checkout won't work here because calculate-docker-image requires a full checkout
|
# NB: A shallow checkout won't work here because calculate-docker-image requires a full checkout
|
||||||
# to run git rev-parse HEAD~:.ci/docker when a new image is needed
|
# to run git rev-parse HEAD~:.ci/docker when a new image is needed
|
||||||
fetch-depth: 0
|
fetch-depth: 0
|
||||||
@ -266,10 +264,10 @@ jobs:
|
|||||||
with:
|
with:
|
||||||
submodules: false
|
submodules: false
|
||||||
fetch-depth: 1
|
fetch-depth: 1
|
||||||
- name: Setup Python 3.10
|
- name: Setup Python 3.9
|
||||||
uses: actions/setup-python@a26af69be951a213d495a4c3e4e4022e16d87065 # v5.6.0
|
uses: actions/setup-python@a26af69be951a213d495a4c3e4e4022e16d87065 # v5.6.0
|
||||||
with:
|
with:
|
||||||
python-version: '3.10'
|
python-version: '3.9'
|
||||||
architecture: x64
|
architecture: x64
|
||||||
cache: pip
|
cache: pip
|
||||||
- name: Install dependencies
|
- name: Install dependencies
|
||||||
|
|||||||
46
.github/workflows/operator_microbenchmark_b200.yml
vendored
Normal file
46
.github/workflows/operator_microbenchmark_b200.yml
vendored
Normal file
@ -0,0 +1,46 @@
|
|||||||
|
name: operator_microbenchmark_b200
|
||||||
|
|
||||||
|
on:
|
||||||
|
push:
|
||||||
|
tags:
|
||||||
|
- ciflow/op-benchmark/*
|
||||||
|
workflow_dispatch:
|
||||||
|
schedule:
|
||||||
|
# Run at 06:00 UTC everyday
|
||||||
|
- cron: 0 6 * * *
|
||||||
|
|
||||||
|
concurrency:
|
||||||
|
group: ${{ github.workflow }}-${{ github.event.pull_request.number || github.ref_name }}-${{ github.ref_type == 'branch' && github.sha }}-${{ github.event_name == 'workflow_dispatch' }}-${{ github.event_name == 'schedule' }}
|
||||||
|
cancel-in-progress: true
|
||||||
|
|
||||||
|
permissions:
|
||||||
|
id-token: write
|
||||||
|
contents: read
|
||||||
|
|
||||||
|
jobs:
|
||||||
|
opmicrobenchmark-build:
|
||||||
|
if: github.repository_owner == 'pytorch'
|
||||||
|
name: opmicrobenchmark-build
|
||||||
|
uses: ./.github/workflows/_linux-build.yml
|
||||||
|
with:
|
||||||
|
runner: linux.12xlarge.memory
|
||||||
|
build-environment: linux-jammy-cuda12.8-py3.10-gcc9-sm80
|
||||||
|
docker-image-name: ci-image:pytorch-linux-jammy-cuda12.8-cudnn9-py3-gcc11
|
||||||
|
cuda-arch-list: '10.0'
|
||||||
|
test-matrix: |
|
||||||
|
{ include: [
|
||||||
|
{ config: "operator_microbenchmark_test", shard: 1, num_shards: 1, runner: "linux.dgx.b200" },
|
||||||
|
]}
|
||||||
|
secrets: inherit
|
||||||
|
|
||||||
|
opmicrobenchmark-test:
|
||||||
|
name: opmicrobenchmark-test
|
||||||
|
uses: ./.github/workflows/_linux-test.yml
|
||||||
|
needs: opmicrobenchmark-build
|
||||||
|
with:
|
||||||
|
timeout-minutes: 500
|
||||||
|
build-environment: linux-jammy-cuda12.8-py3.10-gcc9-sm80
|
||||||
|
docker-image: ${{ needs.opmicrobenchmark-build.outputs.docker-image }}
|
||||||
|
test-matrix: ${{ needs.opmicrobenchmark-build.outputs.test-matrix }}
|
||||||
|
aws-role-to-assume: arn:aws:iam::308535385114:role/gha_workflow_s3_and_ecr_read_only
|
||||||
|
secrets: inherit
|
||||||
28
.github/workflows/pull.yml
vendored
28
.github/workflows/pull.yml
vendored
@ -127,8 +127,6 @@ jobs:
|
|||||||
uses: ./.github/workflows/_linux-build.yml
|
uses: ./.github/workflows/_linux-build.yml
|
||||||
needs: get-label-type
|
needs: get-label-type
|
||||||
with:
|
with:
|
||||||
# More memory is needed to build with asan
|
|
||||||
runner: linux.2xlarge.memory
|
|
||||||
runner_prefix: "${{ needs.get-label-type.outputs.label-type }}"
|
runner_prefix: "${{ needs.get-label-type.outputs.label-type }}"
|
||||||
build-environment: linux-jammy-py3.10-clang18-asan
|
build-environment: linux-jammy-py3.10-clang18-asan
|
||||||
docker-image-name: ci-image:pytorch-linux-jammy-py3-clang18-asan
|
docker-image-name: ci-image:pytorch-linux-jammy-py3-clang18-asan
|
||||||
@ -318,6 +316,32 @@ jobs:
|
|||||||
]}
|
]}
|
||||||
secrets: inherit
|
secrets: inherit
|
||||||
|
|
||||||
|
linux-jammy-py3-clang12-executorch-build:
|
||||||
|
if: false # Docker build needs pin update
|
||||||
|
name: linux-jammy-py3-clang12-executorch
|
||||||
|
uses: ./.github/workflows/_linux-build.yml
|
||||||
|
needs: get-label-type
|
||||||
|
with:
|
||||||
|
runner_prefix: "${{ needs.get-label-type.outputs.label-type }}"
|
||||||
|
build-environment: linux-jammy-py3-clang12-executorch
|
||||||
|
docker-image-name: ci-image:pytorch-linux-jammy-py3-clang12-executorch
|
||||||
|
test-matrix: |
|
||||||
|
{ include: [
|
||||||
|
{ config: "executorch", shard: 1, num_shards: 1, runner: "${{ needs.get-label-type.outputs.label-type }}linux.2xlarge" },
|
||||||
|
]}
|
||||||
|
secrets: inherit
|
||||||
|
|
||||||
|
linux-jammy-py3-clang12-executorch-test:
|
||||||
|
name: linux-jammy-py3-clang12-executorch
|
||||||
|
uses: ./.github/workflows/_linux-test.yml
|
||||||
|
needs: linux-jammy-py3-clang12-executorch-build
|
||||||
|
if: false # Has been broken for a while
|
||||||
|
with:
|
||||||
|
build-environment: linux-jammy-py3-clang12-executorch
|
||||||
|
docker-image: ${{ needs.linux-jammy-py3-clang12-executorch-build.outputs.docker-image }}
|
||||||
|
test-matrix: ${{ needs.linux-jammy-py3-clang12-executorch-build.outputs.test-matrix }}
|
||||||
|
secrets: inherit
|
||||||
|
|
||||||
linux-jammy-cuda12_8-py3_10-gcc9-inductor-build:
|
linux-jammy-cuda12_8-py3_10-gcc9-inductor-build:
|
||||||
name: cuda12.8-py3.10-gcc9-sm75
|
name: cuda12.8-py3.10-gcc9-sm75
|
||||||
uses: ./.github/workflows/_linux-build.yml
|
uses: ./.github/workflows/_linux-build.yml
|
||||||
|
|||||||
54
.github/workflows/quantization-periodic.yml
vendored
54
.github/workflows/quantization-periodic.yml
vendored
@ -1,54 +0,0 @@
|
|||||||
name: quantization-periodic
|
|
||||||
|
|
||||||
on:
|
|
||||||
push:
|
|
||||||
tags:
|
|
||||||
- ciflow/quantization-periodic/*
|
|
||||||
workflow_dispatch:
|
|
||||||
schedule:
|
|
||||||
# run weekly
|
|
||||||
- cron: "45 0 * * 0"
|
|
||||||
|
|
||||||
concurrency:
|
|
||||||
group: ${{ github.workflow }}-${{ github.event.pull_request.number || github.ref_name }}-${{ github.ref_type == 'branch' && github.sha }}-${{ github.event_name == 'workflow_dispatch' }}
|
|
||||||
cancel-in-progress: true
|
|
||||||
|
|
||||||
permissions:
|
|
||||||
id-token: write
|
|
||||||
contents: read
|
|
||||||
|
|
||||||
jobs:
|
|
||||||
get-default-label-prefix:
|
|
||||||
name: get-default-label-prefix
|
|
||||||
uses: pytorch/pytorch/.github/workflows/_runner-determinator.yml@main
|
|
||||||
if: ${{ (github.event_name != 'schedule' || github.repository == 'pytorch/pytorch') && github.repository_owner == 'pytorch' }}
|
|
||||||
with:
|
|
||||||
triggering_actor: ${{ github.triggering_actor }}
|
|
||||||
issue_owner: ${{ github.event.pull_request.user.login || github.event.issue.user.login }}
|
|
||||||
curr_branch: ${{ github.head_ref || github.ref_name }}
|
|
||||||
curr_ref_type: ${{ github.ref_type }}
|
|
||||||
opt_out_experiments: lf
|
|
||||||
|
|
||||||
periodic-quantization-build:
|
|
||||||
name: periodic-quantization-build
|
|
||||||
uses: ./.github/workflows/_linux-build.yml
|
|
||||||
needs: get-default-label-prefix
|
|
||||||
with:
|
|
||||||
runner_prefix: "${{ needs.get-default-label-prefix.outputs.label-type }}"
|
|
||||||
build-environment: linux-jammy-cuda12.8-cudnn9-py3-gcc11
|
|
||||||
docker-image-name: ci-image:pytorch-linux-jammy-cuda12.8-cudnn9-py3-gcc11
|
|
||||||
cuda-arch-list: '8.9'
|
|
||||||
test-matrix: |
|
|
||||||
{ include: [
|
|
||||||
{ config: "quantization", shard: 1, num_shards: 1, runner: "${{ needs.get-default-label-prefix.outputs.label-type }}linux.g6.4xlarge.experimental.nvidia.gpu" },
|
|
||||||
]}
|
|
||||||
secrets: inherit
|
|
||||||
periodic-test-quantization:
|
|
||||||
name: periodic-test-quantization
|
|
||||||
uses: ./.github/workflows/_linux-test.yml
|
|
||||||
needs: periodic-quantization-build
|
|
||||||
with:
|
|
||||||
build-environment: linux-jammy-cuda12.8-cudnn9-py3-gcc11
|
|
||||||
docker-image: ${{ needs.periodic-quantization-build.outputs.docker-image }}
|
|
||||||
test-matrix: ${{ needs.periodic-quantization-build.outputs.test-matrix }}
|
|
||||||
secrets: inherit
|
|
||||||
2
.github/workflows/slow.yml
vendored
2
.github/workflows/slow.yml
vendored
@ -140,8 +140,6 @@ jobs:
|
|||||||
uses: ./.github/workflows/_linux-build.yml
|
uses: ./.github/workflows/_linux-build.yml
|
||||||
needs: get-label-type
|
needs: get-label-type
|
||||||
with:
|
with:
|
||||||
# More memory is needed to build with asan
|
|
||||||
runner: linux.2xlarge.memory
|
|
||||||
runner_prefix: "${{ needs.get-label-type.outputs.label-type }}"
|
runner_prefix: "${{ needs.get-label-type.outputs.label-type }}"
|
||||||
build-environment: linux-jammy-py3.10-clang18-asan
|
build-environment: linux-jammy-py3.10-clang18-asan
|
||||||
docker-image-name: ci-image:pytorch-linux-jammy-py3-clang18-asan
|
docker-image-name: ci-image:pytorch-linux-jammy-py3-clang18-asan
|
||||||
|
|||||||
76
.github/workflows/test-b200.yml
vendored
76
.github/workflows/test-b200.yml
vendored
@ -1,76 +0,0 @@
|
|||||||
# B200 Smoke Tests CI Workflow
|
|
||||||
#
|
|
||||||
# This workflow runs smoke tests on B200 hardware
|
|
||||||
#
|
|
||||||
# Flow:
|
|
||||||
# 1. Builds PyTorch with CUDA 12.8+ and sm100 architecture for B200
|
|
||||||
# 2. Runs smoke tests on linux.dgx.b200 runner
|
|
||||||
# 3. Tests executed are defined in .ci/pytorch/test.sh -> test_python_smoke() function
|
|
||||||
#
|
|
||||||
# Triggered by:
|
|
||||||
# - Pull requests modifying this workflow file
|
|
||||||
# - Manual dispatch
|
|
||||||
# - Schedule (every 6 hours)
|
|
||||||
# - Adding ciflow/b200 label to a PR (creates ciflow/b200/* tag)
|
|
||||||
|
|
||||||
name: B200 Smoke Tests
|
|
||||||
|
|
||||||
on:
|
|
||||||
pull_request:
|
|
||||||
paths:
|
|
||||||
- .github/workflows/test-b200.yml
|
|
||||||
workflow_dispatch:
|
|
||||||
schedule:
|
|
||||||
- cron: 0 4,10,16,22 * * * # every 6 hours
|
|
||||||
push:
|
|
||||||
tags:
|
|
||||||
- ciflow/b200/*
|
|
||||||
|
|
||||||
concurrency:
|
|
||||||
group: ${{ github.workflow }}-${{ github.event.pull_request.number || github.sha }}-${{ github.event_name == 'workflow_dispatch' }}-${{ github.event_name == 'schedule' }}
|
|
||||||
cancel-in-progress: true
|
|
||||||
|
|
||||||
permissions:
|
|
||||||
id-token: write
|
|
||||||
contents: read
|
|
||||||
|
|
||||||
jobs:
|
|
||||||
|
|
||||||
get-label-type:
|
|
||||||
if: github.repository_owner == 'pytorch'
|
|
||||||
name: get-label-type
|
|
||||||
uses: pytorch/pytorch/.github/workflows/_runner-determinator.yml@main
|
|
||||||
with:
|
|
||||||
triggering_actor: ${{ github.triggering_actor }}
|
|
||||||
issue_owner: ${{ github.event.pull_request.user.login || github.event.issue.user.login }}
|
|
||||||
curr_branch: ${{ github.head_ref || github.ref_name }}
|
|
||||||
curr_ref_type: ${{ github.ref_type }}
|
|
||||||
|
|
||||||
linux-jammy-cuda12_8-py3_10-gcc11-sm100-build:
|
|
||||||
name: linux-jammy-cuda12.8-py3.10-gcc11-sm100
|
|
||||||
uses: ./.github/workflows/_linux-build.yml
|
|
||||||
needs: get-label-type
|
|
||||||
with:
|
|
||||||
runner_prefix: "${{ needs.get-label-type.outputs.label-type }}"
|
|
||||||
runner: linux.12xlarge.memory
|
|
||||||
build-environment: linux-jammy-cuda12.8-py3.10-gcc11-sm100
|
|
||||||
docker-image-name: ci-image:pytorch-linux-jammy-cuda12.8-cudnn9-py3-gcc11
|
|
||||||
cuda-arch-list: '10.0'
|
|
||||||
test-matrix: |
|
|
||||||
{ include: [
|
|
||||||
{ config: "smoke_b200", shard: 1, num_shards: 1, runner: "linux.dgx.b200" },
|
|
||||||
]}
|
|
||||||
# config: "smoke_b200" maps to test_python_smoke_b200() in .ci/pytorch/test.sh
|
|
||||||
secrets: inherit
|
|
||||||
|
|
||||||
linux-jammy-cuda12_8-py3_10-gcc11-sm100-test:
|
|
||||||
name: linux-jammy-cuda12.8-py3.10-gcc11-sm100
|
|
||||||
uses: ./.github/workflows/_linux-test.yml
|
|
||||||
needs:
|
|
||||||
- linux-jammy-cuda12_8-py3_10-gcc11-sm100-build
|
|
||||||
with:
|
|
||||||
build-environment: linux-jammy-cuda12.8-py3.10-gcc11-sm100
|
|
||||||
docker-image: ${{ needs.linux-jammy-cuda12_8-py3_10-gcc11-sm100-build.outputs.docker-image }}
|
|
||||||
test-matrix: ${{ needs.linux-jammy-cuda12_8-py3_10-gcc11-sm100-build.outputs.test-matrix }}
|
|
||||||
aws-role-to-assume: arn:aws:iam::308535385114:role/gha_workflow_s3_and_ecr_read_only
|
|
||||||
secrets: inherit
|
|
||||||
24
.github/workflows/trunk.yml
vendored
24
.github/workflows/trunk.yml
vendored
@ -259,27 +259,3 @@ jobs:
|
|||||||
docker-image: ${{ needs.verify-cachebench-cpu-build.outputs.docker-image }}
|
docker-image: ${{ needs.verify-cachebench-cpu-build.outputs.docker-image }}
|
||||||
test-matrix: ${{ needs.verify-cachebench-cpu-build.outputs.test-matrix }}
|
test-matrix: ${{ needs.verify-cachebench-cpu-build.outputs.test-matrix }}
|
||||||
secrets: inherit
|
secrets: inherit
|
||||||
|
|
||||||
linux-jammy-py3-clang12-executorch-build:
|
|
||||||
name: linux-jammy-py3-clang12-executorch
|
|
||||||
uses: ./.github/workflows/_linux-build.yml
|
|
||||||
needs: get-label-type
|
|
||||||
with:
|
|
||||||
runner_prefix: "${{ needs.get-label-type.outputs.label-type }}"
|
|
||||||
build-environment: linux-jammy-py3-clang12-executorch
|
|
||||||
docker-image-name: ci-image:pytorch-linux-jammy-py3-clang12-executorch
|
|
||||||
test-matrix: |
|
|
||||||
{ include: [
|
|
||||||
{ config: "executorch", shard: 1, num_shards: 1, runner: "${{ needs.get-label-type.outputs.label-type }}linux.2xlarge" },
|
|
||||||
]}
|
|
||||||
secrets: inherit
|
|
||||||
|
|
||||||
linux-jammy-py3-clang12-executorch-test:
|
|
||||||
name: linux-jammy-py3-clang12-executorch
|
|
||||||
uses: ./.github/workflows/_linux-test.yml
|
|
||||||
needs: linux-jammy-py3-clang12-executorch-build
|
|
||||||
with:
|
|
||||||
build-environment: linux-jammy-py3-clang12-executorch
|
|
||||||
docker-image: ${{ needs.linux-jammy-py3-clang12-executorch-build.outputs.docker-image }}
|
|
||||||
test-matrix: ${{ needs.linux-jammy-py3-clang12-executorch-build.outputs.test-matrix }}
|
|
||||||
secrets: inherit
|
|
||||||
|
|||||||
24
.github/workflows/unstable.yml
vendored
24
.github/workflows/unstable.yml
vendored
@ -53,3 +53,27 @@ jobs:
|
|||||||
issue_owner: ${{ github.event.pull_request.user.login || github.event.issue.user.login }}
|
issue_owner: ${{ github.event.pull_request.user.login || github.event.issue.user.login }}
|
||||||
curr_branch: ${{ github.head_ref || github.ref_name }}
|
curr_branch: ${{ github.head_ref || github.ref_name }}
|
||||||
curr_ref_type: ${{ github.ref_type }}
|
curr_ref_type: ${{ github.ref_type }}
|
||||||
|
|
||||||
|
linux-jammy-py3_9-clang9-xla-build:
|
||||||
|
name: linux-jammy-py3_9-clang9-xla
|
||||||
|
uses: ./.github/workflows/_linux-build.yml
|
||||||
|
needs: get-label-type
|
||||||
|
with:
|
||||||
|
runner_prefix: "${{ needs.get-label-type.outputs.label-type }}"
|
||||||
|
build-environment: linux-jammy-py3.9-clang9-xla
|
||||||
|
docker-image-name: 308535385114.dkr.ecr.us-east-1.amazonaws.com/pytorch/xla_base:v1.3-lite
|
||||||
|
test-matrix: |
|
||||||
|
{ include: [
|
||||||
|
{ config: "xla", shard: 1, num_shards: 1, runner: "${{ needs.get-label-type.outputs.label-type }}linux.12xlarge" },
|
||||||
|
]}
|
||||||
|
secrets: inherit
|
||||||
|
|
||||||
|
linux-jammy-py3_9-clang9-xla-test:
|
||||||
|
name: linux-jammy-py3_9-clang9-xla
|
||||||
|
uses: ./.github/workflows/_linux-test.yml
|
||||||
|
needs: linux-jammy-py3_9-clang9-xla-build
|
||||||
|
with:
|
||||||
|
build-environment: linux-jammy-py3.9-clang9-xla
|
||||||
|
docker-image: ${{ needs.linux-jammy-py3_9-clang9-xla-build.outputs.docker-image }}
|
||||||
|
test-matrix: ${{ needs.linux-jammy-py3_9-clang9-xla-build.outputs.test-matrix }}
|
||||||
|
secrets: inherit
|
||||||
|
|||||||
2
.github/workflows/vllm.yml
vendored
2
.github/workflows/vllm.yml
vendored
@ -36,8 +36,6 @@ jobs:
|
|||||||
uses: ./.github/workflows/_linux-build.yml
|
uses: ./.github/workflows/_linux-build.yml
|
||||||
needs: get-label-type
|
needs: get-label-type
|
||||||
with:
|
with:
|
||||||
# When building vLLM, uv doesn't like that we rename wheel without changing the wheel metadata
|
|
||||||
allow-reuse-old-whl: false
|
|
||||||
build-additional-packages: "vision audio"
|
build-additional-packages: "vision audio"
|
||||||
build-external-packages: "vllm"
|
build-external-packages: "vllm"
|
||||||
build-environment: linux-jammy-cuda12.8-py3.12-gcc11
|
build-environment: linux-jammy-cuda12.8-py3.12-gcc11
|
||||||
|
|||||||
4
.gitignore
vendored
4
.gitignore
vendored
@ -82,7 +82,6 @@ torch/return_types.pyi
|
|||||||
torch/nn/functional.pyi
|
torch/nn/functional.pyi
|
||||||
torch/utils/data/datapipes/datapipe.pyi
|
torch/utils/data/datapipes/datapipe.pyi
|
||||||
torch/csrc/autograd/generated/*
|
torch/csrc/autograd/generated/*
|
||||||
torch/csrc/functionalization/generated/*
|
|
||||||
torch/csrc/lazy/generated/*.[!m]*
|
torch/csrc/lazy/generated/*.[!m]*
|
||||||
torch_compile_debug/
|
torch_compile_debug/
|
||||||
# Listed manually because some files in this directory are not generated
|
# Listed manually because some files in this directory are not generated
|
||||||
@ -260,9 +259,6 @@ gen
|
|||||||
.pytest_cache
|
.pytest_cache
|
||||||
aten/build/*
|
aten/build/*
|
||||||
|
|
||||||
# Linker scripts for prioritized text optimization
|
|
||||||
cmake/linker_script.ld
|
|
||||||
|
|
||||||
# Bram
|
# Bram
|
||||||
plsdontbreak
|
plsdontbreak
|
||||||
|
|
||||||
|
|||||||
@ -49,7 +49,7 @@ init_command = [
|
|||||||
'mccabe==0.7.0',
|
'mccabe==0.7.0',
|
||||||
'pycodestyle==2.14.0',
|
'pycodestyle==2.14.0',
|
||||||
'pyflakes==3.4.0',
|
'pyflakes==3.4.0',
|
||||||
'torchfix==0.4.0 ; python_version >= "3.10" and python_version < "3.13"',
|
'torchfix==0.4.0 ; python_version >= "3.9" and python_version < "3.13"',
|
||||||
]
|
]
|
||||||
|
|
||||||
|
|
||||||
@ -123,7 +123,6 @@ is_formatter = true
|
|||||||
code = 'MYPY'
|
code = 'MYPY'
|
||||||
include_patterns = [
|
include_patterns = [
|
||||||
'setup.py',
|
'setup.py',
|
||||||
'functorch/dim/**/*.py',
|
|
||||||
'torch/**/*.py',
|
'torch/**/*.py',
|
||||||
'torch/**/*.pyi',
|
'torch/**/*.pyi',
|
||||||
'caffe2/**/*.py',
|
'caffe2/**/*.py',
|
||||||
@ -153,7 +152,7 @@ init_command = [
|
|||||||
'python3',
|
'python3',
|
||||||
'tools/linter/adapters/pip_init.py',
|
'tools/linter/adapters/pip_init.py',
|
||||||
'--dry-run={{DRYRUN}}',
|
'--dry-run={{DRYRUN}}',
|
||||||
'numpy==1.26.4 ; python_version >= "3.10" and python_version <= "3.11"',
|
'numpy==1.26.4 ; python_version >= "3.9" and python_version <= "3.11"',
|
||||||
'numpy==2.1.0 ; python_version >= "3.12"',
|
'numpy==2.1.0 ; python_version >= "3.12"',
|
||||||
'expecttest==0.3.0',
|
'expecttest==0.3.0',
|
||||||
'mypy==1.16.0',
|
'mypy==1.16.0',
|
||||||
@ -196,7 +195,6 @@ exclude_patterns = [
|
|||||||
'tools/test/gen_operators_yaml_test.py',
|
'tools/test/gen_operators_yaml_test.py',
|
||||||
'tools/test/gen_oplist_test.py',
|
'tools/test/gen_oplist_test.py',
|
||||||
'tools/test/test_selective_build.py',
|
'tools/test/test_selective_build.py',
|
||||||
'tools/experimental/dynamic_shapes/torchfuzz/**',
|
|
||||||
]
|
]
|
||||||
command = [
|
command = [
|
||||||
'python3',
|
'python3',
|
||||||
@ -966,6 +964,7 @@ exclude_patterns = [
|
|||||||
'test/jit/**', # should be run through test/test_jit.py
|
'test/jit/**', # should be run through test/test_jit.py
|
||||||
'test/ao/sparsity/**', # should be run through test/test_ao_sparsity.py
|
'test/ao/sparsity/**', # should be run through test/test_ao_sparsity.py
|
||||||
'test/fx/**', # should be run through test/test_fx.py
|
'test/fx/**', # should be run through test/test_fx.py
|
||||||
|
'test/bottleneck_test/**', # excluded by test/run_test.py
|
||||||
'test/package/**', # excluded by test/run_test.py
|
'test/package/**', # excluded by test/run_test.py
|
||||||
'test/distributed/argparse_util_test.py',
|
'test/distributed/argparse_util_test.py',
|
||||||
'test/distributed/bin/test_script.py',
|
'test/distributed/bin/test_script.py',
|
||||||
@ -1411,6 +1410,8 @@ exclude_patterns = [
|
|||||||
'torch/utils/benchmark/utils/timer.py',
|
'torch/utils/benchmark/utils/timer.py',
|
||||||
'torch/utils/benchmark/utils/valgrind_wrapper/__init__.py',
|
'torch/utils/benchmark/utils/valgrind_wrapper/__init__.py',
|
||||||
'torch/utils/benchmark/utils/valgrind_wrapper/timer_interface.py',
|
'torch/utils/benchmark/utils/valgrind_wrapper/timer_interface.py',
|
||||||
|
'torch/utils/bottleneck/__init__.py',
|
||||||
|
'torch/utils/bottleneck/__main__.py',
|
||||||
'torch/utils/bundled_inputs.py',
|
'torch/utils/bundled_inputs.py',
|
||||||
'torch/utils/checkpoint.py',
|
'torch/utils/checkpoint.py',
|
||||||
'torch/utils/collect_env.py',
|
'torch/utils/collect_env.py',
|
||||||
@ -1453,7 +1454,7 @@ init_command = [
|
|||||||
'--dry-run={{DRYRUN}}',
|
'--dry-run={{DRYRUN}}',
|
||||||
'usort==1.0.8.post1',
|
'usort==1.0.8.post1',
|
||||||
'isort==6.0.1',
|
'isort==6.0.1',
|
||||||
'ruff==0.13.1', # sync with RUFF
|
'ruff==0.12.9', # sync with RUFF
|
||||||
]
|
]
|
||||||
is_formatter = true
|
is_formatter = true
|
||||||
|
|
||||||
@ -1587,7 +1588,7 @@ init_command = [
|
|||||||
'python3',
|
'python3',
|
||||||
'tools/linter/adapters/pip_init.py',
|
'tools/linter/adapters/pip_init.py',
|
||||||
'--dry-run={{DRYRUN}}',
|
'--dry-run={{DRYRUN}}',
|
||||||
'ruff==0.13.1', # sync with PYFMT
|
'ruff==0.12.9', # sync with PYFMT
|
||||||
]
|
]
|
||||||
is_formatter = true
|
is_formatter = true
|
||||||
|
|
||||||
|
|||||||
37
BUILD.bazel
37
BUILD.bazel
@ -22,7 +22,6 @@ COMMON_COPTS = [
|
|||||||
"-DHAVE_SHM_UNLINK=1",
|
"-DHAVE_SHM_UNLINK=1",
|
||||||
"-D_FILE_OFFSET_BITS=64",
|
"-D_FILE_OFFSET_BITS=64",
|
||||||
"-DUSE_FBGEMM",
|
"-DUSE_FBGEMM",
|
||||||
"-DUSE_DISTRIBUTED",
|
|
||||||
"-DAT_PER_OPERATOR_HEADERS",
|
"-DAT_PER_OPERATOR_HEADERS",
|
||||||
"-DATEN_THREADING=NATIVE",
|
"-DATEN_THREADING=NATIVE",
|
||||||
"-DNO_CUDNN_DESTROY_HANDLE",
|
"-DNO_CUDNN_DESTROY_HANDLE",
|
||||||
@ -91,8 +90,6 @@ generated_cpu_cpp = [
|
|||||||
"aten/src/ATen/NativeMetaFunctions.h",
|
"aten/src/ATen/NativeMetaFunctions.h",
|
||||||
"aten/src/ATen/RegistrationDeclarations.h",
|
"aten/src/ATen/RegistrationDeclarations.h",
|
||||||
"aten/src/ATen/VmapGeneratedPlumbing.h",
|
"aten/src/ATen/VmapGeneratedPlumbing.h",
|
||||||
"aten/src/ATen/ViewMetaClasses.h",
|
|
||||||
"aten/src/ATen/ViewMetaClasses.cpp",
|
|
||||||
"aten/src/ATen/core/aten_interned_strings.h",
|
"aten/src/ATen/core/aten_interned_strings.h",
|
||||||
"aten/src/ATen/core/enum_tag.h",
|
"aten/src/ATen/core/enum_tag.h",
|
||||||
"aten/src/ATen/core/TensorBody.h",
|
"aten/src/ATen/core/TensorBody.h",
|
||||||
@ -813,7 +810,7 @@ cc_library(
|
|||||||
name = "torch_python",
|
name = "torch_python",
|
||||||
srcs = libtorch_python_core_sources
|
srcs = libtorch_python_core_sources
|
||||||
+ if_cuda(libtorch_python_cuda_sources)
|
+ if_cuda(libtorch_python_cuda_sources)
|
||||||
+ if_cuda(libtorch_python_distributed_sources)
|
+ libtorch_python_distributed_sources
|
||||||
+ GENERATED_AUTOGRAD_PYTHON,
|
+ GENERATED_AUTOGRAD_PYTHON,
|
||||||
hdrs = glob([
|
hdrs = glob([
|
||||||
"torch/csrc/generic/*.cpp",
|
"torch/csrc/generic/*.cpp",
|
||||||
@ -835,6 +832,36 @@ pybind_extension(
|
|||||||
],
|
],
|
||||||
)
|
)
|
||||||
|
|
||||||
|
cc_library(
|
||||||
|
name = "functorch",
|
||||||
|
hdrs = glob([
|
||||||
|
"functorch/csrc/dim/*.h",
|
||||||
|
]),
|
||||||
|
srcs = glob([
|
||||||
|
"functorch/csrc/dim/*.cpp",
|
||||||
|
]),
|
||||||
|
deps = [
|
||||||
|
":aten_nvrtc",
|
||||||
|
":torch_python",
|
||||||
|
"@pybind11",
|
||||||
|
],
|
||||||
|
)
|
||||||
|
|
||||||
|
pybind_extension(
|
||||||
|
name = "functorch/_C",
|
||||||
|
copts=[
|
||||||
|
"-DTORCH_EXTENSION_NAME=_C"
|
||||||
|
],
|
||||||
|
srcs = [
|
||||||
|
"functorch/csrc/init_dim_only.cpp",
|
||||||
|
],
|
||||||
|
deps = [
|
||||||
|
":functorch",
|
||||||
|
":torch_python",
|
||||||
|
":aten_nvrtc",
|
||||||
|
],
|
||||||
|
)
|
||||||
|
|
||||||
cc_binary(
|
cc_binary(
|
||||||
name = "torch/bin/torch_shm_manager",
|
name = "torch/bin/torch_shm_manager",
|
||||||
srcs = [
|
srcs = [
|
||||||
@ -875,6 +902,7 @@ py_library(
|
|||||||
],
|
],
|
||||||
data = [
|
data = [
|
||||||
":torch/_C.so",
|
":torch/_C.so",
|
||||||
|
":functorch/_C.so",
|
||||||
":torch/bin/torch_shm_manager",
|
":torch/bin/torch_shm_manager",
|
||||||
],
|
],
|
||||||
)
|
)
|
||||||
@ -1077,7 +1105,6 @@ test_suite(
|
|||||||
"aten/src/ATen/templates/LazyNonNativeIr.h",
|
"aten/src/ATen/templates/LazyNonNativeIr.h",
|
||||||
"aten/src/ATen/templates/RegisterDispatchKey.cpp",
|
"aten/src/ATen/templates/RegisterDispatchKey.cpp",
|
||||||
"aten/src/ATen/templates/RegisterDispatchDefinitions.ini",
|
"aten/src/ATen/templates/RegisterDispatchDefinitions.ini",
|
||||||
"aten/src/ATen/templates/ViewMetaClassesPythonBinding.cpp",
|
|
||||||
"aten/src/ATen/native/native_functions.yaml",
|
"aten/src/ATen/native/native_functions.yaml",
|
||||||
"aten/src/ATen/native/tags.yaml",
|
"aten/src/ATen/native/tags.yaml",
|
||||||
"aten/src/ATen/native/ts_native_functions.yaml",
|
"aten/src/ATen/native/ts_native_functions.yaml",
|
||||||
|
|||||||
@ -1,4 +1,5 @@
|
|||||||
cmake_minimum_required(VERSION 3.27 FATAL_ERROR)
|
cmake_minimum_required(VERSION 3.27 FATAL_ERROR)
|
||||||
|
# cmake_policy(SET CMP0022 NEW) cmake_policy(SET CMP0023 NEW)
|
||||||
|
|
||||||
# Use compiler ID "AppleClang" instead of "Clang" for XCode. Not setting this
|
# Use compiler ID "AppleClang" instead of "Clang" for XCode. Not setting this
|
||||||
# sometimes makes XCode C compiler gets detected as "Clang", even when the C++
|
# sometimes makes XCode C compiler gets detected as "Clang", even when the C++
|
||||||
@ -180,8 +181,9 @@ elseif(CMAKE_SYSTEM_PROCESSOR MATCHES "^(ppc64le)")
|
|||||||
set(CPU_POWER ON)
|
set(CPU_POWER ON)
|
||||||
endif()
|
endif()
|
||||||
|
|
||||||
# For non-supported platforms, turn USE_DISTRIBUTED off by default. It is not
|
# For non-supported platforms, turn USE_DISTRIBUTED off by default.
|
||||||
# tested and likely won't work without additional changes.
|
# NB: USE_DISTRIBUTED simply disables the backend; distributed code
|
||||||
|
# still gets built
|
||||||
if(NOT LINUX AND NOT WIN32)
|
if(NOT LINUX AND NOT WIN32)
|
||||||
set(USE_DISTRIBUTED
|
set(USE_DISTRIBUTED
|
||||||
OFF
|
OFF
|
||||||
@ -261,11 +263,11 @@ option(USE_PYTORCH_METAL "Use Metal for PyTorch iOS build" OFF)
|
|||||||
option(USE_PYTORCH_METAL_EXPORT "Export Metal models on MacOSX desktop" OFF)
|
option(USE_PYTORCH_METAL_EXPORT "Export Metal models on MacOSX desktop" OFF)
|
||||||
option(USE_NATIVE_ARCH "Use -march=native" OFF)
|
option(USE_NATIVE_ARCH "Use -march=native" OFF)
|
||||||
cmake_dependent_option(USE_MPS "Use MPS for macOS build" ON "MPS_FOUND" OFF)
|
cmake_dependent_option(USE_MPS "Use MPS for macOS build" ON "MPS_FOUND" OFF)
|
||||||
option(USE_DISTRIBUTED "Use distributed" ON)
|
option(USE_DISTRIBUTED "Enable default distributed backends" ON)
|
||||||
cmake_dependent_option(USE_NCCL "Use NCCL" ON
|
cmake_dependent_option(USE_NCCL "Use NCCL" ON
|
||||||
"USE_DISTRIBUTED;USE_CUDA OR USE_ROCM;UNIX;NOT APPLE" OFF)
|
"USE_DISTRIBUTED;USE_CUDA OR USE_ROCM;UNIX;NOT APPLE" OFF)
|
||||||
cmake_dependent_option(USE_XCCL "Use XCCL" ON
|
cmake_dependent_option(USE_XCCL "Use XCCL" ON
|
||||||
"USE_XPU;UNIX;NOT APPLE" OFF)
|
"USE_DISTRIBUTED;USE_XPU;UNIX;NOT APPLE" OFF)
|
||||||
cmake_dependent_option(USE_RCCL "Use RCCL" ON USE_NCCL OFF)
|
cmake_dependent_option(USE_RCCL "Use RCCL" ON USE_NCCL OFF)
|
||||||
cmake_dependent_option(USE_RCCL "Use RCCL" ON "USE_NCCL;NOT WIN32" OFF)
|
cmake_dependent_option(USE_RCCL "Use RCCL" ON "USE_NCCL;NOT WIN32" OFF)
|
||||||
cmake_dependent_option(USE_STATIC_NCCL "Use static NCCL" OFF "USE_NCCL" OFF)
|
cmake_dependent_option(USE_STATIC_NCCL "Use static NCCL" OFF "USE_NCCL" OFF)
|
||||||
@ -378,13 +380,6 @@ cmake_dependent_option(BUILD_BUNDLE_PTXAS "Bundle PTX into torch/bin fodler"
|
|||||||
OFF "USE_CUDA" OFF)
|
OFF "USE_CUDA" OFF)
|
||||||
cmake_dependent_option(USE_KLEIDIAI "Use KleidiAI for the ARM CPU & AARCH64 architecture." ON
|
cmake_dependent_option(USE_KLEIDIAI "Use KleidiAI for the ARM CPU & AARCH64 architecture." ON
|
||||||
"CPU_AARCH64" OFF)
|
"CPU_AARCH64" OFF)
|
||||||
# prioritized text linker, ON by default for AArch64+Linux, option visible to all AArch64, x86 and ppc64le.
|
|
||||||
set(USE_PRIORITIZED_TEXT_DEFAULT OFF)
|
|
||||||
if(LINUX AND CPU_AARCH64)
|
|
||||||
set(USE_PRIORITIZED_TEXT_DEFAULT ON)
|
|
||||||
endif()
|
|
||||||
cmake_dependent_option(USE_PRIORITIZED_TEXT_FOR_LD "Use prioritized text linker for ld."
|
|
||||||
"${USE_PRIORITIZED_TEXT_DEFAULT}" "CPU_INTEL OR CPU_AARCH64 OR CPU_POWER" OFF)
|
|
||||||
|
|
||||||
option(USE_MIMALLOC "Use mimalloc" OFF)
|
option(USE_MIMALLOC "Use mimalloc" OFF)
|
||||||
# Enable third party mimalloc library to improve memory allocation performance
|
# Enable third party mimalloc library to improve memory allocation performance
|
||||||
@ -437,12 +432,11 @@ if(WIN32)
|
|||||||
PATH_SUFFIXES lib
|
PATH_SUFFIXES lib
|
||||||
NO_DEFAULT_PATH)
|
NO_DEFAULT_PATH)
|
||||||
if(NOT libuv_tmp_LIBRARY)
|
if(NOT libuv_tmp_LIBRARY)
|
||||||
set(USE_DISTRIBUTED OFF)
|
|
||||||
set(USE_GLOO OFF)
|
set(USE_GLOO OFF)
|
||||||
message(
|
message(
|
||||||
WARNING
|
WARNING
|
||||||
"Libuv is not installed in current conda env. Set USE_DISTRIBUTED to OFF. "
|
"Libuv is not installed in current conda env. Set USE_GLOO to OFF. "
|
||||||
"Please run command 'conda install -c conda-forge libuv=1.51' to install libuv."
|
"Please run command 'conda install -c conda-forge libuv=1.39' to install libuv."
|
||||||
)
|
)
|
||||||
else()
|
else()
|
||||||
set(ENV{libuv_ROOT} ${libuv_tmp_LIBRARY}/../../)
|
set(ENV{libuv_ROOT} ${libuv_tmp_LIBRARY}/../../)
|
||||||
@ -663,11 +657,6 @@ endif(MSVC)
|
|||||||
|
|
||||||
string(APPEND CMAKE_CUDA_FLAGS " -Xfatbin -compress-all")
|
string(APPEND CMAKE_CUDA_FLAGS " -Xfatbin -compress-all")
|
||||||
|
|
||||||
# Set linker max-page-size to 64KiB on AArch64 Linux
|
|
||||||
if(LINUX AND CPU_AARCH64)
|
|
||||||
add_link_options_if_supported("-z,max-page-size=0x10000")
|
|
||||||
endif()
|
|
||||||
|
|
||||||
# Set INTERN_BUILD_MOBILE for all mobile builds. Components that are not
|
# Set INTERN_BUILD_MOBILE for all mobile builds. Components that are not
|
||||||
# applicable to mobile are disabled by this variable. Setting
|
# applicable to mobile are disabled by this variable. Setting
|
||||||
# `BUILD_PYTORCH_MOBILE_WITH_HOST_TOOLCHAIN` environment variable can force it
|
# `BUILD_PYTORCH_MOBILE_WITH_HOST_TOOLCHAIN` environment variable can force it
|
||||||
@ -902,7 +891,7 @@ IF(USE_FBGEMM_GENAI AND USE_ROCM AND NOT "gfx942" IN_LIST PYTORCH_ROCM_ARCH)
|
|||||||
endif()
|
endif()
|
||||||
|
|
||||||
# Set USE_FBGEMM_GENAI to ON for CUDA build on SM100.
|
# Set USE_FBGEMM_GENAI to ON for CUDA build on SM100.
|
||||||
if(USE_CUDA AND "$ENV{TORCH_CUDA_ARCH_LIST}" MATCHES "10.0" AND CMAKE_CUDA_COMPILER_VERSION VERSION_GREATER_EQUAL 12.8 AND NOT WIN32)
|
if(USE_CUDA AND "$ENV{TORCH_CUDA_ARCH_LIST}" MATCHES "10.0" AND CMAKE_CUDA_COMPILER_VERSION VERSION_GREATER_EQUAL 12.8)
|
||||||
message(STATUS "Setting USE_FBGEMM_GENAI to ON, doing CUDA build for SM100a")
|
message(STATUS "Setting USE_FBGEMM_GENAI to ON, doing CUDA build for SM100a")
|
||||||
set(USE_FBGEMM_GENAI ON)
|
set(USE_FBGEMM_GENAI ON)
|
||||||
endif()
|
endif()
|
||||||
@ -1390,6 +1379,10 @@ endif()
|
|||||||
include(cmake/Summary.cmake)
|
include(cmake/Summary.cmake)
|
||||||
caffe2_print_configuration_summary()
|
caffe2_print_configuration_summary()
|
||||||
|
|
||||||
|
if(BUILD_FUNCTORCH)
|
||||||
|
add_subdirectory(functorch)
|
||||||
|
endif()
|
||||||
|
|
||||||
# Parse custom debug info
|
# Parse custom debug info
|
||||||
if(DEFINED USE_CUSTOM_DEBINFO)
|
if(DEFINED USE_CUSTOM_DEBINFO)
|
||||||
string(REPLACE ";" " " SOURCE_FILES "${USE_CUSTOM_DEBINFO}")
|
string(REPLACE ";" " " SOURCE_FILES "${USE_CUSTOM_DEBINFO}")
|
||||||
@ -1428,57 +1421,3 @@ if(BUILD_BUNDLE_PTXAS AND USE_CUDA)
|
|||||||
install(PROGRAMS "${PROJECT_BINARY_DIR}/ptxas"
|
install(PROGRAMS "${PROJECT_BINARY_DIR}/ptxas"
|
||||||
DESTINATION "${CMAKE_INSTALL_BINDIR}")
|
DESTINATION "${CMAKE_INSTALL_BINDIR}")
|
||||||
endif()
|
endif()
|
||||||
|
|
||||||
if(USE_PRIORITIZED_TEXT_FOR_LD)
|
|
||||||
add_compile_options(
|
|
||||||
$<$<COMPILE_LANGUAGE:C,CXX>:-ffunction-sections>
|
|
||||||
$<$<COMPILE_LANGUAGE:C,CXX>:-fdata-sections>
|
|
||||||
)
|
|
||||||
set(LINKER_SCRIPT_FILE_OUT "${CMAKE_SOURCE_DIR}/cmake/linker_script.ld")
|
|
||||||
set(LINKER_SCRIPT_FILE_IN "${CMAKE_SOURCE_DIR}/cmake/prioritized_text.txt")
|
|
||||||
|
|
||||||
add_custom_command(
|
|
||||||
OUTPUT "${LINKER_SCRIPT_FILE_OUT}"
|
|
||||||
COMMAND ${Python_EXECUTABLE} ${CMAKE_SOURCE_DIR}/tools/setup_helpers/generate_linker_script.py --filein "${LINKER_SCRIPT_FILE_IN}" --fout "${LINKER_SCRIPT_FILE_OUT}"
|
|
||||||
DEPENDS ${CMAKE_SOURCE_DIR}/tools/setup_helpers/generate_linker_script.py "${LINKER_SCRIPT_FILE_IN}"
|
|
||||||
COMMENT "Generating prioritized text linker files"
|
|
||||||
VERBATIM
|
|
||||||
)
|
|
||||||
|
|
||||||
add_custom_target(generate_linker_script DEPENDS "${LINKER_SCRIPT_FILE_OUT}")
|
|
||||||
|
|
||||||
if(BUILD_PYTHON)
|
|
||||||
set(LINKER_OPT_TARGETS torch_python)
|
|
||||||
endif()
|
|
||||||
|
|
||||||
if(NOT BUILD_LIBTORCHLESS)
|
|
||||||
list(APPEND LINKER_OPT_TARGETS torch_cpu c10)
|
|
||||||
if(USE_CUDA)
|
|
||||||
list(APPEND LINKER_OPT_TARGETS torch_cuda c10_cuda)
|
|
||||||
endif()
|
|
||||||
if(USE_XPU)
|
|
||||||
list(APPEND LINKER_OPT_TARGETS torch_xpu c10_xpu)
|
|
||||||
endif()
|
|
||||||
if(USE_ROCM)
|
|
||||||
list(APPEND LINKER_OPT_TARGETS torch_hip c10_hip)
|
|
||||||
endif()
|
|
||||||
endif()
|
|
||||||
|
|
||||||
foreach(tgt IN LISTS LINKER_OPT_TARGETS)
|
|
||||||
if(TARGET ${tgt})
|
|
||||||
add_dependencies("${tgt}" generate_linker_script)
|
|
||||||
target_link_options_if_supported(${tgt} "-T,${LINKER_SCRIPT_FILE_OUT}")
|
|
||||||
set_property(TARGET ${tgt} APPEND PROPERTY LINK_DEPENDS "${LINKER_SCRIPT_FILE_OUT}")
|
|
||||||
else()
|
|
||||||
message(WARNING "Requested target '${tgt}' for linker script optimization was not found.")
|
|
||||||
endif()
|
|
||||||
endforeach()
|
|
||||||
|
|
||||||
else()
|
|
||||||
if(LINUX AND CPU_AARCH64)
|
|
||||||
message(WARNING [[
|
|
||||||
It is strongly recommend to enable linker script optimization for all AArch64 Linux builds.
|
|
||||||
To do so please export USE_PRIORITIZED_TEXT_FOR_LD=1
|
|
||||||
]])
|
|
||||||
endif()
|
|
||||||
endif()
|
|
||||||
|
|||||||
105
MANIFEST.in
105
MANIFEST.in
@ -1,61 +1,20 @@
|
|||||||
# Reference: https://setuptools.pypa.io/en/latest/userguide/miscellaneous.html
|
# Reference: https://setuptools.pypa.io/en/latest/userguide/miscellaneous.html
|
||||||
|
|
||||||
# Include individual top-level files
|
# Include source files in SDist
|
||||||
include CITATION.cff
|
include CMakeLists.txt
|
||||||
include CODEOWNERS
|
include *.bzl *.bazel .bazel* BUILD *.BUILD BUILD.* WORKSPACE
|
||||||
include Dockerfile
|
include BUCK BUCK.*
|
||||||
include LICENSE
|
include requirements*.txt
|
||||||
include MANIFEST.in
|
include version.txt
|
||||||
include Makefile
|
include [Mm]akefile *.[Mm]akefile [Mm]akefile.*
|
||||||
include NOTICE
|
include [Dd]ockerfile *.[Dd]ockerfile [Dd]ockerfile.* .dockerignore
|
||||||
include .bc-linter.yml
|
|
||||||
include .clang-format .clang-tidy
|
|
||||||
include .cmakelintrc
|
|
||||||
include .coveragerc
|
|
||||||
include .dockerignore
|
|
||||||
include .editorconfig
|
|
||||||
include .flake8
|
|
||||||
include .gdbinit
|
|
||||||
include .lintrunner.toml
|
|
||||||
include .lldbinit
|
|
||||||
include codex_setup.sh
|
|
||||||
include docker.Makefile
|
|
||||||
include pyrefly.toml
|
|
||||||
include ubsan.supp
|
|
||||||
|
|
||||||
# Include bazel and BUCK related files
|
|
||||||
include BUILD.bazel BUCK.oss
|
|
||||||
include WORKSPACE
|
|
||||||
include *.bzl
|
|
||||||
include .bazelignore .bazelrc .bazelversion
|
|
||||||
|
|
||||||
# Include general configuration files
|
|
||||||
include *.ini
|
|
||||||
# Include important top-level information
|
|
||||||
include *.md
|
|
||||||
# Include technical text files at the moment, comprises
|
|
||||||
# version.txt, CMakeLists.txt, requirements.txt
|
|
||||||
include *.txt
|
|
||||||
|
|
||||||
# Include ctags configuration
|
|
||||||
include .ctags.d/*.ctags
|
|
||||||
|
|
||||||
# Include subfolders completely
|
|
||||||
graft .devcontainer
|
|
||||||
graft .vscode
|
|
||||||
graft android
|
graft android
|
||||||
graft aten
|
graft aten
|
||||||
graft benchmarks
|
|
||||||
graft binaries
|
graft binaries
|
||||||
graft c10
|
graft c10
|
||||||
graft caffe2
|
graft caffe2
|
||||||
graft cmake
|
graft cmake
|
||||||
graft docs
|
|
||||||
graft functorch
|
graft functorch
|
||||||
graft ios
|
|
||||||
graft mypy_plugins
|
|
||||||
graft scripts
|
|
||||||
graft test
|
|
||||||
graft third_party
|
graft third_party
|
||||||
graft tools
|
graft tools
|
||||||
graft torch
|
graft torch
|
||||||
@ -63,37 +22,29 @@ graft torchgen
|
|||||||
# FIXME: torch-xla build during codegen will fail if include this file in wheel
|
# FIXME: torch-xla build during codegen will fail if include this file in wheel
|
||||||
exclude torchgen/BUILD.bazel
|
exclude torchgen/BUILD.bazel
|
||||||
|
|
||||||
# The following exclusions omit parts from third-party dependencies that
|
# Misc files and directories in SDist
|
||||||
# contain invalid symlinks[1] and that are not needed for pytorch, such as
|
include *.md
|
||||||
# bindings for unused languages
|
include CITATION.cff
|
||||||
prune third_party/flatbuffers/java
|
include LICENSE NOTICE
|
||||||
prune third_party/flatbuffers/kotlin
|
include mypy*.ini
|
||||||
prune third_party/ittapi/rust
|
graft benchmarks
|
||||||
prune third_party/nccl/pkg/debian
|
graft docs
|
||||||
prune third_party/opentelemetry-cpp/third_party/prometheus-cpp/cmake/project-import-*
|
graft mypy_plugins
|
||||||
|
graft scripts
|
||||||
# The following document is also an invalid symlink[1] and superfluous
|
|
||||||
exclude third_party/flatbuffers/docs/source/CONTRIBUTING.md
|
|
||||||
|
|
||||||
# Omit autogenerated code
|
|
||||||
prune torchgen/packaged
|
|
||||||
|
|
||||||
# Omit caches, compiled, and scm related content
|
|
||||||
prune */__pycache__
|
|
||||||
prune **/.github
|
|
||||||
prune **/.gitlab
|
|
||||||
global-exclude *.o *.obj *.so *.dylib *.a *.pxd *.dll *.lib
|
|
||||||
global-exclude *.py[cod] *.swp *~
|
|
||||||
global-exclude .git .git-blame-ignore-revs .gitattributes .gitignore .gitmodules
|
|
||||||
global-exclude .gitlab-ci.yml
|
|
||||||
|
|
||||||
# Misc files needed for custom setuptools command
|
# Misc files needed for custom setuptools command
|
||||||
include .gitignore
|
include .gitignore
|
||||||
include .gitmodules
|
include .gitmodules
|
||||||
|
|
||||||
# [1] Invalid symlinks for the purposes of Python source distributions are,
|
# Include test suites in SDist
|
||||||
# according to the source distribution format[2] links pointing outside the
|
graft test
|
||||||
# destination directory or links with a `..` component, which is those of
|
include pytest.ini
|
||||||
# concern here.
|
include .coveragerc
|
||||||
|
|
||||||
# [2] https://packaging.python.org/en/latest/specifications/source-distribution-format/#source-distribution-archive-features
|
# Prune generated/compiled files
|
||||||
|
prune torchgen/packaged
|
||||||
|
prune */__pycache__
|
||||||
|
global-exclude *.o *.obj *.so *.a *.dylib *.pxd *.dll *.lib *.py[cod]
|
||||||
|
|
||||||
|
prune */.git
|
||||||
|
global-exclude .git *~ *.swp
|
||||||
|
|||||||
@ -161,7 +161,7 @@ They require JetPack 4.2 and above, and [@dusty-nv](https://github.com/dusty-nv)
|
|||||||
|
|
||||||
#### Prerequisites
|
#### Prerequisites
|
||||||
If you are installing from source, you will need:
|
If you are installing from source, you will need:
|
||||||
- Python 3.10 or later
|
- Python 3.9 or later
|
||||||
- A compiler that fully supports C++17, such as clang or gcc (gcc 9.4.0 or newer is required, on Linux)
|
- A compiler that fully supports C++17, such as clang or gcc (gcc 9.4.0 or newer is required, on Linux)
|
||||||
- Visual Studio or Visual Studio Build Tool (Windows only)
|
- Visual Studio or Visual Studio Build Tool (Windows only)
|
||||||
|
|
||||||
@ -275,7 +275,7 @@ conda install pkg-config libuv
|
|||||||
pip install mkl-static mkl-include
|
pip install mkl-static mkl-include
|
||||||
# Add these packages if torch.distributed is needed.
|
# Add these packages if torch.distributed is needed.
|
||||||
# Distributed package support on Windows is a prototype feature and is subject to changes.
|
# Distributed package support on Windows is a prototype feature and is subject to changes.
|
||||||
conda install -c conda-forge libuv=1.51
|
conda install -c conda-forge libuv
|
||||||
```
|
```
|
||||||
|
|
||||||
#### Install PyTorch
|
#### Install PyTorch
|
||||||
|
|||||||
@ -317,20 +317,10 @@ IF(USE_FBGEMM_GENAI)
|
|||||||
-greedy-reverse-local-assignment=1
|
-greedy-reverse-local-assignment=1
|
||||||
-fhip-new-launch-api)
|
-fhip-new-launch-api)
|
||||||
|
|
||||||
# Only compile for gfx942 for now.
|
|
||||||
# This is rather hacky, I could not figure out a clean solution :(
|
|
||||||
set(HIP_CLANG_FLAGS_ORIGINAL ${HIP_CLANG_FLAGS})
|
|
||||||
string(REGEX REPLACE "--offload-arch=[^ ]*" "" FILTERED_HIP_CLANG_FLAGS "${HIP_CLANG_FLAGS}")
|
|
||||||
if("gfx942" IN_LIST PYTORCH_ROCM_ARCH)
|
|
||||||
list(APPEND FILTERED_HIP_CLANG_FLAGS --offload-arch=gfx942;)
|
|
||||||
endif()
|
|
||||||
set(HIP_CLANG_FLAGS ${FILTERED_HIP_CLANG_FLAGS})
|
|
||||||
|
|
||||||
hip_add_library(
|
hip_add_library(
|
||||||
fbgemm_genai STATIC
|
fbgemm_genai STATIC
|
||||||
${fbgemm_genai_native_rocm_hip}
|
${fbgemm_genai_native_rocm_hip}
|
||||||
HIPCC_OPTIONS ${HIP_HCC_FLAGS} ${FBGEMM_GENAI_EXTRA_HIPCC_FLAGS})
|
HIPCC_OPTIONS ${HIP_HCC_FLAGS} ${FBGEMM_GENAI_EXTRA_HIPCC_FLAGS})
|
||||||
set(HIP_CLANG_FLAGS ${HIP_CLANG_FLAGS_ORIGINAL})
|
|
||||||
set_target_properties(fbgemm_genai PROPERTIES POSITION_INDEPENDENT_CODE ON)
|
set_target_properties(fbgemm_genai PROPERTIES POSITION_INDEPENDENT_CODE ON)
|
||||||
target_compile_definitions(fbgemm_genai PRIVATE FBGEMM_GENAI_NO_EXTENDED_SHAPES)
|
target_compile_definitions(fbgemm_genai PRIVATE FBGEMM_GENAI_NO_EXTENDED_SHAPES)
|
||||||
|
|
||||||
|
|||||||
@ -65,24 +65,14 @@ DLDataType getDLDataType(const Tensor& t) {
|
|||||||
break;
|
break;
|
||||||
// TODO(#146647): use macro here instead of spelling out each shell dtype
|
// TODO(#146647): use macro here instead of spelling out each shell dtype
|
||||||
case ScalarType::Float8_e5m2:
|
case ScalarType::Float8_e5m2:
|
||||||
dtype.code = DLDataTypeCode::kDLFloat8_e5m2;
|
|
||||||
break;
|
|
||||||
case ScalarType::Float8_e5m2fnuz:
|
case ScalarType::Float8_e5m2fnuz:
|
||||||
dtype.code = DLDataTypeCode::kDLFloat8_e5m2fnuz;
|
|
||||||
break;
|
|
||||||
case ScalarType::Float8_e4m3fn:
|
case ScalarType::Float8_e4m3fn:
|
||||||
dtype.code = DLDataTypeCode::kDLFloat8_e4m3fn;
|
|
||||||
break;
|
|
||||||
case ScalarType::Float8_e4m3fnuz:
|
case ScalarType::Float8_e4m3fnuz:
|
||||||
dtype.code = DLDataTypeCode::kDLFloat8_e4m3fnuz;
|
|
||||||
break;
|
|
||||||
case ScalarType::Float8_e8m0fnu:
|
case ScalarType::Float8_e8m0fnu:
|
||||||
dtype.code = DLDataTypeCode::kDLFloat8_e8m0fnu;
|
TORCH_CHECK_BUFFER(false, "float8 types are not supported by dlpack");
|
||||||
break;
|
break;
|
||||||
case ScalarType::Float4_e2m1fn_x2:
|
case ScalarType::Float4_e2m1fn_x2:
|
||||||
dtype.code = DLDataTypeCode::kDLFloat4_e2m1fn;
|
TORCH_CHECK_BUFFER(false, "float4 types are not supported by dlpack");
|
||||||
dtype.lanes = 2;
|
|
||||||
dtype.bits = 4;
|
|
||||||
break;
|
break;
|
||||||
case ScalarType::QInt8:
|
case ScalarType::QInt8:
|
||||||
case ScalarType::QUInt8:
|
case ScalarType::QUInt8:
|
||||||
@ -187,11 +177,7 @@ static Device getATenDevice(DLDeviceType type, c10::DeviceIndex index, void* dat
|
|||||||
|
|
||||||
ScalarType toScalarType(const DLDataType& dtype) {
|
ScalarType toScalarType(const DLDataType& dtype) {
|
||||||
ScalarType stype = ScalarType::Undefined;
|
ScalarType stype = ScalarType::Undefined;
|
||||||
if (dtype.code != DLDataTypeCode::kDLFloat4_e2m1fn) {
|
TORCH_CHECK_BUFFER(dtype.lanes == 1, "ATen does not support lanes != 1");
|
||||||
TORCH_CHECK_BUFFER(
|
|
||||||
dtype.lanes == 1,
|
|
||||||
"ATen does not support lanes != 1 for dtype code", std::to_string(dtype.code));
|
|
||||||
}
|
|
||||||
switch (dtype.code) {
|
switch (dtype.code) {
|
||||||
case DLDataTypeCode::kDLUInt:
|
case DLDataTypeCode::kDLUInt:
|
||||||
switch (dtype.bits) {
|
switch (dtype.bits) {
|
||||||
@ -283,73 +269,6 @@ ScalarType toScalarType(const DLDataType& dtype) {
|
|||||||
false, "Unsupported kDLBool bits ", std::to_string(dtype.bits));
|
false, "Unsupported kDLBool bits ", std::to_string(dtype.bits));
|
||||||
}
|
}
|
||||||
break;
|
break;
|
||||||
case DLDataTypeCode::kDLFloat8_e5m2:
|
|
||||||
switch (dtype.bits) {
|
|
||||||
case 8:
|
|
||||||
stype = ScalarType::Float8_e5m2;
|
|
||||||
break;
|
|
||||||
default:
|
|
||||||
TORCH_CHECK_BUFFER(
|
|
||||||
false, "Unsupported kDLFloat8_e5m2 bits ", std::to_string(dtype.bits));
|
|
||||||
}
|
|
||||||
break;
|
|
||||||
case DLDataTypeCode::kDLFloat8_e5m2fnuz:
|
|
||||||
switch (dtype.bits) {
|
|
||||||
case 8:
|
|
||||||
stype = ScalarType::Float8_e5m2fnuz;
|
|
||||||
break;
|
|
||||||
default:
|
|
||||||
TORCH_CHECK_BUFFER(
|
|
||||||
false, "Unsupported kDLFloat8_e5m2fnuz bits ", std::to_string(dtype.bits));
|
|
||||||
}
|
|
||||||
break;
|
|
||||||
case DLDataTypeCode::kDLFloat8_e4m3fn:
|
|
||||||
switch (dtype.bits) {
|
|
||||||
case 8:
|
|
||||||
stype = ScalarType::Float8_e4m3fn;
|
|
||||||
break;
|
|
||||||
default:
|
|
||||||
TORCH_CHECK_BUFFER(
|
|
||||||
false, "Unsupported kDLFloat8_e4m3fn bits ", std::to_string(dtype.bits));
|
|
||||||
}
|
|
||||||
break;
|
|
||||||
case DLDataTypeCode::kDLFloat8_e4m3fnuz:
|
|
||||||
switch (dtype.bits) {
|
|
||||||
case 8:
|
|
||||||
stype = ScalarType::Float8_e4m3fnuz;
|
|
||||||
break;
|
|
||||||
default:
|
|
||||||
TORCH_CHECK_BUFFER(
|
|
||||||
false, "Unsupported kDLFloat8_e4m3fnuz bits ", std::to_string(dtype.bits));
|
|
||||||
}
|
|
||||||
break;
|
|
||||||
case DLDataTypeCode::kDLFloat8_e8m0fnu:
|
|
||||||
switch (dtype.bits) {
|
|
||||||
case 8:
|
|
||||||
stype = ScalarType::Float8_e8m0fnu;
|
|
||||||
break;
|
|
||||||
default:
|
|
||||||
TORCH_CHECK_BUFFER(
|
|
||||||
false, "Unsupported kDLFloat8_e8m0fnu bits ", std::to_string(dtype.bits));
|
|
||||||
}
|
|
||||||
break;
|
|
||||||
case DLDataTypeCode::kDLFloat4_e2m1fn:
|
|
||||||
switch (dtype.bits) {
|
|
||||||
case 4:
|
|
||||||
switch (dtype.lanes) {
|
|
||||||
case 2:
|
|
||||||
stype = ScalarType::Float4_e2m1fn_x2;
|
|
||||||
break;
|
|
||||||
default:
|
|
||||||
TORCH_CHECK_BUFFER(
|
|
||||||
false, "Unsupported kDLFloat4_e2m1fn lanes ", std::to_string(dtype.lanes));
|
|
||||||
}
|
|
||||||
break;
|
|
||||||
default:
|
|
||||||
TORCH_CHECK_BUFFER(
|
|
||||||
false, "Unsupported kDLFloat4_e2m1fn bits ", std::to_string(dtype.bits));
|
|
||||||
}
|
|
||||||
break;
|
|
||||||
default:
|
default:
|
||||||
TORCH_CHECK_BUFFER(false, "Unsupported code ", std::to_string(dtype.code));
|
TORCH_CHECK_BUFFER(false, "Unsupported code ", std::to_string(dtype.code));
|
||||||
}
|
}
|
||||||
@ -401,13 +320,30 @@ T* toDLPackImpl(const Tensor& src) {
|
|||||||
// The following code detects whether the src follows
|
// The following code detects whether the src follows
|
||||||
// a continuous pattern. If the src follows such pattern (common-case)
|
// a continuous pattern. If the src follows such pattern (common-case)
|
||||||
// then we do not need to normalize the strides.
|
// then we do not need to normalize the strides.
|
||||||
bool need_normalize_strides = src.dim() == 1 && src.size(0) == 1 && src.stride(0) != 1;
|
bool need_normalize_strides = false;
|
||||||
|
int64_t expected_stride = 1;
|
||||||
|
for (int i = src.dim() - 1; i >= 0; i--) {
|
||||||
|
// detect if we do not meet continuous pattern
|
||||||
|
// and the size is 1, so there is opportunity to normalize
|
||||||
|
if (src.stride(i) != expected_stride && src.size(i) == 1) {
|
||||||
|
need_normalize_strides = true;
|
||||||
|
break;
|
||||||
|
}
|
||||||
|
expected_stride *= src.size(i);
|
||||||
|
}
|
||||||
|
|
||||||
// less common case, try normalizing the strides
|
// less common case, try normalizing the strides
|
||||||
if (need_normalize_strides) {
|
if (need_normalize_strides) {
|
||||||
// create a new tensor with possibly normalized strides
|
// create a new tensor with possibly normalized strides
|
||||||
// gh-83069
|
// gh-83069
|
||||||
auto shape = src.sizes();
|
auto shape = src.sizes();
|
||||||
view = src.as_strided(shape, {1}, src.storage_offset());
|
auto strides = src.strides().vec();
|
||||||
|
for (int i = 0; i < src.dim(); i++) {
|
||||||
|
if (shape[i] < 2) {
|
||||||
|
strides[i] = 1;
|
||||||
|
}
|
||||||
|
}
|
||||||
|
view = src.as_strided(shape, strides, src.storage_offset());
|
||||||
}
|
}
|
||||||
|
|
||||||
ATenDLMTensor<T>* atDLMTensor(new ATenDLMTensor<T>);
|
ATenDLMTensor<T>* atDLMTensor(new ATenDLMTensor<T>);
|
||||||
@ -418,8 +354,8 @@ T* toDLPackImpl(const Tensor& src) {
|
|||||||
atDLMTensor->tensor.dl_tensor.device = torchDeviceToDLDevice(src.device());
|
atDLMTensor->tensor.dl_tensor.device = torchDeviceToDLDevice(src.device());
|
||||||
atDLMTensor->tensor.dl_tensor.ndim = static_cast<int32_t>(src.dim());
|
atDLMTensor->tensor.dl_tensor.ndim = static_cast<int32_t>(src.dim());
|
||||||
atDLMTensor->tensor.dl_tensor.dtype = getDLDataType(src);
|
atDLMTensor->tensor.dl_tensor.dtype = getDLDataType(src);
|
||||||
atDLMTensor->tensor.dl_tensor.shape = const_cast<int64_t*>(view.sizes().data());
|
atDLMTensor->tensor.dl_tensor.shape = view.sizes().data();
|
||||||
atDLMTensor->tensor.dl_tensor.strides = const_cast<int64_t*>(view.strides().data());
|
atDLMTensor->tensor.dl_tensor.strides = view.strides().data();
|
||||||
atDLMTensor->tensor.dl_tensor.byte_offset = 0;
|
atDLMTensor->tensor.dl_tensor.byte_offset = 0;
|
||||||
fillVersion(&atDLMTensor->tensor);
|
fillVersion(&atDLMTensor->tensor);
|
||||||
|
|
||||||
|
|||||||
@ -468,7 +468,7 @@ inline Tensor _sum_to(
|
|||||||
// if we assume no reduction due to unbacked we ensure that at runtime.
|
// if we assume no reduction due to unbacked we ensure that at runtime.
|
||||||
TORCH_MAYBE_SYM_CHECK(
|
TORCH_MAYBE_SYM_CHECK(
|
||||||
sym_eq(shape[i - leading_dims], sizes[i]),
|
sym_eq(shape[i - leading_dims], sizes[i]),
|
||||||
"non-reduction path was assumed due to unbacked symbols expected those two sizes to be the same:",
|
"non-reduction path was assumed due to unabcked symbols expected those two sizes to be the same:",
|
||||||
shape[i - leading_dims],
|
shape[i - leading_dims],
|
||||||
", ",
|
", ",
|
||||||
sizes[i])
|
sizes[i])
|
||||||
|
|||||||
@ -9,6 +9,11 @@
|
|||||||
|
|
||||||
namespace at::functionalization {
|
namespace at::functionalization {
|
||||||
|
|
||||||
|
ViewMeta ViewMeta::to_out_idx(int64_t out_idx) {
|
||||||
|
if (out_idx == this->out_index) return *this;
|
||||||
|
return ViewMeta(forward_fn, reverse_fn, has_symbolic_inputs, is_multi_output, is_as_strided, out_idx);
|
||||||
|
}
|
||||||
|
|
||||||
// Note [Functionalization: Alias Removal Part 2]
|
// Note [Functionalization: Alias Removal Part 2]
|
||||||
// See Note [Functionalization: Alias Removal] for more details.
|
// See Note [Functionalization: Alias Removal] for more details.
|
||||||
// This function applies a single update from one of the views to the StorageImpl.
|
// This function applies a single update from one of the views to the StorageImpl.
|
||||||
@ -37,12 +42,12 @@ namespace at::functionalization {
|
|||||||
static const Tensor apply_update(const FunctionalStorageImpl::Update& update, const Tensor& base) {
|
static const Tensor apply_update(const FunctionalStorageImpl::Update& update, const Tensor& base) {
|
||||||
at::Tensor t = update.new_val;
|
at::Tensor t = update.new_val;
|
||||||
TORCH_INTERNAL_ASSERT(!at::functionalization::impl::isFunctionalTensor(t));
|
TORCH_INTERNAL_ASSERT(!at::functionalization::impl::isFunctionalTensor(t));
|
||||||
if (update.view_metas.empty()) { return t; }
|
if (update.view_metas.empty()) return t;
|
||||||
|
|
||||||
std::vector<at::Tensor> tmp_values({base});
|
std::vector<at::Tensor> tmp_values({base});
|
||||||
tmp_values.reserve(update.view_metas.size());
|
tmp_values.reserve(update.view_metas.size());
|
||||||
for (size_t i = 0; i < update.view_metas.size() - 1; ++i) {
|
for (size_t i = 0; i < update.view_metas.size() - 1; ++i) {
|
||||||
at::Tensor next_view = update.view_metas[i]->forward(tmp_values.back());
|
at::Tensor next_view = update.view_metas[i].forward_fn(tmp_values.back(), update.view_metas[i].out_index);
|
||||||
// NB: We only actually need tmp_values for ops like select/slice/diagonal/squeeze/as_strided
|
// NB: We only actually need tmp_values for ops like select/slice/diagonal/squeeze/as_strided
|
||||||
// All of these ops require additional information to recover the sizes of the original tensor.
|
// All of these ops require additional information to recover the sizes of the original tensor.
|
||||||
// If we need to, we could probably apply this optimization and only bother computing tmp_values
|
// If we need to, we could probably apply this optimization and only bother computing tmp_values
|
||||||
@ -50,8 +55,9 @@ static const Tensor apply_update(const FunctionalStorageImpl::Update& update, co
|
|||||||
tmp_values.push_back(std::move(next_view));
|
tmp_values.push_back(std::move(next_view));
|
||||||
}
|
}
|
||||||
for(int64_t i = static_cast<int64_t>(update.view_metas.size()) - 1; i >= 0; --i) {
|
for(int64_t i = static_cast<int64_t>(update.view_metas.size()) - 1; i >= 0; --i) {
|
||||||
|
int64_t out_idx = update.view_metas[i].out_index;
|
||||||
// Each view inverse is implemented in ViewInverses.cpp.
|
// Each view inverse is implemented in ViewInverses.cpp.
|
||||||
t = update.view_metas[i]->reverse(tmp_values[i], t);
|
t = update.view_metas[i].reverse_fn(tmp_values[i], t, out_idx);
|
||||||
}
|
}
|
||||||
TORCH_INTERNAL_ASSERT(!at::functionalization::impl::isFunctionalTensor(t));
|
TORCH_INTERNAL_ASSERT(!at::functionalization::impl::isFunctionalTensor(t));
|
||||||
return t;
|
return t;
|
||||||
@ -105,13 +111,13 @@ FunctionalStorageImpl::FunctionalStorageImpl(const Tensor& base)
|
|||||||
TORCH_INTERNAL_ASSERT(!at::functionalization::impl::isFunctionalTensor(base_));
|
TORCH_INTERNAL_ASSERT(!at::functionalization::impl::isFunctionalTensor(base_));
|
||||||
}
|
}
|
||||||
|
|
||||||
void FunctionalStorageImpl::add_update(const Tensor& updated_val, const std::vector<std::shared_ptr<ViewMeta>>& metas) {
|
void FunctionalStorageImpl::add_update(const Tensor& updated_val, const std::vector<ViewMeta>& metas) {
|
||||||
TORCH_CHECK(!frozen_, "cannot mutate tensors with frozen storage");
|
TORCH_CHECK(!frozen_, "cannot mutate tensors with frozen storage");
|
||||||
|
|
||||||
if (metas.size() > 1) {
|
if (metas.size() > 1) {
|
||||||
for (size_t i = 1; i < metas.size(); ++i) {
|
for (size_t i = 1; i < metas.size(); ++i) {
|
||||||
// Skipping this check for XLA. Would be good to add it back, but it is failing XLA CI
|
// Skipping this check for XLA. Would be good to add it back, but it is failing XLA CI
|
||||||
TORCH_CHECK(updated_val.device().type() == c10::DeviceType::XLA || !metas[i]->is_as_strided,
|
TORCH_CHECK(updated_val.device().type() == c10::DeviceType::XLA || !metas[i].is_as_strided,
|
||||||
"During torch.compile, encountered a mutation on a view chain of length ", metas.size(), ", where view ", i,
|
"During torch.compile, encountered a mutation on a view chain of length ", metas.size(), ", where view ", i,
|
||||||
" was an as_strided() call. as_strided() is non-compositional, and therefore is not possible to functionalize properly today,"
|
" was an as_strided() call. as_strided() is non-compositional, and therefore is not possible to functionalize properly today,"
|
||||||
"so this behavior is banned in compile. As a workaround, you can either remove the mutation from the model code, or you "
|
"so this behavior is banned in compile. As a workaround, you can either remove the mutation from the model code, or you "
|
||||||
|
|||||||
@ -8,89 +8,44 @@ namespace at::functionalization {
|
|||||||
|
|
||||||
// See Note [Functionalization Pass In Core]
|
// See Note [Functionalization Pass In Core]
|
||||||
|
|
||||||
enum class InverseReturnMode {
|
|
||||||
/// Specifies that functional inverses should always return a view.
|
|
||||||
AlwaysView,
|
|
||||||
/// Specifies that functional inverses should always return a non-view / copy.
|
|
||||||
NeverView,
|
|
||||||
/// Specifies that functional inverses should return a view unless a (copying)
|
|
||||||
/// scatter
|
|
||||||
/// inverse exists, in which case that will be used instead.
|
|
||||||
/// This avoids as_strided() calls that can be difficult for subclasses to
|
|
||||||
/// handle.
|
|
||||||
ViewOrScatterInverse,
|
|
||||||
};
|
|
||||||
|
|
||||||
#define FUNCTIONALIZATION_VIEWMETA_NAME(TYPE) \
|
|
||||||
static const char* name() { \
|
|
||||||
return #TYPE; \
|
|
||||||
}
|
|
||||||
|
|
||||||
#define FUNCTIONALIZATION_VIEWMETA_SERIALIZABLE_TUPLE(...) \
|
|
||||||
using SerializableTuple = std::tuple<__VA_ARGS__>
|
|
||||||
|
|
||||||
// ViewMeta is a class used by the functionalization pass to navigate between
|
// ViewMeta is a class used by the functionalization pass to navigate between
|
||||||
// a base tensor and a view tensor.
|
// a base tensor and a view tensor.
|
||||||
// For example, if I call `b = a.view1(...)`
|
// For example, if I call `b = a.view1(...)`
|
||||||
// the functionalization pass will generate and store a ViewMeta specialization
|
// the functionalization pass will generate and store a ViewMeta on b that looks
|
||||||
// for `view1` operation on b that looks like:
|
// like:
|
||||||
//
|
//
|
||||||
// struct TORCH_API view1_ViewMeta : public ViewMeta {
|
// ViewMeta(
|
||||||
// FUNCTIONALIZATION_VIEWMETA_NAME(view1_ViewMeta);
|
// [<captures>](const Tensor& base, int64_t mutated_view_idx) {
|
||||||
// FUNCTIONALIZATION_VIEWMETA_SERIALIZABLE_TUPLE(
|
// return base.view1(...);
|
||||||
// bool /* reapply_views */,
|
// },
|
||||||
// const std::vector<int64_t>&);
|
// [<captures>](const at::Tensor& base, const at::Tensor& mutated_view,
|
||||||
//
|
// int64_t mutated_view_idx) -> at::Tensor {
|
||||||
// view1_ViewMeta(const SerializableTuple& tpl)
|
// return at::functionalization::impl::view1_inverse(base, mutated_view,
|
||||||
// : view1_ViewMeta(std::get<0>(tpl), std::get<1>(tpl)) {}
|
// ...);
|
||||||
//
|
|
||||||
// view1_ViewMeta(bool reapply_views, const std::vector<int64_t>& size)
|
|
||||||
// : ViewMeta(/*has_symbolic_inputs=*/false),
|
|
||||||
// reapply_views(reapply_views),
|
|
||||||
// size(size) {}
|
|
||||||
//
|
|
||||||
// Tensor forward(const Tensor& base) override {
|
|
||||||
// return base.view1(...);
|
|
||||||
// }
|
// }
|
||||||
//
|
//
|
||||||
// Tensor reverse(const Tensor& base, const Tensor& mutated_view) override {
|
// The forward_fn lambda describes how to replay view1 on a tensor.
|
||||||
// return at::functionalization::impl::view1_inverse(base, mutated_view,
|
|
||||||
// ...);
|
|
||||||
// }
|
|
||||||
//
|
//
|
||||||
// SerializableTuple to_serializable_tuple() {
|
// The reverse_fn lambda describes how, given a tensor that is already a view,
|
||||||
// return std::make_tuple(reapply_views, size);
|
|
||||||
// }
|
|
||||||
//
|
|
||||||
// bool reapply_views;
|
|
||||||
// std::vector<int64_t> size;
|
|
||||||
// };
|
|
||||||
//
|
|
||||||
// The forward function describes how to replay view1 on a tensor.
|
|
||||||
//
|
|
||||||
// The reverse function describes how, given a tensor that is already a view,
|
|
||||||
// to get the corresponding base tensor. See Note [Functionalization Pass:
|
// to get the corresponding base tensor. See Note [Functionalization Pass:
|
||||||
// View Inverses] for details.
|
// View Inverses] for details.
|
||||||
//
|
|
||||||
// `SerializedTuple` is a typedef that defines an `std::tuple<...>` type
|
|
||||||
// representing the `ViewMeta` instance state. Methods that take in/return such
|
|
||||||
// a type are used for supporting pickle serialization.
|
|
||||||
struct ViewMeta {
|
struct ViewMeta {
|
||||||
ViewMeta(
|
ViewMeta(
|
||||||
|
std::function<Tensor(const Tensor&, int64_t)> forward,
|
||||||
|
std::function<Tensor(const Tensor&, const Tensor&, int64_t)> reverse,
|
||||||
bool has_symbolic_inputs,
|
bool has_symbolic_inputs,
|
||||||
bool is_multi_output = false,
|
bool is_multi_output = false,
|
||||||
bool is_as_strided = false,
|
bool is_as_strided = false,
|
||||||
int64_t out_idx = 0)
|
int64_t out_idx = 0)
|
||||||
: out_index(out_idx),
|
: forward_fn(std::move(forward)),
|
||||||
|
reverse_fn(std::move(reverse)),
|
||||||
|
out_index(out_idx),
|
||||||
is_multi_output(is_multi_output),
|
is_multi_output(is_multi_output),
|
||||||
is_as_strided(is_as_strided),
|
is_as_strided(is_as_strided),
|
||||||
has_symbolic_inputs(has_symbolic_inputs) {}
|
has_symbolic_inputs(has_symbolic_inputs) {}
|
||||||
|
|
||||||
virtual ~ViewMeta() = default;
|
std::function<Tensor(const Tensor&, int64_t)> forward_fn;
|
||||||
|
std::function<Tensor(const Tensor&, const Tensor&, int64_t)> reverse_fn;
|
||||||
virtual Tensor forward(const Tensor& base) = 0;
|
|
||||||
virtual Tensor reverse(const Tensor& base, const Tensor& mutated_view) = 0;
|
|
||||||
|
|
||||||
// See Note [out_idx in ViewMeta]
|
// See Note [out_idx in ViewMeta]
|
||||||
int64_t out_index;
|
int64_t out_index;
|
||||||
|
|
||||||
@ -102,17 +57,10 @@ struct ViewMeta {
|
|||||||
// Tells us if this view operation has any symbolic inputs
|
// Tells us if this view operation has any symbolic inputs
|
||||||
bool has_symbolic_inputs;
|
bool has_symbolic_inputs;
|
||||||
|
|
||||||
// Returns a new ViewMeta with the same forward/reverse
|
// Returns a copy of the current ViewMeta, if out_idx matches the current
|
||||||
|
// out_index. Otherwise, returns a new ViewMeta with the same forward/reverse
|
||||||
// functions, but a new out index.
|
// functions, but a new out index.
|
||||||
//
|
ViewMeta to_out_idx(int64_t out_idx);
|
||||||
// This method should be implemented by those `ViewMeta` that have more than
|
|
||||||
// one output.
|
|
||||||
virtual std::shared_ptr<ViewMeta> to_out_index(int64_t out_index) {
|
|
||||||
TORCH_CHECK_NOT_IMPLEMENTED(
|
|
||||||
false,
|
|
||||||
"ViewMeta::to_out_index not implemented. ",
|
|
||||||
"Likely because there's only one output.");
|
|
||||||
}
|
|
||||||
};
|
};
|
||||||
|
|
||||||
// FunctionalStorageImpl is a subclass of StorageImpl used by the
|
// FunctionalStorageImpl is a subclass of StorageImpl used by the
|
||||||
@ -145,14 +93,14 @@ struct TORCH_API FunctionalStorageImpl : public c10::StorageImpl {
|
|||||||
// NOLINTNEXTLINE(cppcoreguidelines-avoid-const-or-ref-data-members)
|
// NOLINTNEXTLINE(cppcoreguidelines-avoid-const-or-ref-data-members)
|
||||||
const at::Tensor new_val;
|
const at::Tensor new_val;
|
||||||
// NOLINTNEXTLINE(cppcoreguidelines-avoid-const-or-ref-data-members)
|
// NOLINTNEXTLINE(cppcoreguidelines-avoid-const-or-ref-data-members)
|
||||||
const std::vector<std::shared_ptr<ViewMeta>> view_metas;
|
const std::vector<ViewMeta> view_metas;
|
||||||
};
|
};
|
||||||
|
|
||||||
explicit FunctionalStorageImpl(const Tensor& value);
|
explicit FunctionalStorageImpl(const Tensor& value);
|
||||||
|
|
||||||
void add_update(
|
void add_update(
|
||||||
const Tensor& updated_val,
|
const Tensor& updated_val,
|
||||||
const std::vector<std::shared_ptr<ViewMeta>>& view_metas);
|
const std::vector<ViewMeta>& view_metas);
|
||||||
bool apply_updates();
|
bool apply_updates();
|
||||||
const Tensor& base() {
|
const Tensor& base() {
|
||||||
return base_;
|
return base_;
|
||||||
|
|||||||
@ -129,19 +129,17 @@ void FunctionalTensorWrapper::freeze_storage() const {
|
|||||||
// - view_value: The output tensor that we need to wrap.
|
// - view_value: The output tensor that we need to wrap.
|
||||||
// - base: The "base" of the view that `view_value` was generated from.
|
// - base: The "base" of the view that `view_value` was generated from.
|
||||||
// See Note [Functionalization: Alias Removal Part 2] for more details on the mutation replay logic.
|
// See Note [Functionalization: Alias Removal Part 2] for more details on the mutation replay logic.
|
||||||
FunctionalTensorWrapper::FunctionalTensorWrapper(
|
FunctionalTensorWrapper::FunctionalTensorWrapper(const Tensor& view_value, const FunctionalTensorWrapper* base, const functionalization::ViewMeta& meta)
|
||||||
const Tensor& view_value,
|
: c10::TensorImpl(
|
||||||
const FunctionalTensorWrapper* base,
|
c10::DispatchKeySet(DispatchKey::Functionalize),
|
||||||
const std::shared_ptr<functionalization::ViewMeta>& meta)
|
view_value.dtype(),
|
||||||
: c10::TensorImpl(
|
view_value.device()
|
||||||
c10::DispatchKeySet(DispatchKey::Functionalize),
|
),
|
||||||
view_value.dtype(),
|
value_(view_value),
|
||||||
base->storage().data_ptr().device()),
|
is_multi_output_view_(base->is_multi_output_view_ || meta.is_multi_output),
|
||||||
value_(view_value),
|
was_storage_changed_(base->was_storage_changed_),
|
||||||
is_multi_output_view_(
|
is_symbolic_(base->is_symbolic_)
|
||||||
base->is_multi_output_view_ || meta->is_multi_output),
|
{
|
||||||
was_storage_changed_(base->was_storage_changed_),
|
|
||||||
is_symbolic_(base->is_symbolic_) {
|
|
||||||
TORCH_INTERNAL_ASSERT(!at::functionalization::impl::isFunctionalTensor(value_));
|
TORCH_INTERNAL_ASSERT(!at::functionalization::impl::isFunctionalTensor(value_));
|
||||||
TORCH_INTERNAL_ASSERT(!value_.key_set().has(c10::DispatchKey::Functionalize));
|
TORCH_INTERNAL_ASSERT(!value_.key_set().has(c10::DispatchKey::Functionalize));
|
||||||
set_constructor_metadata();
|
set_constructor_metadata();
|
||||||
@ -150,10 +148,11 @@ FunctionalTensorWrapper::FunctionalTensorWrapper(
|
|||||||
view_metas_ = base->view_metas_; // copy
|
view_metas_ = base->view_metas_; // copy
|
||||||
}
|
}
|
||||||
view_metas_.push_back(meta);
|
view_metas_.push_back(meta);
|
||||||
maybe_mark_symbolic(meta.get());
|
maybe_mark_symbolic(meta);
|
||||||
storage_ = base->storage_; // alias this tensor's storage with the base tensor's
|
storage_ = base->storage_; // alias this tensor's storage with the base tensor's
|
||||||
}
|
}
|
||||||
|
|
||||||
|
|
||||||
functionalization::FunctionalStorageImpl* FunctionalTensorWrapper::functional_storage_impl() const {
|
functionalization::FunctionalStorageImpl* FunctionalTensorWrapper::functional_storage_impl() const {
|
||||||
return static_cast<functionalization::FunctionalStorageImpl*>(storage_.unsafeGetStorageImpl());
|
return static_cast<functionalization::FunctionalStorageImpl*>(storage_.unsafeGetStorageImpl());
|
||||||
}
|
}
|
||||||
@ -177,18 +176,18 @@ bool FunctionalTensorWrapper::is_up_to_date() const {
|
|||||||
}
|
}
|
||||||
|
|
||||||
// See Note [Functionalization Pass - Inplace View Ops]
|
// See Note [Functionalization Pass - Inplace View Ops]
|
||||||
void FunctionalTensorWrapper::mutate_view_meta(const std::shared_ptr<at::functionalization::ViewMeta>& meta) {
|
void FunctionalTensorWrapper::mutate_view_meta(const at::functionalization::ViewMeta& meta) {
|
||||||
view_metas_.push_back(meta);
|
view_metas_.push_back(meta);
|
||||||
// Manually track the fact that this tensor received a metadata mutation!
|
// Manually track the fact that this tensor received a metadata mutation!
|
||||||
has_metadata_mutation_ = true;
|
has_metadata_mutation_ = true;
|
||||||
// Mark this tensor as being symbolic if there are any symbolic inputs used by the view operation.
|
// Mark this tensor as being symbolic if there are any symbolic inputs used by the view operation.
|
||||||
maybe_mark_symbolic(meta.get());
|
maybe_mark_symbolic(meta);
|
||||||
// Note [Functionalization Pass - Inplace View Ops]
|
// Note [Functionalization Pass - Inplace View Ops]
|
||||||
// So, these ops are special - they're mutation AND view ops. They get special codegen.
|
// So, these ops are special - they're mutation AND view ops. They get special codegen.
|
||||||
// An example is transpose_, e.g. `a.transpose_()`
|
// An example is transpose_, e.g. `a.transpose_()`
|
||||||
// Calling transpose_() should ensure that a gets an alias, and append the new ViewMeta to a's current list of ViewMetas.
|
// Calling transpose_() should ensure that a gets an alias, and append the new ViewMeta to a's current list of ViewMetas.
|
||||||
at::AutoDispatchSkipFunctionalize guard;
|
at::AutoDispatchSkipFunctionalize guard;
|
||||||
value_ = meta->forward(value_);
|
value_ = meta.forward_fn(value_, meta.out_index);
|
||||||
TORCH_INTERNAL_ASSERT(!value_.key_set().has(c10::DispatchKey::Functionalize));
|
TORCH_INTERNAL_ASSERT(!value_.key_set().has(c10::DispatchKey::Functionalize));
|
||||||
}
|
}
|
||||||
|
|
||||||
@ -369,8 +368,15 @@ void FunctionalTensorWrapper::sync_() {
|
|||||||
regenerate_from_base();
|
regenerate_from_base();
|
||||||
}
|
}
|
||||||
|
|
||||||
const std::vector<std::shared_ptr<functionalization::ViewMeta>>& FunctionalTensorWrapper::view_metas() const {
|
Tensor FunctionalTensorWrapper::apply_view_metas(const Tensor& base) {
|
||||||
return view_metas_;
|
auto t = base;
|
||||||
|
|
||||||
|
// Reapply views to get the viewed tensor from the base in alias_
|
||||||
|
for (auto& view_meta: view_metas_) {
|
||||||
|
t = view_meta.forward_fn(t, view_meta.out_index);
|
||||||
|
}
|
||||||
|
|
||||||
|
return t;
|
||||||
}
|
}
|
||||||
|
|
||||||
void FunctionalTensorWrapper::regenerate_from_base() {
|
void FunctionalTensorWrapper::regenerate_from_base() {
|
||||||
@ -379,7 +385,7 @@ void FunctionalTensorWrapper::regenerate_from_base() {
|
|||||||
auto t = storage_impl->base();
|
auto t = storage_impl->base();
|
||||||
|
|
||||||
TORCH_INTERNAL_ASSERT(!at::functionalization::impl::isFunctionalTensor(t));
|
TORCH_INTERNAL_ASSERT(!at::functionalization::impl::isFunctionalTensor(t));
|
||||||
t = at::functionalization::impl::apply_view_meta_sequence(t, view_metas_);
|
t = apply_view_metas(t);
|
||||||
TORCH_INTERNAL_ASSERT(!at::functionalization::impl::isFunctionalTensor(t));
|
TORCH_INTERNAL_ASSERT(!at::functionalization::impl::isFunctionalTensor(t));
|
||||||
|
|
||||||
replace_(t, /*from_lazy_regenerate=*/true);
|
replace_(t, /*from_lazy_regenerate=*/true);
|
||||||
@ -479,10 +485,7 @@ void FunctionalTensorWrapper::shallow_copy_from(const c10::intrusive_ptr<TensorI
|
|||||||
|
|
||||||
|
|
||||||
c10::Device FunctionalTensorWrapper::device_custom() const {
|
c10::Device FunctionalTensorWrapper::device_custom() const {
|
||||||
// The storage pointer already uses the underlying tensor custom device (if
|
return value_.unsafeGetTensorImpl()->device();
|
||||||
// applicable) to extract the device. So, we don't have to recurse again by
|
|
||||||
// doing value_.unsafeGetTensorImpl()->device().
|
|
||||||
return storage().data_ptr().device();
|
|
||||||
}
|
}
|
||||||
at::IntArrayRef FunctionalTensorWrapper::sizes_custom() const {
|
at::IntArrayRef FunctionalTensorWrapper::sizes_custom() const {
|
||||||
return value_.unsafeGetTensorImpl()->sizes();
|
return value_.unsafeGetTensorImpl()->sizes();
|
||||||
@ -721,11 +724,11 @@ bool isFunctionalTensor(const std::optional<Tensor>& t) {
|
|||||||
}
|
}
|
||||||
|
|
||||||
bool isFunctionalTensor(const c10::List<::std::optional<Tensor>>& t_list) {
|
bool isFunctionalTensor(const c10::List<::std::optional<Tensor>>& t_list) {
|
||||||
if (t_list.empty()) { return false; }
|
if (t_list.empty()) return false;
|
||||||
auto functional_count = 0;
|
auto functional_count = 0;
|
||||||
for (const auto i : c10::irange(t_list.size())) {
|
for (const auto i : c10::irange(t_list.size())) {
|
||||||
auto const & e= t_list[i];
|
auto const & e= t_list[i];
|
||||||
if (!e.has_value() || !e->defined()) { continue; }
|
if (!e.has_value() || !e->defined()) continue;
|
||||||
if (isFunctionalTensor(e)) {
|
if (isFunctionalTensor(e)) {
|
||||||
++functional_count;
|
++functional_count;
|
||||||
}
|
}
|
||||||
@ -735,10 +738,10 @@ bool isFunctionalTensor(const c10::List<::std::optional<Tensor>>& t_list) {
|
|||||||
|
|
||||||
template <typename T>
|
template <typename T>
|
||||||
static bool isFunctionalTensorIListRef(c10::IListRef<T> list) {
|
static bool isFunctionalTensorIListRef(c10::IListRef<T> list) {
|
||||||
if (list.size() == 0) { return false; }
|
if (list.size() == 0) return false;
|
||||||
auto functional_count = 0;
|
auto functional_count = 0;
|
||||||
for (const auto& tensor : list) {
|
for (const auto& tensor : list) {
|
||||||
if (!tensor.defined()) { continue; }
|
if (!tensor.defined()) continue;
|
||||||
if (isFunctionalTensor(tensor)) {
|
if (isFunctionalTensor(tensor)) {
|
||||||
++functional_count;
|
++functional_count;
|
||||||
}
|
}
|
||||||
@ -756,28 +759,20 @@ void freeze_functional_tensor(const Tensor& tensor) {
|
|||||||
functional_base_impl->freeze_storage();
|
functional_base_impl->freeze_storage();
|
||||||
}
|
}
|
||||||
|
|
||||||
Tensor create_functional_tensor_with_view_meta(
|
Tensor create_functional_tensor_with_view_meta(const at::Tensor& view_to_wrap, const at::Tensor& base, functionalization::ViewMeta meta, int64_t out_idx) {
|
||||||
const at::Tensor& view_to_wrap,
|
|
||||||
const at::Tensor& base,
|
|
||||||
const std::shared_ptr<functionalization::ViewMeta>& meta,
|
|
||||||
int64_t out_idx) {
|
|
||||||
TORCH_INTERNAL_ASSERT(!at::functionalization::impl::isFunctionalTensor(view_to_wrap));
|
TORCH_INTERNAL_ASSERT(!at::functionalization::impl::isFunctionalTensor(view_to_wrap));
|
||||||
TORCH_INTERNAL_ASSERT(at::functionalization::impl::isFunctionalTensor(base));
|
TORCH_INTERNAL_ASSERT(at::functionalization::impl::isFunctionalTensor(base));
|
||||||
auto functional_base_impl = at::functionalization::impl::unsafeGetFunctionalWrapper(base);
|
auto functional_base_impl = at::functionalization::impl::unsafeGetFunctionalWrapper(base);
|
||||||
auto meta_ = meta;
|
|
||||||
if (out_idx != 0) {
|
if (out_idx != 0) {
|
||||||
// Note [out_idx in ViewMeta]
|
// Note [out_idx in ViewMeta]
|
||||||
// When a view op outputs multiple tensors, each output needs its own separate ViewMeta.
|
// When a view op outputs multiple tensors, each output needs its own separate ViewMeta.
|
||||||
// Each ViewMeta also tracks the index of the particular output tensor, which is needed in the reverse function.
|
// Each ViewMeta also tracks the index of the particular output tensor, which is needed in the reverse function.
|
||||||
meta_ = meta->to_out_index(out_idx);
|
meta = meta.to_out_idx(out_idx);
|
||||||
}
|
}
|
||||||
return at::detail::make_tensor<FunctionalTensorWrapper>(view_to_wrap, functional_base_impl, meta_);
|
return at::detail::make_tensor<FunctionalTensorWrapper>(view_to_wrap, functional_base_impl, meta);
|
||||||
}
|
}
|
||||||
|
|
||||||
std::vector<Tensor> create_functional_tensor_with_view_meta(
|
std::vector<Tensor> create_functional_tensor_with_view_meta(ITensorListRef view_to_wrap, const at::Tensor& base, const functionalization::ViewMeta& meta) {
|
||||||
ITensorListRef view_to_wrap,
|
|
||||||
const at::Tensor& base,
|
|
||||||
const std::shared_ptr<functionalization::ViewMeta>& meta) {
|
|
||||||
std::vector<Tensor> outputs(view_to_wrap.size());
|
std::vector<Tensor> outputs(view_to_wrap.size());
|
||||||
int64_t i = 0;
|
int64_t i = 0;
|
||||||
for (const auto& tensor : view_to_wrap) {
|
for (const auto& tensor : view_to_wrap) {
|
||||||
@ -787,22 +782,12 @@ std::vector<Tensor> create_functional_tensor_with_view_meta(
|
|||||||
return outputs;
|
return outputs;
|
||||||
}
|
}
|
||||||
|
|
||||||
void mutate_view_meta(const at::Tensor& self, const std::shared_ptr<functionalization::ViewMeta>& meta) {
|
void mutate_view_meta(const at::Tensor& self, const functionalization::ViewMeta& meta) {
|
||||||
TORCH_INTERNAL_ASSERT(at::functionalization::impl::isFunctionalTensor(self));
|
TORCH_INTERNAL_ASSERT(at::functionalization::impl::isFunctionalTensor(self));
|
||||||
auto self_impl = at::functionalization::impl::unsafeGetFunctionalWrapper(self);
|
auto self_impl = at::functionalization::impl::unsafeGetFunctionalWrapper(self);
|
||||||
self_impl->mutate_view_meta(meta);
|
self_impl->mutate_view_meta(meta);
|
||||||
}
|
}
|
||||||
|
|
||||||
Tensor apply_view_meta_sequence(
|
|
||||||
const Tensor& base,
|
|
||||||
const std::vector<std::shared_ptr<functionalization::ViewMeta>>& sequence) {
|
|
||||||
Tensor r = base;
|
|
||||||
for (auto& vm : sequence) {
|
|
||||||
r = vm->forward(r);
|
|
||||||
}
|
|
||||||
return r;
|
|
||||||
}
|
|
||||||
|
|
||||||
// Note [Propagating strides in the functionalization pass]
|
// Note [Propagating strides in the functionalization pass]
|
||||||
// In order to properly compute stride information, the functionalization pass
|
// In order to properly compute stride information, the functionalization pass
|
||||||
// calls each {view} reference implementations with meta tensors.
|
// calls each {view} reference implementations with meta tensors.
|
||||||
@ -896,7 +881,7 @@ void functionalize_op_helper(const c10::OperatorHandle& op, torch::jit::Stack* s
|
|||||||
const auto& ivalue = returns[idx];
|
const auto& ivalue = returns[idx];
|
||||||
if (ivalue.isTensor()) {
|
if (ivalue.isTensor()) {
|
||||||
const auto& t = ivalue.toTensor();
|
const auto& t = ivalue.toTensor();
|
||||||
if (!t.defined()) { continue; }
|
if (!t.defined()) continue;
|
||||||
at::functionalization::impl::sync(t);
|
at::functionalization::impl::sync(t);
|
||||||
auto t_new = c10::IValue(at::functionalization::impl::from_functional_tensor(t));
|
auto t_new = c10::IValue(at::functionalization::impl::from_functional_tensor(t));
|
||||||
(*stack)[returns_begin + idx] = t_new;
|
(*stack)[returns_begin + idx] = t_new;
|
||||||
|
|||||||
@ -56,7 +56,7 @@ struct TORCH_API FunctionalTensorWrapper : public c10::TensorImpl {
|
|||||||
explicit FunctionalTensorWrapper(
|
explicit FunctionalTensorWrapper(
|
||||||
const Tensor& view_value,
|
const Tensor& view_value,
|
||||||
const FunctionalTensorWrapper* base,
|
const FunctionalTensorWrapper* base,
|
||||||
const std::shared_ptr<functionalization::ViewMeta>& meta);
|
const functionalization::ViewMeta& meta);
|
||||||
|
|
||||||
// Get the underlying, actual tensor, that doesn't know anything about
|
// Get the underlying, actual tensor, that doesn't know anything about
|
||||||
// functionalization.
|
// functionalization.
|
||||||
@ -99,17 +99,17 @@ struct TORCH_API FunctionalTensorWrapper : public c10::TensorImpl {
|
|||||||
->are_all_mutations_under_no_grad_or_inference_mode();
|
->are_all_mutations_under_no_grad_or_inference_mode();
|
||||||
}
|
}
|
||||||
|
|
||||||
void maybe_mark_symbolic(functionalization::ViewMeta* meta) {
|
void maybe_mark_symbolic(const functionalization::ViewMeta& meta) {
|
||||||
is_symbolic_ = is_symbolic_ | meta->has_symbolic_inputs;
|
is_symbolic_ = is_symbolic_ | meta.has_symbolic_inputs;
|
||||||
}
|
}
|
||||||
|
|
||||||
bool is_symbolic() const {
|
bool is_symbolic() const {
|
||||||
return is_symbolic_;
|
return is_symbolic_;
|
||||||
}
|
}
|
||||||
|
|
||||||
// Retrieves the ViewMeta sequence of this tensor.
|
// Runs the forward_fn of every ViewMeta collected in the current instance
|
||||||
const std::vector<std::shared_ptr<functionalization::ViewMeta>>& view_metas()
|
// to some other base.
|
||||||
const;
|
Tensor apply_view_metas(const Tensor& base);
|
||||||
|
|
||||||
// Syncs the underlying tensor with its alias, if it's out of date. This
|
// Syncs the underlying tensor with its alias, if it's out of date. This
|
||||||
// involves two steps: 1) Apply any pending updates/mutations to the alias 2)
|
// involves two steps: 1) Apply any pending updates/mutations to the alias 2)
|
||||||
@ -146,8 +146,7 @@ struct TORCH_API FunctionalTensorWrapper : public c10::TensorImpl {
|
|||||||
// from the base tensor. This method is used by inplace-view ops like
|
// from the base tensor. This method is used by inplace-view ops like
|
||||||
// transpose_. It appends a ViewMeta to the existing stack, and refreshes the
|
// transpose_. It appends a ViewMeta to the existing stack, and refreshes the
|
||||||
// tensor by replaying the views off of the alias.
|
// tensor by replaying the views off of the alias.
|
||||||
void mutate_view_meta(
|
void mutate_view_meta(const at::functionalization::ViewMeta& meta);
|
||||||
const std::shared_ptr<at::functionalization::ViewMeta>& meta);
|
|
||||||
|
|
||||||
// Custom implementation of self.set_(src)
|
// Custom implementation of self.set_(src)
|
||||||
void set__impl(const FunctionalTensorWrapper* other);
|
void set__impl(const FunctionalTensorWrapper* other);
|
||||||
@ -286,7 +285,7 @@ struct TORCH_API FunctionalTensorWrapper : public c10::TensorImpl {
|
|||||||
bool is_symbolic_ = false;
|
bool is_symbolic_ = false;
|
||||||
|
|
||||||
size_t generation_ = 0;
|
size_t generation_ = 0;
|
||||||
std::vector<std::shared_ptr<at::functionalization::ViewMeta>> view_metas_;
|
std::vector<at::functionalization::ViewMeta> view_metas_;
|
||||||
|
|
||||||
protected:
|
protected:
|
||||||
static void copy_tensor_metadata(
|
static void copy_tensor_metadata(
|
||||||
@ -378,20 +377,16 @@ TORCH_API void propagate_xla_data_direct(
|
|||||||
Tensor create_functional_tensor_with_view_meta(
|
Tensor create_functional_tensor_with_view_meta(
|
||||||
const Tensor& view_to_wrap,
|
const Tensor& view_to_wrap,
|
||||||
const Tensor& base,
|
const Tensor& base,
|
||||||
const std::shared_ptr<functionalization::ViewMeta>& meta,
|
functionalization::ViewMeta meta,
|
||||||
int64_t out_idx = 0);
|
int64_t out_idx = 0);
|
||||||
std::vector<Tensor> create_functional_tensor_with_view_meta(
|
std::vector<Tensor> create_functional_tensor_with_view_meta(
|
||||||
ITensorListRef view_to_wrap,
|
ITensorListRef view_to_wrap,
|
||||||
const Tensor& base,
|
const Tensor& base,
|
||||||
const std::shared_ptr<functionalization::ViewMeta>& meta);
|
const functionalization::ViewMeta& meta);
|
||||||
|
|
||||||
void mutate_view_meta(
|
void mutate_view_meta(
|
||||||
const Tensor& self,
|
const Tensor& self,
|
||||||
const std::shared_ptr<functionalization::ViewMeta>& meta);
|
const functionalization::ViewMeta& meta);
|
||||||
|
|
||||||
TORCH_API Tensor apply_view_meta_sequence(
|
|
||||||
const Tensor& base,
|
|
||||||
const std::vector<std::shared_ptr<functionalization::ViewMeta>>& sequence);
|
|
||||||
|
|
||||||
void set_sizes_strides_offset(const Tensor& out, const Tensor& meta_out);
|
void set_sizes_strides_offset(const Tensor& out, const Tensor& meta_out);
|
||||||
void set_sizes_strides_offset(
|
void set_sizes_strides_offset(
|
||||||
|
|||||||
@ -1,5 +1,3 @@
|
|||||||
#include <ATen/FunctionalizeFallbackKernel.h>
|
|
||||||
|
|
||||||
#include <ATen/core/dispatch/Dispatcher.h>
|
#include <ATen/core/dispatch/Dispatcher.h>
|
||||||
#include <ATen/core/LegacyTypeDispatch.h>
|
#include <ATen/core/LegacyTypeDispatch.h>
|
||||||
#include <ATen/EmptyTensor.h>
|
#include <ATen/EmptyTensor.h>
|
||||||
@ -9,6 +7,7 @@
|
|||||||
#include <torch/library.h>
|
#include <torch/library.h>
|
||||||
#include <c10/util/irange.h>
|
#include <c10/util/irange.h>
|
||||||
#include <c10/util/strides.h>
|
#include <c10/util/strides.h>
|
||||||
|
#include <ATen/EmptyTensor.h>
|
||||||
|
|
||||||
#ifndef AT_PER_OPERATOR_HEADERS
|
#ifndef AT_PER_OPERATOR_HEADERS
|
||||||
#include <ATen/ATen.h>
|
#include <ATen/ATen.h>
|
||||||
@ -29,31 +28,6 @@
|
|||||||
#include <utility>
|
#include <utility>
|
||||||
#endif
|
#endif
|
||||||
|
|
||||||
namespace at::functionalization {
|
|
||||||
|
|
||||||
Tensor resize__ViewMeta::forward(const Tensor& base) {
|
|
||||||
if (reapply_views) {
|
|
||||||
return base.as_strided(size, c10::contiguous_strides(size));
|
|
||||||
} else {
|
|
||||||
return at::as_strided_copy(base, size, c10::contiguous_strides(size));
|
|
||||||
}
|
|
||||||
}
|
|
||||||
|
|
||||||
Tensor resize__ViewMeta::reverse(const Tensor& base, const Tensor& mutated_view) {
|
|
||||||
return base.as_strided_scatter(
|
|
||||||
mutated_view, size, c10::contiguous_strides(size));
|
|
||||||
}
|
|
||||||
|
|
||||||
Tensor _unsafe_view_ViewMeta::forward(const Tensor& base) {
|
|
||||||
return at::_unsafe_view_symint(base, size);
|
|
||||||
}
|
|
||||||
|
|
||||||
Tensor _unsafe_view_ViewMeta::reverse(const Tensor& base, const Tensor& mutated_view) {
|
|
||||||
return at::_unsafe_view_symint(mutated_view, base.sym_sizes());
|
|
||||||
}
|
|
||||||
|
|
||||||
} // namespace at::functionalization
|
|
||||||
|
|
||||||
namespace {
|
namespace {
|
||||||
void functionalizeFallback(const c10::OperatorHandle& op, c10::DispatchKeySet dispatchKeySet [[maybe_unused]], torch::jit::Stack* stack) {
|
void functionalizeFallback(const c10::OperatorHandle& op, c10::DispatchKeySet dispatchKeySet [[maybe_unused]], torch::jit::Stack* stack) {
|
||||||
const auto& schema = op.schema();
|
const auto& schema = op.schema();
|
||||||
@ -132,9 +106,7 @@ namespace {
|
|||||||
const auto& ivalue = returns[idx];
|
const auto& ivalue = returns[idx];
|
||||||
if (ivalue.isTensor() && should_wrap_outputs) {
|
if (ivalue.isTensor() && should_wrap_outputs) {
|
||||||
const auto& t = ivalue.toTensor();
|
const auto& t = ivalue.toTensor();
|
||||||
if (!t.defined()) {
|
if (!t.defined()) continue;
|
||||||
continue;
|
|
||||||
}
|
|
||||||
auto t_new = c10::IValue(at::functionalization::impl::to_functional_tensor(t));
|
auto t_new = c10::IValue(at::functionalization::impl::to_functional_tensor(t));
|
||||||
(*stack)[returns_begin + idx] = t_new;
|
(*stack)[returns_begin + idx] = t_new;
|
||||||
} else if (ivalue.isTensorList() && should_wrap_outputs) {
|
} else if (ivalue.isTensorList() && should_wrap_outputs) {
|
||||||
@ -197,8 +169,19 @@ static const at::Tensor & resize__functionalization(c10::DispatchKeySet dispatch
|
|||||||
// The output of resizing is equivalent to taking a slice of a larger tensor.
|
// The output of resizing is equivalent to taking a slice of a larger tensor.
|
||||||
// We have to emulate this "slicing" with an as_strided call.
|
// We have to emulate this "slicing" with an as_strided call.
|
||||||
auto reapply_views = at::functionalization::impl::getFunctionalizationReapplyViewsTLS();
|
auto reapply_views = at::functionalization::impl::getFunctionalizationReapplyViewsTLS();
|
||||||
auto view_meta = std::make_shared<at::functionalization::resize__ViewMeta>(
|
at::functionalization::ViewMeta view_meta = at::functionalization::ViewMeta(
|
||||||
reapply_views, size.vec());
|
[reapply_views = reapply_views, size = size.vec()](const at::Tensor & base, int64_t mutated_view_idx [[maybe_unused]]) -> at::Tensor {
|
||||||
|
if (reapply_views) {
|
||||||
|
return base.as_strided(size, c10::contiguous_strides(size));
|
||||||
|
} else {
|
||||||
|
return at::as_strided_copy(base, size, c10::contiguous_strides(size));
|
||||||
|
}
|
||||||
|
},
|
||||||
|
[size = size.vec()](const at::Tensor & base, const at::Tensor & mutated_view, int64_t mutated_view_idx [[maybe_unused]]) -> at::Tensor {
|
||||||
|
return base.as_strided_scatter(mutated_view, size, c10::contiguous_strides(size));
|
||||||
|
},
|
||||||
|
/*has_symbolic_inputs=*/false
|
||||||
|
);
|
||||||
at::functionalization::impl::mutate_view_meta(self, view_meta);
|
at::functionalization::impl::mutate_view_meta(self, view_meta);
|
||||||
return self;
|
return self;
|
||||||
}
|
}
|
||||||
@ -317,11 +300,17 @@ static at::Tensor _unsafe_view_functionalize(const at::Tensor & self, at::SymInt
|
|||||||
tmp_output = at::_unsafe_view_symint(self_, size);
|
tmp_output = at::_unsafe_view_symint(self_, size);
|
||||||
}
|
}
|
||||||
|
|
||||||
bool has_symbolic_inputs = std::any_of(
|
bool has_symbolic_inputs = std::any_of(size.begin(), size.end(), [=](auto& s) { return s.is_symbolic(); });
|
||||||
size.begin(), size.end(), [=](auto& s) { return s.is_symbolic(); });
|
|
||||||
auto view_meta =
|
at::functionalization::ViewMeta view_meta = at::functionalization::ViewMeta(
|
||||||
std::make_shared<at::functionalization::_unsafe_view_ViewMeta>(
|
[size = size.vec()](const at::Tensor & base, int64_t mutated_view_idx [[maybe_unused]]) -> at::Tensor {
|
||||||
has_symbolic_inputs, size.vec());
|
return at::_unsafe_view_symint(base, size);
|
||||||
|
},
|
||||||
|
[size = size.vec()](const at::Tensor & base, const at::Tensor & mutated_view, int64_t mutated_view_idx [[maybe_unused]]) -> at::Tensor {
|
||||||
|
return at::_unsafe_view_symint(mutated_view, base.sym_sizes());
|
||||||
|
},
|
||||||
|
/*has_symbolic_inputs=*/has_symbolic_inputs
|
||||||
|
);
|
||||||
|
|
||||||
auto out = at::functionalization::impl::create_functional_tensor_with_view_meta(tmp_output, self, std::move(view_meta));
|
auto out = at::functionalization::impl::create_functional_tensor_with_view_meta(tmp_output, self, std::move(view_meta));
|
||||||
// See Note [Propagating strides in the functionalization pass]
|
// See Note [Propagating strides in the functionalization pass]
|
||||||
|
|||||||
@ -1,58 +0,0 @@
|
|||||||
#pragma once
|
|
||||||
|
|
||||||
#include <ATen/FunctionalStorageImpl.h>
|
|
||||||
|
|
||||||
namespace at::functionalization {
|
|
||||||
|
|
||||||
// `ViewMeta` implementation for `resize_` operation.
|
|
||||||
struct TORCH_API resize__ViewMeta : public ViewMeta {
|
|
||||||
FUNCTIONALIZATION_VIEWMETA_NAME(resize__ViewMeta)
|
|
||||||
FUNCTIONALIZATION_VIEWMETA_SERIALIZABLE_TUPLE(
|
|
||||||
bool /* reapply_views */,
|
|
||||||
const std::vector<int64_t>&);
|
|
||||||
|
|
||||||
resize__ViewMeta(const SerializableTuple& tpl)
|
|
||||||
: resize__ViewMeta(std::get<0>(tpl), std::get<1>(tpl)) {}
|
|
||||||
|
|
||||||
resize__ViewMeta(bool reapply_views, const std::vector<int64_t>& size)
|
|
||||||
: ViewMeta(/*has_symbolic_inputs=*/false),
|
|
||||||
reapply_views(reapply_views),
|
|
||||||
size(size) {}
|
|
||||||
|
|
||||||
Tensor forward(const Tensor& base) override;
|
|
||||||
Tensor reverse(const Tensor& base, const Tensor& mutated_view) override;
|
|
||||||
|
|
||||||
SerializableTuple to_serializable_tuple() {
|
|
||||||
return std::make_tuple(reapply_views, size);
|
|
||||||
}
|
|
||||||
|
|
||||||
bool reapply_views;
|
|
||||||
std::vector<int64_t> size;
|
|
||||||
};
|
|
||||||
|
|
||||||
// `ViewMeta` implementation for `_unsafe_view` operation.
|
|
||||||
struct TORCH_API _unsafe_view_ViewMeta : public ViewMeta {
|
|
||||||
FUNCTIONALIZATION_VIEWMETA_NAME(_unsafe_view_ViewMeta)
|
|
||||||
FUNCTIONALIZATION_VIEWMETA_SERIALIZABLE_TUPLE(
|
|
||||||
bool /* has_symbolic_inputs */,
|
|
||||||
const std::vector<c10::SymInt>&);
|
|
||||||
|
|
||||||
_unsafe_view_ViewMeta(const SerializableTuple& tpl)
|
|
||||||
: _unsafe_view_ViewMeta(std::get<0>(tpl), std::get<1>(tpl)) {}
|
|
||||||
|
|
||||||
_unsafe_view_ViewMeta(
|
|
||||||
bool has_symbolic_inputs,
|
|
||||||
const std::vector<c10::SymInt>& size)
|
|
||||||
: ViewMeta(has_symbolic_inputs), size(size) {}
|
|
||||||
|
|
||||||
Tensor forward(const Tensor& base) override;
|
|
||||||
Tensor reverse(const Tensor& base, const Tensor& mutated_view) override;
|
|
||||||
|
|
||||||
SerializableTuple to_serializable_tuple() {
|
|
||||||
return std::make_tuple(has_symbolic_inputs, size);
|
|
||||||
}
|
|
||||||
|
|
||||||
std::vector<c10::SymInt> size;
|
|
||||||
};
|
|
||||||
|
|
||||||
} // namespace at::functionalization
|
|
||||||
@ -45,39 +45,7 @@ inline void infer_size_impl(
|
|||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
if (infer_dim) {
|
auto set_infer_dim = [&]() {
|
||||||
// numel is the product of known sizes, it has to be divisible by newsize.
|
|
||||||
// and newsize should be positive unless newsize == numel (we throw
|
|
||||||
// different) error message in that case.
|
|
||||||
if constexpr (std::is_same_v<NumelType, c10::SymInt>) {
|
|
||||||
auto v = newsize.maybe_as_int();
|
|
||||||
if (v and *v == 0) {
|
|
||||||
// Avoid div by 0 when sym_eq(numel % newsize, 0) is constructed!
|
|
||||||
// which may happen when newsize is not a symbol! if its a symbol
|
|
||||||
// division won't happen anyway during compile.
|
|
||||||
TORCH_MAYBE_SYM_CHECK(
|
|
||||||
numel == newsize,
|
|
||||||
"shape '",
|
|
||||||
shape,
|
|
||||||
"' is invalid for input of size ",
|
|
||||||
numel);
|
|
||||||
} else {
|
|
||||||
auto cond = sym_gt(newsize, 0)
|
|
||||||
.sym_and(sym_eq(numel % newsize, 0))
|
|
||||||
.sym_or(sym_eq(numel, newsize));
|
|
||||||
TORCH_MAYBE_SYM_CHECK(
|
|
||||||
cond, "shape '", shape, "' is invalid for input of size ", numel);
|
|
||||||
}
|
|
||||||
|
|
||||||
} else {
|
|
||||||
TORCH_CHECK(
|
|
||||||
(newsize > 0 && (numel % newsize == 0)) || numel == newsize,
|
|
||||||
"shape '",
|
|
||||||
shape,
|
|
||||||
"' is invalid for input of size ",
|
|
||||||
numel);
|
|
||||||
}
|
|
||||||
|
|
||||||
// We have a degree of freedom here to select the dimension size; follow
|
// We have a degree of freedom here to select the dimension size; follow
|
||||||
// NumPy semantics and just bail. However, a nice error message is needed
|
// NumPy semantics and just bail. However, a nice error message is needed
|
||||||
// because users often use `view` as a way to flatten & unflatten
|
// because users often use `view` as a way to flatten & unflatten
|
||||||
@ -86,15 +54,19 @@ inline void infer_size_impl(
|
|||||||
// works yet
|
// works yet
|
||||||
// empty_tensor.view(-1, 0)
|
// empty_tensor.view(-1, 0)
|
||||||
// doesn't.
|
// doesn't.
|
||||||
TORCH_MAYBE_SYM_CHECK(
|
TORCH_CHECK(
|
||||||
newsize != 0,
|
newsize != 0,
|
||||||
"cannot reshape tensor of 0 elements into shape ",
|
"cannot reshape tensor of 0 elements into shape ",
|
||||||
shape,
|
shape,
|
||||||
" because the unspecified dimension size -1 can be any "
|
" because the unspecified dimension size -1 can be any "
|
||||||
"value and is ambiguous");
|
"value and is ambiguous");
|
||||||
|
|
||||||
res[*infer_dim] = numel / newsize;
|
res[*infer_dim] = numel / newsize;
|
||||||
return;
|
return;
|
||||||
|
};
|
||||||
|
|
||||||
|
if (infer_dim && newsize > 0 && numel % newsize == 0) {
|
||||||
|
set_infer_dim();
|
||||||
|
return;
|
||||||
}
|
}
|
||||||
|
|
||||||
TORCH_MAYBE_SYM_CHECK(
|
TORCH_MAYBE_SYM_CHECK(
|
||||||
@ -103,6 +75,9 @@ inline void infer_size_impl(
|
|||||||
shape,
|
shape,
|
||||||
"' is invalid for input of size ",
|
"' is invalid for input of size ",
|
||||||
numel);
|
numel);
|
||||||
|
if (infer_dim) {
|
||||||
|
set_infer_dim();
|
||||||
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
inline std::vector<int64_t> infer_size(IntArrayRef shape, int64_t numel) {
|
inline std::vector<int64_t> infer_size(IntArrayRef shape, int64_t numel) {
|
||||||
|
|||||||
@ -103,9 +103,7 @@ std::string get_cpu_capability() {
|
|||||||
#elif defined(HAVE_ZVECTOR_CPU_DEFINITION)
|
#elif defined(HAVE_ZVECTOR_CPU_DEFINITION)
|
||||||
case native::CPUCapability::ZVECTOR:
|
case native::CPUCapability::ZVECTOR:
|
||||||
return "Z VECTOR";
|
return "Z VECTOR";
|
||||||
#elif defined(HAVE_SVE_CPU_DEFINITION) && defined(HAVE_ARM_BF16_CPU_DEFINITION)
|
#elif defined(HAVE_SVE256_CPU_DEFINITION) && defined(HAVE_ARM_BF16_CPU_DEFINITION)
|
||||||
case native::CPUCapability::SVE128:
|
|
||||||
return "SVE128";
|
|
||||||
case native::CPUCapability::SVE256:
|
case native::CPUCapability::SVE256:
|
||||||
return "SVE256";
|
return "SVE256";
|
||||||
#else
|
#else
|
||||||
|
|||||||
@ -1,22 +1,32 @@
|
|||||||
#include <ATen/core/PythonOpRegistrationTrampoline.h>
|
#include <ATen/core/PythonOpRegistrationTrampoline.h>
|
||||||
#include <c10/core/impl/PyInterpreterHooks.h>
|
|
||||||
|
|
||||||
// TODO: delete this
|
|
||||||
namespace at::impl {
|
namespace at::impl {
|
||||||
|
|
||||||
c10::impl::PyInterpreter* PythonOpRegistrationTrampoline::interpreter_ = nullptr;
|
// The strategy is that all python interpreters attempt to register themselves
|
||||||
|
// as the main interpreter, but only one wins. Only that interpreter is
|
||||||
|
// allowed to interact with the C++ dispatcher. Furthermore, when we execute
|
||||||
|
// logic on that interpreter, we do so hermetically, never setting pyobj field
|
||||||
|
// on Tensor.
|
||||||
|
|
||||||
|
std::atomic<c10::impl::PyInterpreter*>
|
||||||
|
PythonOpRegistrationTrampoline::interpreter_{nullptr};
|
||||||
|
|
||||||
c10::impl::PyInterpreter* PythonOpRegistrationTrampoline::getInterpreter() {
|
c10::impl::PyInterpreter* PythonOpRegistrationTrampoline::getInterpreter() {
|
||||||
return c10::impl::getGlobalPyInterpreter();
|
return PythonOpRegistrationTrampoline::interpreter_.load();
|
||||||
}
|
}
|
||||||
|
|
||||||
bool PythonOpRegistrationTrampoline::registerInterpreter(
|
bool PythonOpRegistrationTrampoline::registerInterpreter(
|
||||||
c10::impl::PyInterpreter* interp) {
|
c10::impl::PyInterpreter* interp) {
|
||||||
if (interpreter_ != nullptr) {
|
c10::impl::PyInterpreter* expected = nullptr;
|
||||||
|
interpreter_.compare_exchange_strong(expected, interp);
|
||||||
|
if (expected != nullptr) {
|
||||||
|
// This is the second (or later) Python interpreter, which means we need
|
||||||
|
// non-trivial hermetic PyObject TLS
|
||||||
|
c10::impl::HermeticPyObjectTLS::init_state();
|
||||||
return false;
|
return false;
|
||||||
|
} else {
|
||||||
|
return true;
|
||||||
}
|
}
|
||||||
interpreter_ = interp;
|
|
||||||
return true;
|
|
||||||
}
|
}
|
||||||
|
|
||||||
} // namespace at::impl
|
} // namespace at::impl
|
||||||
|
|||||||
@ -2,21 +2,19 @@
|
|||||||
|
|
||||||
#include <ATen/core/dispatch/Dispatcher.h>
|
#include <ATen/core/dispatch/Dispatcher.h>
|
||||||
|
|
||||||
// TODO: We can get rid of this
|
// TODO: this can probably live in c10
|
||||||
|
|
||||||
|
|
||||||
namespace at::impl {
|
namespace at::impl {
|
||||||
|
|
||||||
// Manages the single Python interpreter instance for PyTorch.
|
|
||||||
class TORCH_API PythonOpRegistrationTrampoline final {
|
class TORCH_API PythonOpRegistrationTrampoline final {
|
||||||
static c10::impl::PyInterpreter* interpreter_;
|
static std::atomic<c10::impl::PyInterpreter*> interpreter_;
|
||||||
|
|
||||||
public:
|
public:
|
||||||
// Register the Python interpreter. Returns true on first registration,
|
// Returns true if you successfully registered yourself (that means
|
||||||
// false if an interpreter was already registered.
|
// you are in the hot seat for doing the operator registrations!)
|
||||||
static bool registerInterpreter(c10::impl::PyInterpreter*);
|
static bool registerInterpreter(c10::impl::PyInterpreter*);
|
||||||
|
|
||||||
// Returns the registered interpreter via the global PyInterpreter hooks.
|
|
||||||
// Returns nullptr if no interpreter has been registered yet.
|
// Returns nullptr if no interpreter has been registered yet.
|
||||||
static c10::impl::PyInterpreter* getInterpreter();
|
static c10::impl::PyInterpreter* getInterpreter();
|
||||||
};
|
};
|
||||||
|
|||||||
@ -1234,7 +1234,7 @@ struct TORCH_API TupleType : public NamedType {
|
|||||||
std::shared_ptr<FunctionSchema> schema_;
|
std::shared_ptr<FunctionSchema> schema_;
|
||||||
};
|
};
|
||||||
|
|
||||||
// the common supertype of all Enums, only used in operator registration.
|
// the common supertype of all Enums, only used in operator registraion.
|
||||||
// EnumType <: AnyEnumType for all Enums
|
// EnumType <: AnyEnumType for all Enums
|
||||||
struct AnyEnumType;
|
struct AnyEnumType;
|
||||||
using AnyEnumTypePtr = SingletonTypePtr<AnyEnumType>;
|
using AnyEnumTypePtr = SingletonTypePtr<AnyEnumType>;
|
||||||
|
|||||||
@ -102,31 +102,8 @@ struct VecReduceAllSIMD<float, Op> {
|
|||||||
#endif // defined(__GNUC__) && (__GNUC__ > 5) && !defined(_MSC_VER) &&
|
#endif // defined(__GNUC__) && (__GNUC__ > 5) && !defined(_MSC_VER) &&
|
||||||
// !defined(C10_MOBILE)
|
// !defined(C10_MOBILE)
|
||||||
|
|
||||||
#if defined(__aarch64__) && !defined(C10_MOBILE) && !defined(__CUDACC__)
|
#if defined(__aarch64__) && !defined(C10_MOBILE) && !defined(__CUDACC__) && \
|
||||||
#if defined(CPU_CAPABILITY_SVE256)
|
!defined(CPU_CAPABILITY_SVE)
|
||||||
template <typename Op>
|
|
||||||
struct VecReduceAllSIMD<float, Op> {
|
|
||||||
static inline float apply(
|
|
||||||
const Op& vec_fun,
|
|
||||||
const Vectorized<float>& acc_vec) {
|
|
||||||
using Vec = Vectorized<float>;
|
|
||||||
Vec v = acc_vec;
|
|
||||||
// 128-bit shuffle
|
|
||||||
svuint32_t ind = svdupq_n_u32(4, 5, 6, 7);
|
|
||||||
Vec v1 = svtbl_f32(v, ind);
|
|
||||||
v = vec_fun(v, v1);
|
|
||||||
// 64-bit shuffle
|
|
||||||
ind = svdupq_n_u32(2, 3, 0, 1);
|
|
||||||
v1 = svtbl_f32(v, ind);
|
|
||||||
v = vec_fun(v, v1);
|
|
||||||
// 32-bit shuffle
|
|
||||||
ind = svdupq_n_u32(1, 0, 2, 3);
|
|
||||||
v1 = svtbl_f32(v, ind);
|
|
||||||
v = vec_fun(v, v1);
|
|
||||||
return svlasta(svpfalse(), v);
|
|
||||||
}
|
|
||||||
};
|
|
||||||
#else
|
|
||||||
template <typename Op>
|
template <typename Op>
|
||||||
struct VecReduceAllSIMD<float, Op> {
|
struct VecReduceAllSIMD<float, Op> {
|
||||||
static inline float apply(
|
static inline float apply(
|
||||||
@ -163,8 +140,35 @@ struct VecReduceAllSIMD<float, std::plus<Vectorized<float>>> {
|
|||||||
return vaddvq_f32(acc_vec);
|
return vaddvq_f32(acc_vec);
|
||||||
}
|
}
|
||||||
};
|
};
|
||||||
#endif // defined(CPU_CAPABILITY_SVE256)
|
|
||||||
#endif // defined(__aarch64__) && !defined(C10_MOBILE) && !defined(__CUDACC__)
|
#endif // defined(__aarch64__) && !defined(C10_MOBILE) && !defined(__CUDACC__)
|
||||||
|
// && !defined(CPU_CAPABILITY_SVE)
|
||||||
|
|
||||||
|
#if defined(__aarch64__) && !defined(C10_MOBILE) && !defined(__CUDACC__) && \
|
||||||
|
defined(CPU_CAPABILITY_SVE256)
|
||||||
|
template <typename Op>
|
||||||
|
struct VecReduceAllSIMD<float, Op> {
|
||||||
|
static inline float apply(
|
||||||
|
const Op& vec_fun,
|
||||||
|
const Vectorized<float>& acc_vec) {
|
||||||
|
using Vec = Vectorized<float>;
|
||||||
|
Vec v = acc_vec;
|
||||||
|
// 128-bit shuffle
|
||||||
|
svuint32_t ind = svdupq_n_u32(4, 5, 6, 7);
|
||||||
|
Vec v1 = svtbl_f32(v, ind);
|
||||||
|
v = vec_fun(v, v1);
|
||||||
|
// 64-bit shuffle
|
||||||
|
ind = svdupq_n_u32(2, 3, 0, 1);
|
||||||
|
v1 = svtbl_f32(v, ind);
|
||||||
|
v = vec_fun(v, v1);
|
||||||
|
// 32-bit shuffle
|
||||||
|
ind = svdupq_n_u32(1, 0, 2, 3);
|
||||||
|
v1 = svtbl_f32(v, ind);
|
||||||
|
v = vec_fun(v, v1);
|
||||||
|
return svlasta(svpfalse(), v);
|
||||||
|
}
|
||||||
|
};
|
||||||
|
#endif // defined(__aarch64__) && !defined(C10_MOBILE) && !defined(__CUDACC__)
|
||||||
|
// && defined(CPU_CAPABILITY_SVE256)
|
||||||
|
|
||||||
template <typename scalar_t, typename Op>
|
template <typename scalar_t, typename Op>
|
||||||
inline scalar_t vec_reduce_all(
|
inline scalar_t vec_reduce_all(
|
||||||
|
|||||||
@ -1,21 +1,9 @@
|
|||||||
#pragma once
|
#pragma once
|
||||||
|
|
||||||
#include <ATen/cpu/vec/intrinsics.h>
|
#include <ATen/cpu/vec/intrinsics.h>
|
||||||
#include <c10/macros/Macros.h>
|
|
||||||
#include <cstdint>
|
|
||||||
|
|
||||||
#include <ATen/cpu/vec/vec_base.h>
|
#include <ATen/cpu/vec/vec_base.h>
|
||||||
|
|
||||||
#if defined(__aarch64__) && \
|
|
||||||
(defined(AT_BUILD_ARM_VEC256_WITH_SLEEF) || \
|
|
||||||
defined(AT_BUILD_ARM_VECSVE_WITH_SLEEF))
|
|
||||||
#define SLEEF_STATIC_LIBS
|
|
||||||
#include <sleef.h>
|
|
||||||
#define USE_SLEEF(sleef_code, non_sleef_code) sleef_code
|
|
||||||
#else
|
|
||||||
#define USE_SLEEF(sleef_code, non_sleef_code) non_sleef_code
|
|
||||||
#endif
|
|
||||||
|
|
||||||
#if defined(CPU_CAPABILITY_SVE)
|
#if defined(CPU_CAPABILITY_SVE)
|
||||||
|
|
||||||
// Define the data type of VLS(vector-length specific).
|
// Define the data type of VLS(vector-length specific).
|
||||||
|
|||||||
Some files were not shown because too many files have changed in this diff Show More
Reference in New Issue
Block a user