Mirror of https://github.com/pytorch/pytorch.git
Synced 2025-11-02 06:24:59 +08:00

Compare commits: 12 commits, inlining...fa_u8_brge

| Author | SHA1 | Date |
|---|---|---|
| | f10edf1ecd | |
| | 9d645a6025 | |
| | 676da3c16a | |
| | 46769004e5 | |
| | 9cb324d903 | |
| | 325db8f2a3 | |
| | 97922c4754 | |
| | d72ab195da | |
| | 43b5c4101d | |
| | b640cf15ab | |
| | 67ccb2ce72 | |
| | 4a2715e652 | |
@@ -6,7 +6,6 @@ from cryptography.hazmat.primitives import hashes, serialization
from cryptography.hazmat.primitives.asymmetric import rsa
from cryptography.x509.oid import NameOID


temp_dir = mkdtemp()
print(temp_dir)

@@ -18,7 +18,6 @@ time python test/run_test.py --verbose -i distributed/test_c10d_gloo
time python test/run_test.py --verbose -i distributed/test_c10d_nccl
time python test/run_test.py --verbose -i distributed/test_c10d_spawn_gloo
time python test/run_test.py --verbose -i distributed/test_c10d_spawn_nccl
time python test/run_test.py --verbose -i distributed/test_compute_comm_reordering
time python test/run_test.py --verbose -i distributed/test_store
time python test/run_test.py --verbose -i distributed/test_symmetric_memory
time python test/run_test.py --verbose -i distributed/test_pg_wrapper

@@ -3,7 +3,6 @@ import json
import math
import sys


parser = argparse.ArgumentParser()
parser.add_argument(
    "--test-name", dest="test_name", action="store", required=True, help="test name"
@@ -3,7 +3,6 @@ import sys

import numpy


sample_data_list = sys.argv[1:]
sample_data_list = [float(v.strip()) for v in sample_data_list]

@@ -1,7 +1,6 @@
import json
import sys


data_file_path = sys.argv[1]
commit_hash = sys.argv[2]

@@ -1,6 +1,5 @@
import sys


log_file_path = sys.argv[1]

with open(log_file_path) as f:
@@ -4,7 +4,6 @@ import os
import subprocess
import sys


COMMON_TESTS = [
    (
        "Checking that torch is available",
@@ -5,7 +5,6 @@ import sys

import yaml


# Need to import modules that lie on an upward-relative path
sys.path.append(os.path.join(sys.path[0], ".."))
@@ -118,18 +118,9 @@ if [[ "$PACKAGE_TYPE" == libtorch ]]; then
  cd /tmp/libtorch
fi

-if [[ "$GPU_ARCH_TYPE" == xpu ]]; then
-  # Workaround for __mkl_tmp_MOD unbound variable issue, refer https://github.com/pytorch/pytorch/issues/130543
-  set +u
-  source /opt/intel/oneapi/pytorch-gpu-dev-0.5/oneapi-vars.sh
-fi
-
# Test the package
/builder/check_binary.sh

# Clean temp files
cd /builder && git clean -ffdx

# =================== The above code will be executed inside Docker container ===================
EOL
echo
@@ -100,20 +100,6 @@ if [[ "$PACKAGE_TYPE" =~ .*wheel.* && -n "$PYTORCH_BUILD_VERSION" && "$PYTORCH_B
  fi
fi

-# Set triton via PYTORCH_EXTRA_INSTALL_REQUIREMENTS for triton xpu package
-if [[ "$PACKAGE_TYPE" =~ .*wheel.* && -n "$PYTORCH_BUILD_VERSION" && "$PYTORCH_BUILD_VERSION" =~ .*xpu.* && $(uname) == "Linux" ]]; then
-  TRITON_REQUIREMENT="pytorch-triton-xpu==${TRITON_VERSION}"
-  if [[ -n "$PYTORCH_BUILD_VERSION" && "$PYTORCH_BUILD_VERSION" =~ .*dev.* ]]; then
-    TRITON_SHORTHASH=$(cut -c1-10 $PYTORCH_ROOT/.ci/docker/ci_commit_pins/triton-xpu.txt)
-    TRITON_REQUIREMENT="pytorch-triton-xpu==${TRITON_VERSION}+${TRITON_SHORTHASH}"
-  fi
-  if [[ -z "${PYTORCH_EXTRA_INSTALL_REQUIREMENTS:-}" ]]; then
-    export PYTORCH_EXTRA_INSTALL_REQUIREMENTS="${TRITON_REQUIREMENT}"
-  else
-    export PYTORCH_EXTRA_INSTALL_REQUIREMENTS="${PYTORCH_EXTRA_INSTALL_REQUIREMENTS} | ${TRITON_REQUIREMENT}"
-  fi
-fi
-
cat >"$envfile" <<EOL
# =================== The following code will be executed inside Docker container ===================
export TZ=UTC
@@ -29,11 +29,6 @@ if [[ "${USE_SPLIT_BUILD:-false}" == "true" ]]; then
  UPLOAD_SUBFOLDER="${UPLOAD_SUBFOLDER}_pypi_pkg"
fi

-# this is special build with all dependencies packaged
-if [[ ${BUILD_NAME} == *-full* ]]; then
-  UPLOAD_SUBFOLDER="${UPLOAD_SUBFOLDER}_full"
-fi
-
# Sleep 2 minutes between retries for conda upload
retry () {
  "$@" || (sleep 5m && "$@") || (sleep 5m && "$@") || (sleep 5m && "$@") || (sleep 5m && "$@")
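The `retry` helper above re-runs a command up to five times with a fixed sleep between attempts (note the comment says two minutes while the code sleeps `5m`). A minimal Python sketch of the same fixed-interval retry pattern, for reference only (not part of the repo; `attempts` and `sleep_seconds` are illustrative):

```python
import subprocess
import time


def retry(cmd: list[str], attempts: int = 5, sleep_seconds: int = 300) -> None:
    """Re-run cmd until it succeeds, with a fixed sleep between attempts."""
    for attempt in range(1, attempts + 1):
        try:
            subprocess.run(cmd, check=True)  # raises on non-zero exit
            return
        except subprocess.CalledProcessError:
            if attempt == attempts:
                raise  # exhausted all attempts; surface the failure
            time.sleep(sleep_seconds)
```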
@@ -8,7 +8,6 @@ import time

import requests


AZURE_PIPELINE_BASE_URL = "https://aiinfra.visualstudio.com/PyTorch/"
AZURE_DEVOPS_PAT_BASE64 = os.environ.get("AZURE_DEVOPS_PAT_BASE64_SECRET", "")
PIPELINE_ID = "911"

.flake8 (2 changes)

@@ -2,7 +2,7 @@
# NOTE: **Mirror any changes** to this file the [tool.ruff] config in pyproject.toml
# before we can fully move to use ruff
enable-extensions = G
-select = B,C,E,F,G,P,SIM1,SIM911,T4,W,B9,TOR0,TOR1,TOR2,TOR9
+select = B,C,E,F,G,P,SIM1,T4,W,B9,TOR0,TOR1,TOR2,TOR9
max-line-length = 120
# C408 ignored because we like the dict keyword argument syntax
# E501 is not flexible enough, we're using B950 instead

@@ -40,7 +40,3 @@ e6ec0efaf87703c5f889cfc20b29be455885d58d
a53cda1ddc15336dc1ff0ce1eff2a49cdc5f882e
# 2024-01-02 clangformat: fused adam #116583
9dc68d1aa9e554d09344a10fff69f7b50b2d23a0
-# 2024-06-28 enable UFMT in `torch/storage.py`
-d80939e5e9337e8078f11489afefec59fd42f93b
-# 2024-06-28 enable UFMT in `torch.utils.data`
-7cf0b90e49689d45be91aa539fdf54cf2ea8a9a3
.github/actionlint.yaml (2 changes, vendored)

@@ -47,5 +47,3 @@ self-hosted-runner:
  - macos-latest-xlarge
  - macos-13-xlarge
  - macos-14-xlarge
-  # Organization-wide Intel hosted XPU runners
-  - linux.idc.xpu
.github/actions/test-pytorch-binary/action.yml (10 changes, vendored)

@@ -36,8 +36,7 @@ runs:
          "${DOCKER_IMAGE}"
        )
        echo "CONTAINER_NAME=${container_name}" >> "$GITHUB_ENV"
-       if [[ "${GPU_ARCH_TYPE}" != "rocm" && "${BUILD_ENVIRONMENT}" != "linux-aarch64-binary-manywheel" && "${BUILD_ENVIRONMENT}" != "linux-s390x-binary-manywheel" && "${GPU_ARCH_TYPE}" != "xpu" ]]; then
+       if [[ "${GPU_ARCH_TYPE}" != "rocm" && "${BUILD_ENVIRONMENT}" != "linux-aarch64-binary-manywheel" && "${BUILD_ENVIRONMENT}" != "linux-s390x-binary-manywheel" ]]; then
          # Propagate download.pytorch.org IP to container. This is only needed on Linux non aarch64 runner
          grep download.pytorch.org /etc/hosts | docker exec -i "${container_name}" bash -c "/bin/cat >> /etc/hosts"
        fi
@@ -48,9 +47,10 @@ runs:
      docker exec -t "${container_name}" bash -c "source ${BINARY_ENV_FILE} && bash -x /run.sh"

  - name: Cleanup docker
-   if: always() && (env.BUILD_ENVIRONMENT == 'linux-s390x-binary-manywheel' || env.GPU_ARCH_TYPE == 'xpu')
+   if: always() && env.BUILD_ENVIRONMENT == 'linux-s390x-binary-manywheel'
    shell: bash
    run: |
-     # on s390x or xpu stop the container for clean worker stop
+     # on s390x stop the container for clean worker stop
      # ignore expansion of "docker ps -q" since it could be empty
      # shellcheck disable=SC2046
      docker stop "${{ env.CONTAINER_NAME }}" || true
      docker stop $(docker ps -q) || true

.github/merge_rules.yaml (2 changes, vendored)

@@ -407,7 +407,7 @@
  - torch/_inductor/codegen/cpp_template.py
  - torch/_inductor/codegen/cpp_gemm_template.py
  - test/inductor/test_mkldnn_pattern_matcher.py
- - test/inductor/test_cpu_repro.py
+ - test/inductor/test_cpu_repo.py
  - test/inductor/test_cpu_cpp_wrapper.py
  - test/inductor/test_cpu_select_algorithm.py
  - aten/src/ATen/cpu/**

.github/scripts/build_triton_wheel.py (2 changes, vendored)

@@ -1,5 +1,4 @@
#!/usr/bin/env python3

import os
import shutil
import sys
@@ -8,7 +7,6 @@ from subprocess import check_call
from tempfile import TemporaryDirectory
from typing import Optional


SCRIPT_DIR = Path(__file__).parent
REPO_DIR = SCRIPT_DIR.parent.parent

.github/scripts/check_labels.py (1 change, vendored)

@@ -5,6 +5,7 @@ import sys
from typing import Any

from github_utils import gh_delete_comment, gh_post_pr_comment

from gitutils import get_git_remote_name, get_git_repo_dir, GitRepo
from label_utils import has_required_labels, is_label_err_comment, LABEL_ERR_MSG
from trymerge import GitHubPR

.github/scripts/cherry_pick.py (2 changes, vendored)

@@ -4,9 +4,11 @@ import json
import os
import re
from typing import Any, cast, Dict, List, Optional

from urllib.error import HTTPError

from github_utils import gh_fetch_url, gh_post_pr_comment, gh_query_issues_by_labels

from gitutils import get_git_remote_name, get_git_repo_dir, GitRepo
from trymerge import get_pr_commit_sha, GitHubPR

@@ -10,7 +10,6 @@ import requests
import rockset  # type: ignore[import]
from gitutils import retries_decorator


LOGS_QUERY = """
with
    shas as (
.github/scripts/collect_ciflow_labels.py (2 changes, vendored)

@@ -1,12 +1,10 @@
#!/usr/bin/env python3

import sys
from pathlib import Path
from typing import Any, cast, Dict, List, Set

import yaml


GITHUB_DIR = Path(__file__).parent.parent


@@ -1,6 +1,7 @@
import json
import subprocess
import sys

from enum import Enum
from pathlib import Path
from typing import NamedTuple, Optional
.github/scripts/delete_old_branches.py (1 change, vendored)

@@ -9,7 +9,6 @@ from typing import Any, Callable, Dict, List, Set
from github_utils import gh_fetch_json_dict, gh_graphql
from gitutils import GitRepo


SEC_IN_DAY = 24 * 60 * 60
CLOSED_PR_RETENTION = 30 * SEC_IN_DAY
NO_PR_RETENTION = 1.5 * 365 * SEC_IN_DAY
@@ -1,6 +1,7 @@
#!/usr/bin/env python3

import sys

from pathlib import Path

import yaml
.github/scripts/export_pytorch_labels.py (1 change, vendored)

@@ -14,6 +14,7 @@ import json
from typing import Any

import boto3  # type: ignore[import]

from label_utils import gh_get_labels


.github/scripts/filter_test_configs.py (1 change, vendored)

@@ -15,7 +15,6 @@ from urllib.request import Request, urlopen

import yaml


REENABLE_TEST_REGEX = "(?i)(Close(d|s)?|Resolve(d|s)?|Fix(ed|es)?) (#|https://github.com/pytorch/pytorch/issues/)([0-9]+)"

PREFIX = "test-config/"

.github/scripts/generate_binary_build_matrix.py (44 changes, vendored)

@@ -8,13 +8,11 @@ architectures:
* CPU
* Latest CUDA
* Latest ROCM
-* Latest XPU
"""

import os
from typing import Dict, List, Optional, Tuple


CUDA_ARCHES = ["11.8", "12.1", "12.4"]

@@ -26,7 +24,6 @@ CUDA_ARCHES_CUDNN_VERSION = {"11.8": "9", "12.1": "9", "12.4": "9"}

ROCM_ARCHES = ["6.0", "6.1"]

-XPU_ARCHES = ["xpu"]

CPU_CXX11_ABI_ARCH = ["cpu-cxx11-abi"]

@@ -135,8 +132,6 @@ def arch_type(arch_version: str) -> str:
        return "cuda"
    elif arch_version in ROCM_ARCHES:
        return "rocm"
-    elif arch_version in XPU_ARCHES:
-        return "xpu"
    elif arch_version in CPU_CXX11_ABI_ARCH:
        return "cpu-cxx11-abi"
    elif arch_version in CPU_AARCH64_ARCH:
@@ -161,7 +156,6 @@ WHEEL_CONTAINER_IMAGES = {
        gpu_arch: f"pytorch/manylinux-builder:rocm{gpu_arch}-{DEFAULT_TAG}"
        for gpu_arch in ROCM_ARCHES
    },
-    "xpu": f"pytorch/manylinux2_28-builder:xpu-{DEFAULT_TAG}",
    "cpu": f"pytorch/manylinux-builder:cpu-{DEFAULT_TAG}",
    "cpu-cxx11-abi": f"pytorch/manylinuxcxx11-abi-builder:cpu-cxx11-abi-{DEFAULT_TAG}",
    "cpu-aarch64": f"pytorch/manylinuxaarch64-builder:cpu-aarch64-{DEFAULT_TAG}",
@@ -227,7 +221,6 @@ def translate_desired_cuda(gpu_arch_type: str, gpu_arch_version: str) -> str:
        "cuda": f"cu{gpu_arch_version.replace('.', '')}",
        "cuda-aarch64": "cu124",
        "rocm": f"rocm{gpu_arch_version}",
-        "xpu": "xpu",
    }.get(gpu_arch_type, gpu_arch_version)


@@ -338,7 +331,7 @@ def generate_wheels_matrix(
    # Define default compute archivectures
    arches = ["cpu"]
    if os == "linux":
-        arches += CPU_CXX11_ABI_ARCH + CUDA_ARCHES + ROCM_ARCHES + XPU_ARCHES
+        arches += CPU_CXX11_ABI_ARCH + CUDA_ARCHES + ROCM_ARCHES
    elif os == "windows":
        arches += CUDA_ARCHES
    elif os == "linux-aarch64":
@@ -361,14 +354,11 @@ def generate_wheels_matrix(
                or arch_version == "cpu-aarch64"
                or arch_version == "cpu-s390x"
                or arch_version == "cuda-aarch64"
-                or arch_version == "xpu"
                else arch_version
            )

-            # TODO: Enable python 3.13 on rocm, xpu, aarch64, windows
-            if (
-                gpu_arch_type in ["rocm", "xpu"] or os != "linux"
-            ) and python_version == "3.13":
+            # TODO: Enable python 3.13 on rocm, aarch64, windows
+            if (gpu_arch_type == "rocm" or os != "linux") and python_version == "3.13":
                continue

            # 12.1 linux wheels require PYTORCH_EXTRA_INSTALL_REQUIREMENTS to install
@@ -410,7 +400,9 @@ def generate_wheels_matrix(
                            gpu_arch_type, gpu_arch_version
                        ),
                        "use_split_build": "True",
-                        "devtoolset": "",
+                        "devtoolset": (
+                            "cxx11-abi" if arch_version == "cuda-aarch64" else ""
+                        ),
                        "container_image": WHEEL_CONTAINER_IMAGES[arch_version],
                        "package_type": package_type,
                        "pytorch_extra_install_requirements": (
@@ -423,26 +415,6 @@ def generate_wheels_matrix(
                        ),
                    }
                )
-                # Special build building to use on Colab. PyThon 3.10 for 12.1 CUDA
-                if python_version == "3.10" and arch_version == "12.1":
-                    ret.append(
-                        {
-                            "python_version": python_version,
-                            "gpu_arch_type": gpu_arch_type,
-                            "gpu_arch_version": gpu_arch_version,
-                            "desired_cuda": translate_desired_cuda(
-                                gpu_arch_type, gpu_arch_version
-                            ),
-                            "use_split_build": "False",
-                            "devtoolset": "",
-                            "container_image": WHEEL_CONTAINER_IMAGES[arch_version],
-                            "package_type": package_type,
-                            "pytorch_extra_install_requirements": "",
-                            "build_name": f"{package_type}-py{python_version}-{gpu_arch_type}{gpu_arch_version}-full".replace(  # noqa: B950
-                                ".", "_"
-                            ),
-                        }
-                    )
            else:
                ret.append(
                    {
@@ -453,9 +425,7 @@ def generate_wheels_matrix(
                            gpu_arch_type, gpu_arch_version
                        ),
                        "devtoolset": (
-                            "cxx11-abi"
-                            if arch_version in ["cpu-cxx11-abi", "xpu"]
-                            else ""
+                            "cxx11-abi" if arch_version == "cpu-cxx11-abi" else ""
                        ),
                        "container_image": WHEEL_CONTAINER_IMAGES[arch_version],
                        "package_type": package_type,
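The mapping edited in `translate_desired_cuda` above is what turns a `(gpu_arch_type, gpu_arch_version)` pair into the `DESIRED_CUDA` string the generated workflows consume; the deleted `"xpu": "xpu"` entry is part of the same XPU rollback. A hedged, self-contained sketch of just the mapping visible in the hunk (the real function lives in .github/scripts/generate_binary_build_matrix.py and may carry more keys):

```python
def translate_desired_cuda(gpu_arch_type: str, gpu_arch_version: str) -> str:
    # Mirrors only the dict entries visible in the hunk above.
    return {
        "cuda": f"cu{gpu_arch_version.replace('.', '')}",
        "cuda-aarch64": "cu124",
        "rocm": f"rocm{gpu_arch_version}",
    }.get(gpu_arch_type, gpu_arch_version)


assert translate_desired_cuda("cuda", "12.1") == "cu121"
assert translate_desired_cuda("rocm", "6.0") == "rocm6.0"
```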
.github/scripts/generate_ci_workflows.py (2 changes, vendored)

@@ -8,8 +8,8 @@ from typing import Dict, Iterable, List, Literal, Set
from typing_extensions import TypedDict  # Python 3.11+

import generate_binary_build_matrix  # type: ignore[import]
-import jinja2
+
+import jinja2

Arch = Literal["windows", "linux", "macos"]

@@ -16,7 +16,6 @@ from typing import Dict, List

import generate_binary_build_matrix


DOCKER_IMAGE_TYPES = ["runtime", "devel"]


.github/scripts/generate_pytorch_version.py (2 changes, vendored)

@@ -4,11 +4,11 @@ import argparse
import os
import re
import subprocess

from datetime import datetime
from distutils.util import strtobool
from pathlib import Path


LEADING_V_PATTERN = re.compile("^v")
TRAILING_RC_PATTERN = re.compile("-rc[0-9]*$")
LEGACY_BASE_VERSION_SUFFIX_PATTERN = re.compile("a0$")
.github/scripts/get_workflow_job_id.py (1 change, vendored)

@@ -11,6 +11,7 @@ import sys
import time
import urllib
import urllib.parse

from typing import Any, Callable, Dict, List, Optional, Tuple
from urllib.request import Request, urlopen

.github/scripts/github_utils.py (1 change, vendored)

@@ -3,6 +3,7 @@
import json
import os
import warnings

from dataclasses import dataclass
from typing import Any, Callable, cast, Dict, List, Optional, Tuple, Union
from urllib.error import HTTPError
.github/scripts/gitutils.py (1 change, vendored)

@@ -19,7 +19,6 @@ from typing import (
    Union,
)


T = TypeVar("T")

RE_GITHUB_URL_MATCH = re.compile("^https://.*@?github.com/(.+)/(.+)$")
.github/scripts/label_utils.py (2 changes, vendored)

@@ -1,12 +1,12 @@
"""GitHub Label Utilities."""

import json

from functools import lru_cache
from typing import Any, List, Tuple, TYPE_CHECKING, Union

from github_utils import gh_fetch_url_and_headers, GitHubComment


# TODO: this is a temp workaround to avoid circular dependencies,
# and should be removed once GitHubPR is refactored out of trymerge script.
if TYPE_CHECKING:
.github/scripts/pytest_cache.py (1 change, vendored)

@@ -9,7 +9,6 @@ from pytest_caching_utils import (
    upload_pytest_cache,
)


TEMP_DIR = "./tmp"  # a backup location in case one isn't provided


.github/scripts/pytest_caching_utils.py (1 change, vendored)

@@ -14,7 +14,6 @@ from file_io_utils import (
    zip_folder,
)


PYTEST_CACHE_KEY_PREFIX = "pytest_cache"
PYTEST_CACHE_DIR_NAME = ".pytest_cache"
BUCKET = "gha-artifacts"
@@ -2,7 +2,7 @@

set -eoux pipefail

-SYNC_BRANCH=pytorch-stable-prototype
+SYNC_BRANCH=fbcode/pytorch-stable-prototype

git config user.email "fake@example.com"
git config user.name "PyTorch Stable Bot"
@@ -11,9 +11,7 @@ git fetch origin main
git fetch origin "$SYNC_BRANCH"
git checkout "$SYNC_BRANCH"

-# Using a hardcoded SHA here is a massive speedup as we can skip the entire history of the pytorch GitHub repo.
-# This specific SHA was chosen as it was before the "branch point" of the stable branch
-for SHA in $(git log ba3b05fdf37ddbc3c301294d6a560a816335e717..origin/main --pretty="%h" --reverse -- torch/distributed torch/csrc/distributed test/distributed test/cpp/c10d benchmarks/distributed)
+for SHA in $(git log 4333e122d4b74cdf84351ed2907045c6a767b4cd..origin/main --pretty="%h" --reverse -- torch/distributed torch/csrc/distributed test/distributed test/cpp/c10d benchmarks/distributed)
do
  # `git merge-base --is-ancestor` exits with code 0 if the given SHA is an ancestor, and non-0 otherwise
  if git merge-base --is-ancestor $SHA HEAD || [[ $(git log --grep="(cherry picked from commit $SHA") ]]
@@ -22,12 +20,7 @@ do
    continue
  fi
  echo "Copying $SHA"
-  git cherry-pick -x "$SHA" -X theirs
-  git reset --soft HEAD~1
-  git add torch/distributed torch/csrc/distributed test/distributed test/cpp/c10d benchmarks/distributed
-  git checkout .
-  git commit --reuse-message=HEAD@{1}
-  git clean -f
+  git cherry-pick -x "$SHA"
done

if [[ "${WITH_PUSH}" == true ]]; then
@@ -41,7 +41,7 @@ def main() -> None:
    )

    options = parser.parse_args()
-    tagged_images: Dict[str, bool] = {}
+    tagged_images: Dict[str, bool] = dict()
    platform_images = [
        generate_binary_build_matrix.WHEEL_CONTAINER_IMAGES,
        generate_binary_build_matrix.LIBTORCH_CONTAINER_IMAGES,
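The one-line change above swaps an empty dict literal for a `dict()` call; the two are equivalent at runtime. flake8's C408 ("unnecessary dict call") would normally flag the call form, but the repo's `.flake8` shown earlier ignores C408 by choice. A tiny illustration (variable names here are mine, not the script's):

```python
from typing import Dict

tagged_literal: Dict[str, bool] = {}    # literal form
tagged_call: Dict[str, bool] = dict()   # call form, flagged by C408 when enabled
assert tagged_literal == tagged_call == {}
```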
.github/scripts/td_llm_indexer.sh (1 change, vendored)

@@ -7,7 +7,6 @@ cd llm-target-determinator
pip install -q -r requirements.txt
cd ../codellama
pip install -e .
-pip install numpy==1.26.0

# Run indexer
cd ../llm-target-determinator

.github/scripts/test_trymerge.py (3 changes, vendored)

@@ -17,7 +17,9 @@ from unittest import main, mock, skip, TestCase
from urllib.error import HTTPError

from github_utils import gh_graphql

from gitutils import get_git_remote_name, get_git_repo_dir, GitRepo

from trymerge import (
    categorize_checks,
    DRCI_CHECKRUN_NAME,
@@ -37,7 +39,6 @@ from trymerge import (
    validate_revert,
)


if "GIT_REMOTE_URL" not in os.environ:
    os.environ["GIT_REMOTE_URL"] = "https://github.com/pytorch/pytorch"

.github/scripts/trymerge.py (2 changes, vendored)

@@ -45,6 +45,7 @@ from github_utils import (
    gh_update_pr_state,
    GitHubComment,
)

from gitutils import (
    are_ghstack_branches_in_sync,
    get_git_remote_name,
@@ -61,7 +62,6 @@ from label_utils import (
)
from trymerge_explainer import get_revert_message, TryMergeExplainer


# labels
MERGE_IN_PROGRESS_LABEL = "merging"
MERGE_COMPLETE_LABEL = "merged"

.github/scripts/tryrebase.py (1 change, vendored)

@@ -11,7 +11,6 @@ from github_utils import gh_post_pr_comment as gh_post_comment
from gitutils import get_git_remote_name, get_git_repo_dir, GitRepo
from trymerge import GitHubPR


SAME_SHA_ERROR = (
    "\n```\nAborting rebase because rebasing the branch resulted in the same sha as the target branch.\n"
    + "This usually happens because the PR has already been merged. Please rebase locally and push.\n```"
@@ -81,7 +81,7 @@ jobs:
  !{{ config["build_name"] }}-test:  # Testing
    if: ${{ github.repository_owner == 'pytorch' }}
    needs: !{{ config["build_name"] }}-build
-    {%- if config["gpu_arch_type"] not in ["rocm", "xpu"] %}
+    {%- if config["gpu_arch_type"] != "rocm" %}
    uses: ./.github/workflows/_binary-test-linux.yml
    with:!{{ upload.binary_env_as_input(config) }}
      build_name: !{{ config["build_name"] }}
@@ -101,40 +101,6 @@ jobs:
    {%- endif %}
    secrets:
      github-token: ${{ secrets.GITHUB_TOKEN }}
-    {%- elif config["gpu_arch_type"] == "xpu" %}
-    runs-on: linux.idc.xpu
-    timeout-minutes: !{{ common.timeout_minutes }}
-    !{{ upload.binary_env(config) }}
-    permissions:
-      id-token: write
-      contents: read
-    steps:
-      - name: Setup XPU
-        uses: ./.github/actions/setup-xpu
-      - name: configure aws credentials
-        id: aws_creds
-        uses: aws-actions/configure-aws-credentials@v1.7.0
-        with:
-          role-to-assume: arn:aws:iam::308535385114:role/gha_workflow_s3_and_ecr_read_only
-          aws-region: us-east-1
-      - name: Login to Amazon ECR
-        id: login-ecr
-        uses: aws-actions/amazon-ecr-login@v2
-      - uses: !{{ common.download_artifact_action }}
-        name: Download Build Artifacts
-        with:
-          name: !{{ config["build_name"] }}
-          path: "${{ runner.temp }}/artifacts/"
-      !{{ common.checkout(deep_clone=False, directory="pytorch") }}
-      !{{ common.checkout(deep_clone=False, directory="builder", repository=common.builder_repo, branch=common.builder_branch) }}
-      - name: Pull Docker image
-        uses: pytorch/test-infra/.github/actions/pull-docker-image@main
-        with:
-          docker-image: !{{ config["container_image"] }}
-      - name: Test Pytorch binary
-        uses: ./pytorch/.github/actions/test-pytorch-binary
-      - name: Teardown XPU
-        uses: ./.github/actions/teardown-xpu
    {%- else %}
    runs-on: linux.rocm.gpu
    timeout-minutes: !{{ common.timeout_minutes }}
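The `!{{ ... }}` markers above are Jinja-style placeholders expanded by .github/scripts/generate_ci_workflows.py into the generated workflow files further down. A hedged sketch of how such a template could be rendered (the template path, marker configuration, and config keys here are assumptions for illustration, not the script's exact setup):

```python
import jinja2

env = jinja2.Environment(
    loader=jinja2.FileSystemLoader(".github/templates"),
    # Templates use !{{ ... }} instead of {{ ... }} so literal GitHub
    # Actions ${{ ... }} expressions pass through untouched.
    variable_start_string="!{{",
    variable_end_string="}}",
)
template = env.get_template("linux_binary_build_workflow.yml.j2")  # hypothetical name
print(template.render(config={"build_name": "manywheel-py3_10-cuda12_1",
                              "gpu_arch_type": "cuda"}))
```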
.github/workflows/_linux-build-rg.yml (105 changes, vendored, new file)

@@ -0,0 +1,105 @@
name: linux-build-rg

on:
  workflow_call:
    inputs:
      build-environment:
        required: true
        type: string
        description: Top-level label for what's being built/tested.
      docker-image-name:
        required: true
        type: string
        description: Name of the base docker image to build with.
      build-generates-artifacts:
        required: false
        type: boolean
        default: true
        description: If set, upload generated build artifacts.
      build-with-debug:
        required: false
        type: boolean
        default: false
        description: If set, build in debug mode.
      sync-tag:
        required: false
        type: string
        default: ""
        description: |
          If this is set, our linter will use this to make sure that every other
          job with the same `sync-tag` is identical.
      cuda-arch-list:
        required: false
        type: string
        default: "5.2"
        description: |
          List of CUDA architectures CI build should target.
      runner-group:
        required: false
        type: string
        default: "arc-lf-linux.2xlarge"
        description: Runner group to select group type
      test-matrix:
        required: false
        type: string
        description: |
          An option JSON description of what test configs to run later on. This
          is moved here from the Linux test workflow so that we can apply filter
          logic using test-config labels earlier and skip unnecessary builds
      s3-bucket:
        description: S3 bucket to download artifact
        required: false
        type: string
        default: "gha-artifacts"
      aws-role-to-assume:
        description: role to assume for downloading artifacts
        required: false
        type: string
        default: ""
    secrets:
      HUGGING_FACE_HUB_TOKEN:
        required: false
        description: |
          HF Auth token to avoid rate limits when downloading models or datasets from hub

    outputs:
      docker-image:
        value: ${{ jobs.build.outputs.docker-image }}
        description: The docker image containing the built PyTorch.
      test-matrix:
        value: ${{ jobs.build.outputs.test-matrix }}
        description: An optional JSON description of what test configs to run later on.

jobs:
  build:
    # Don't run on forked repos
    if: github.repository_owner == 'pytorch'
    runs-on:
      group: ${{ inputs.runner-group }}
    timeout-minutes: 240
    outputs:
      docker-image: ${{ steps.linux-build.outputs.docker-image }}
      test-matrix: ${{ steps.linux-build.outputs.test-matrix }}
    steps:
      # [pytorch repo ref]
      # Use a pytorch/pytorch reference instead of a reference to the local
      # checkout because when we run this action we don't *have* a local
      # checkout. In other cases you should prefer a local checkout.
      - name: Checkout PyTorch
        uses: pytorch/pytorch/.github/actions/checkout-pytorch@main

      - name: Linux Build
        id: linux-build
        uses: ./.github/actions/linux-build
        with:
          build-environment: ${{ inputs.build-environment }}
          docker-image-name: ${{ inputs.docker-image-name }}
          build-generates-artifacts: ${{ inputs.build-generates-artifacts }}
          build-with-debug: ${{ inputs.build-with-debug }}
          sync-tag: ${{ inputs.sync-tag }}
          cuda-arch-list: ${{ inputs.cuda-arch-list }}
          test-matrix: ${{ inputs.test-matrix }}
          s3-bucket: ${{ inputs.s3-bucket }}
          aws-role-to-assume: ${{ inputs.aws-role-to-assume }}
          GITHUB_TOKEN: ${{ secrets.GITHUB_TOKEN }}
          HUGGING_FACE_HUB_TOKEN: ${{ secrets.HUGGING_FACE_HUB_TOKEN }}
.github/workflows/_linux-test-label.yml (85 changes, vendored, new file)

@@ -0,0 +1,85 @@
name: linux-test-rg

on:
  workflow_call:
    inputs:
      build-environment:
        required: true
        type: string
        description: Top-level label for what's being built/tested.
      test-matrix:
        required: true
        type: string
        description: JSON description of what test configs to run.
      docker-image:
        required: true
        type: string
        description: Docker image to run in.
      sync-tag:
        required: false
        type: string
        default: ""
        description: |
          If this is set, our linter will use this to make sure that every other
          job with the same `sync-tag` is identical.
      timeout-minutes:
        required: false
        type: number
        default: 240
        description: |
          Set the maximum (in minutes) how long the workflow should take to finish
      use-gha:
        required: false
        type: string
        default: ""
        description: If set to any value, upload to GHA. Otherwise upload to S3.
      dashboard-tag:
        required: false
        type: string
        default: ""
      s3-bucket:
        description: S3 bucket to download artifact
        required: false
        type: string
        default: "gha-artifacts"
      aws-role-to-assume:
        description: role to assume for downloading artifacts
        required: false
        type: string
        default: ""
    secrets:
      HUGGING_FACE_HUB_TOKEN:
        required: false
        description: |
          HF Auth token to avoid rate limits when downloading models or datasets from hub

env:
  GIT_DEFAULT_BRANCH: ${{ github.event.repository.default_branch }}

jobs:
  test:
    # Don't run on forked repos or empty test matrix
    if: github.repository_owner == 'pytorch' && toJSON(fromJSON(inputs.test-matrix).include) != '[]'
    strategy:
      matrix: ${{ fromJSON(inputs.test-matrix) }}
      fail-fast: false
    runs-on: ${{ matrix.runner }}
    timeout-minutes: ${{ matrix.mem_leak_check == 'mem_leak_check' && 600 || inputs.timeout-minutes }}
    steps:
      - name: Checkout PyTorch
        uses: pytorch/pytorch/.github/actions/checkout-pytorch@main

      - name: Linux Test
        id: linux-test
        uses: ./.github/actions/linux-test
        with:
          build-environment: ${{ inputs.build-environment }}
          test-matrix: ${{ inputs.test-matrix }}
          docker-image: ${{ inputs.docker-image }}
          sync-tag: ${{ inputs.sync-tag }}
          use-gha: ${{ inputs.use-gha }}
          dashboard-tag: ${{ inputs.dashboard-tag }}
          s3-bucket: ${{ inputs.s3-bucket }}
          aws-role-to-assume: ${{ inputs.aws-role-to-assume }}
          HUGGING_FACE_HUB_TOKEN: ${{ secrets.HUGGING_FACE_HUB_TOKEN }}
          GITHUB_TOKEN: ${{ secrets.GITHUB_TOKEN }}
.github/workflows/_linux-test-rg.yml (86 changes, vendored, new file)

@@ -0,0 +1,86 @@
name: linux-test-label

on:
  workflow_call:
    inputs:
      build-environment:
        required: true
        type: string
        description: Top-level label for what's being built/tested.
      test-matrix:
        required: true
        type: string
        description: JSON description of what test configs to run.
      docker-image:
        required: true
        type: string
        description: Docker image to run in.
      sync-tag:
        required: false
        type: string
        default: ""
        description: |
          If this is set, our linter will use this to make sure that every other
          job with the same `sync-tag` is identical.
      timeout-minutes:
        required: false
        type: number
        default: 240
        description: |
          Set the maximum (in minutes) how long the workflow should take to finish
      use-gha:
        required: false
        type: string
        default: ""
        description: If set to any value, upload to GHA. Otherwise upload to S3.
      dashboard-tag:
        required: false
        type: string
        default: ""
      s3-bucket:
        description: S3 bucket to download artifact
        required: false
        type: string
        default: "gha-artifacts"
      aws-role-to-assume:
        description: role to assume for downloading artifacts
        required: false
        type: string
        default: ""
    secrets:
      HUGGING_FACE_HUB_TOKEN:
        required: false
        description: |
          HF Auth token to avoid rate limits when downloading models or datasets from hub

env:
  GIT_DEFAULT_BRANCH: ${{ github.event.repository.default_branch }}

jobs:
  test:
    # Don't run on forked repos or empty test matrix
    if: github.repository_owner == 'pytorch' && toJSON(fromJSON(inputs.test-matrix).include) != '[]'
    strategy:
      matrix: ${{ fromJSON(inputs.test-matrix) }}
      fail-fast: false
    runs-on:
      group: ${{ matrix.runner }}
    timeout-minutes: ${{ matrix.mem_leak_check == 'mem_leak_check' && 600 || inputs.timeout-minutes }}
    steps:
      - name: Checkout PyTorch
        uses: pytorch/pytorch/.github/actions/checkout-pytorch@main

      - name: Linux Test
        id: linux-test
        uses: ./.github/actions/linux-test
        with:
          build-environment: ${{ inputs.build-environment }}
          test-matrix: ${{ inputs.test-matrix }}
          docker-image: ${{ inputs.docker-image }}
          sync-tag: ${{ inputs.sync-tag }}
          use-gha: ${{ inputs.use-gha }}
          dashboard-tag: ${{ inputs.dashboard-tag }}
          s3-bucket: ${{ inputs.s3-bucket }}
          aws-role-to-assume: ${{ inputs.aws-role-to-assume }}
          HUGGING_FACE_HUB_TOKEN: ${{ secrets.HUGGING_FACE_HUB_TOKEN }}
          GITHUB_TOKEN: ${{ secrets.GITHUB_TOKEN }}
.github/workflows/_runner-determinator.yml (1 change, vendored)

@@ -54,7 +54,6 @@ jobs:
      # Hardcoding below is temporary for testing ALI runners
      # This file below should match the script found in .github/scripts/runner_determinator.py
      - name: Hardcode runner-determinator script
        id: hardcode-script
        run: |
          cat <<EOF > runner_determinator.py
          # flake8: noqa: G004

625
.github/workflows/generated-linux-binary-manywheel-nightly.yml
generated
vendored
625
.github/workflows/generated-linux-binary-manywheel-nightly.yml
generated
vendored
@ -751,118 +751,6 @@ jobs:
|
||||
conda-pytorchbot-token-test: ${{ secrets.CONDA_PYTORCHBOT_TOKEN_TEST }}
|
||||
uses: ./.github/workflows/_binary-upload.yml
|
||||
|
||||
manywheel-py3_8-xpu-build:
|
||||
if: ${{ github.repository_owner == 'pytorch' }}
|
||||
uses: ./.github/workflows/_binary-build-linux.yml
|
||||
with:
|
||||
PYTORCH_ROOT: /pytorch
|
||||
BUILDER_ROOT: /builder
|
||||
PACKAGE_TYPE: manywheel
|
||||
# TODO: This is a legacy variable that we eventually want to get rid of in
|
||||
# favor of GPU_ARCH_VERSION
|
||||
DESIRED_CUDA: xpu
|
||||
GPU_ARCH_TYPE: xpu
|
||||
DOCKER_IMAGE: pytorch/manylinux2_28-builder:xpu-main
|
||||
DESIRED_DEVTOOLSET: cxx11-abi
|
||||
DESIRED_PYTHON: "3.8"
|
||||
build_name: manywheel-py3_8-xpu
|
||||
build_environment: linux-binary-manywheel
|
||||
secrets:
|
||||
github-token: ${{ secrets.GITHUB_TOKEN }}
|
||||
manywheel-py3_8-xpu-test: # Testing
|
||||
if: ${{ github.repository_owner == 'pytorch' }}
|
||||
needs: manywheel-py3_8-xpu-build
|
||||
runs-on: linux.idc.xpu
|
||||
timeout-minutes: 240
|
||||
env:
|
||||
PYTORCH_ROOT: /pytorch
|
||||
BUILDER_ROOT: /builder
|
||||
PACKAGE_TYPE: manywheel
|
||||
# TODO: This is a legacy variable that we eventually want to get rid of in
|
||||
# favor of GPU_ARCH_VERSION
|
||||
DESIRED_CUDA: xpu
|
||||
GPU_ARCH_TYPE: xpu
|
||||
SKIP_ALL_TESTS: 1
|
||||
DOCKER_IMAGE: pytorch/manylinux2_28-builder:xpu-main
|
||||
DESIRED_DEVTOOLSET: cxx11-abi
|
||||
DESIRED_PYTHON: "3.8"
|
||||
permissions:
|
||||
id-token: write
|
||||
contents: read
|
||||
steps:
|
||||
- name: Setup XPU
|
||||
uses: ./.github/actions/setup-xpu
|
||||
- name: configure aws credentials
|
||||
id: aws_creds
|
||||
uses: aws-actions/configure-aws-credentials@v1.7.0
|
||||
with:
|
||||
role-to-assume: arn:aws:iam::308535385114:role/gha_workflow_s3_and_ecr_read_only
|
||||
aws-region: us-east-1
|
||||
- name: Login to Amazon ECR
|
||||
id: login-ecr
|
||||
uses: aws-actions/amazon-ecr-login@v2
|
||||
- uses: actions/download-artifact@v3
|
||||
name: Download Build Artifacts
|
||||
with:
|
||||
name: manywheel-py3_8-xpu
|
||||
path: "${{ runner.temp }}/artifacts/"
|
||||
- name: Checkout PyTorch
|
||||
uses: malfet/checkout@silent-checkout
|
||||
with:
|
||||
ref: ${{ github.event_name == 'pull_request' && github.event.pull_request.head.sha || github.sha }}
|
||||
submodules: recursive
|
||||
path: pytorch
|
||||
quiet-checkout: true
|
||||
- name: Clean PyTorch checkout
|
||||
run: |
|
||||
# Remove any artifacts from the previous checkouts
|
||||
git clean -fxd
|
||||
working-directory: pytorch
|
||||
- name: Checkout pytorch/builder
|
||||
uses: malfet/checkout@silent-checkout
|
||||
with:
|
||||
ref: main
|
||||
submodules: recursive
|
||||
repository: pytorch/builder
|
||||
path: builder
|
||||
quiet-checkout: true
|
||||
- name: Clean pytorch/builder checkout
|
||||
run: |
|
||||
# Remove any artifacts from the previous checkouts
|
||||
git clean -fxd
|
||||
working-directory: builder
|
||||
- name: Pull Docker image
|
||||
uses: pytorch/test-infra/.github/actions/pull-docker-image@main
|
||||
with:
|
||||
docker-image: pytorch/manylinux2_28-builder:xpu-main
|
||||
- name: Test Pytorch binary
|
||||
uses: ./pytorch/.github/actions/test-pytorch-binary
|
||||
- name: Teardown XPU
|
||||
uses: ./.github/actions/teardown-xpu
|
||||
manywheel-py3_8-xpu-upload: # Uploading
|
||||
if: ${{ github.repository_owner == 'pytorch' }}
|
||||
permissions:
|
||||
id-token: write
|
||||
contents: read
|
||||
needs: manywheel-py3_8-xpu-test
|
||||
with:
|
||||
PYTORCH_ROOT: /pytorch
|
||||
BUILDER_ROOT: /builder
|
||||
PACKAGE_TYPE: manywheel
|
||||
# TODO: This is a legacy variable that we eventually want to get rid of in
|
||||
# favor of GPU_ARCH_VERSION
|
||||
DESIRED_CUDA: xpu
|
||||
GPU_ARCH_TYPE: xpu
|
||||
DOCKER_IMAGE: pytorch/manylinux2_28-builder:xpu-main
|
||||
DESIRED_DEVTOOLSET: cxx11-abi
|
||||
DESIRED_PYTHON: "3.8"
|
||||
build_name: manywheel-py3_8-xpu
|
||||
secrets:
|
||||
github-token: ${{ secrets.GITHUB_TOKEN }}
|
||||
conda-pytorchbot-token: ${{ secrets.CONDA_PYTORCHBOT_TOKEN }}
|
||||
conda-pytorchbot-token-test: ${{ secrets.CONDA_PYTORCHBOT_TOKEN_TEST }}
|
||||
uses: ./.github/workflows/_binary-upload.yml
|
||||
|
||||
manywheel-py3_9-cpu-build:
|
||||
if: ${{ github.repository_owner == 'pytorch' }}
|
||||
uses: ./.github/workflows/_binary-build-linux.yml
|
||||
@ -1577,118 +1465,6 @@ jobs:
|
||||
conda-pytorchbot-token-test: ${{ secrets.CONDA_PYTORCHBOT_TOKEN_TEST }}
|
||||
uses: ./.github/workflows/_binary-upload.yml
|
||||
|
||||
manywheel-py3_9-xpu-build:
|
||||
if: ${{ github.repository_owner == 'pytorch' }}
|
||||
uses: ./.github/workflows/_binary-build-linux.yml
|
||||
with:
|
||||
PYTORCH_ROOT: /pytorch
|
||||
BUILDER_ROOT: /builder
|
||||
PACKAGE_TYPE: manywheel
|
||||
# TODO: This is a legacy variable that we eventually want to get rid of in
|
||||
# favor of GPU_ARCH_VERSION
|
||||
DESIRED_CUDA: xpu
|
||||
GPU_ARCH_TYPE: xpu
|
||||
DOCKER_IMAGE: pytorch/manylinux2_28-builder:xpu-main
|
||||
DESIRED_DEVTOOLSET: cxx11-abi
|
||||
DESIRED_PYTHON: "3.9"
|
||||
build_name: manywheel-py3_9-xpu
|
||||
build_environment: linux-binary-manywheel
|
||||
secrets:
|
||||
github-token: ${{ secrets.GITHUB_TOKEN }}
|
||||
manywheel-py3_9-xpu-test: # Testing
|
||||
if: ${{ github.repository_owner == 'pytorch' }}
|
||||
needs: manywheel-py3_9-xpu-build
|
||||
runs-on: linux.idc.xpu
|
||||
timeout-minutes: 240
|
||||
env:
|
||||
PYTORCH_ROOT: /pytorch
|
||||
BUILDER_ROOT: /builder
|
||||
PACKAGE_TYPE: manywheel
|
||||
# TODO: This is a legacy variable that we eventually want to get rid of in
|
||||
# favor of GPU_ARCH_VERSION
|
||||
DESIRED_CUDA: xpu
|
||||
GPU_ARCH_TYPE: xpu
|
||||
SKIP_ALL_TESTS: 1
|
||||
DOCKER_IMAGE: pytorch/manylinux2_28-builder:xpu-main
|
||||
DESIRED_DEVTOOLSET: cxx11-abi
|
||||
DESIRED_PYTHON: "3.9"
|
||||
permissions:
|
||||
id-token: write
|
||||
contents: read
|
||||
steps:
|
||||
- name: Setup XPU
|
||||
uses: ./.github/actions/setup-xpu
|
||||
- name: configure aws credentials
|
||||
id: aws_creds
|
||||
uses: aws-actions/configure-aws-credentials@v1.7.0
|
||||
with:
|
||||
role-to-assume: arn:aws:iam::308535385114:role/gha_workflow_s3_and_ecr_read_only
|
||||
aws-region: us-east-1
|
||||
- name: Login to Amazon ECR
|
||||
id: login-ecr
|
||||
uses: aws-actions/amazon-ecr-login@v2
|
||||
- uses: actions/download-artifact@v3
|
||||
name: Download Build Artifacts
|
||||
with:
|
||||
name: manywheel-py3_9-xpu
|
||||
path: "${{ runner.temp }}/artifacts/"
|
||||
- name: Checkout PyTorch
|
||||
uses: malfet/checkout@silent-checkout
|
||||
with:
|
||||
ref: ${{ github.event_name == 'pull_request' && github.event.pull_request.head.sha || github.sha }}
|
||||
submodules: recursive
|
||||
path: pytorch
|
||||
quiet-checkout: true
|
||||
- name: Clean PyTorch checkout
|
||||
run: |
|
||||
# Remove any artifacts from the previous checkouts
|
||||
git clean -fxd
|
||||
working-directory: pytorch
|
||||
- name: Checkout pytorch/builder
|
||||
uses: malfet/checkout@silent-checkout
|
||||
with:
|
||||
ref: main
|
||||
submodules: recursive
|
||||
repository: pytorch/builder
|
||||
path: builder
|
||||
quiet-checkout: true
|
||||
- name: Clean pytorch/builder checkout
|
||||
run: |
|
||||
# Remove any artifacts from the previous checkouts
|
||||
git clean -fxd
|
||||
working-directory: builder
|
||||
- name: Pull Docker image
|
||||
uses: pytorch/test-infra/.github/actions/pull-docker-image@main
|
||||
with:
|
||||
docker-image: pytorch/manylinux2_28-builder:xpu-main
|
||||
- name: Test Pytorch binary
|
||||
uses: ./pytorch/.github/actions/test-pytorch-binary
|
||||
- name: Teardown XPU
|
||||
uses: ./.github/actions/teardown-xpu
|
||||
manywheel-py3_9-xpu-upload: # Uploading
|
||||
if: ${{ github.repository_owner == 'pytorch' }}
|
||||
permissions:
|
||||
id-token: write
|
||||
contents: read
|
||||
needs: manywheel-py3_9-xpu-test
|
||||
with:
|
||||
PYTORCH_ROOT: /pytorch
|
||||
BUILDER_ROOT: /builder
|
||||
PACKAGE_TYPE: manywheel
|
||||
# TODO: This is a legacy variable that we eventually want to get rid of in
|
||||
# favor of GPU_ARCH_VERSION
|
||||
DESIRED_CUDA: xpu
|
||||
GPU_ARCH_TYPE: xpu
|
||||
DOCKER_IMAGE: pytorch/manylinux2_28-builder:xpu-main
|
||||
DESIRED_DEVTOOLSET: cxx11-abi
|
||||
DESIRED_PYTHON: "3.9"
|
||||
build_name: manywheel-py3_9-xpu
|
||||
secrets:
|
||||
github-token: ${{ secrets.GITHUB_TOKEN }}
|
||||
conda-pytorchbot-token: ${{ secrets.CONDA_PYTORCHBOT_TOKEN }}
|
||||
conda-pytorchbot-token-test: ${{ secrets.CONDA_PYTORCHBOT_TOKEN_TEST }}
|
||||
uses: ./.github/workflows/_binary-upload.yml
|
||||
|
||||
manywheel-py3_10-cpu-build:
|
||||
if: ${{ github.repository_owner == 'pytorch' }}
|
||||
uses: ./.github/workflows/_binary-build-linux.yml
|
||||
@ -2068,71 +1844,6 @@ jobs:
|
||||
conda-pytorchbot-token-test: ${{ secrets.CONDA_PYTORCHBOT_TOKEN_TEST }}
|
||||
uses: ./.github/workflows/_binary-upload.yml
|
||||
|
||||
manywheel-py3_10-cuda12_1-full-build:
|
||||
if: ${{ github.repository_owner == 'pytorch' }}
|
||||
uses: ./.github/workflows/_binary-build-linux.yml
|
||||
with:
|
||||
PYTORCH_ROOT: /pytorch
|
||||
BUILDER_ROOT: /builder
|
||||
PACKAGE_TYPE: manywheel
|
||||
# TODO: This is a legacy variable that we eventually want to get rid of in
|
||||
# favor of GPU_ARCH_VERSION
|
||||
DESIRED_CUDA: cu121
|
||||
GPU_ARCH_VERSION: 12.1
|
||||
GPU_ARCH_TYPE: cuda
|
||||
DOCKER_IMAGE: pytorch/manylinux-builder:cuda12.1-main
|
||||
use_split_build: False
|
||||
DESIRED_PYTHON: "3.10"
|
||||
build_name: manywheel-py3_10-cuda12_1-full
|
||||
build_environment: linux-binary-manywheel
|
||||
secrets:
|
||||
github-token: ${{ secrets.GITHUB_TOKEN }}
|
||||
manywheel-py3_10-cuda12_1-full-test: # Testing
|
||||
if: ${{ github.repository_owner == 'pytorch' }}
|
||||
needs: manywheel-py3_10-cuda12_1-full-build
|
||||
uses: ./.github/workflows/_binary-test-linux.yml
|
||||
with:
|
||||
PYTORCH_ROOT: /pytorch
|
||||
BUILDER_ROOT: /builder
|
||||
PACKAGE_TYPE: manywheel
|
||||
# TODO: This is a legacy variable that we eventually want to get rid of in
|
||||
# favor of GPU_ARCH_VERSION
|
||||
DESIRED_CUDA: cu121
|
||||
GPU_ARCH_VERSION: 12.1
|
||||
GPU_ARCH_TYPE: cuda
|
||||
DOCKER_IMAGE: pytorch/manylinux-builder:cuda12.1-main
|
||||
use_split_build: False
|
||||
DESIRED_PYTHON: "3.10"
|
||||
build_name: manywheel-py3_10-cuda12_1-full
|
||||
build_environment: linux-binary-manywheel
|
||||
runs_on: linux.4xlarge.nvidia.gpu
|
||||
secrets:
|
||||
github-token: ${{ secrets.GITHUB_TOKEN }}
|
||||
manywheel-py3_10-cuda12_1-full-upload: # Uploading
|
||||
if: ${{ github.repository_owner == 'pytorch' }}
|
||||
permissions:
|
||||
id-token: write
|
||||
contents: read
|
||||
needs: manywheel-py3_10-cuda12_1-full-test
|
||||
with:
|
||||
PYTORCH_ROOT: /pytorch
|
||||
BUILDER_ROOT: /builder
|
||||
PACKAGE_TYPE: manywheel
|
||||
# TODO: This is a legacy variable that we eventually want to get rid of in
|
||||
# favor of GPU_ARCH_VERSION
|
||||
DESIRED_CUDA: cu121
|
||||
GPU_ARCH_VERSION: 12.1
|
||||
GPU_ARCH_TYPE: cuda
|
||||
DOCKER_IMAGE: pytorch/manylinux-builder:cuda12.1-main
|
||||
use_split_build: False
|
||||
DESIRED_PYTHON: "3.10"
|
||||
build_name: manywheel-py3_10-cuda12_1-full
|
||||
secrets:
|
||||
github-token: ${{ secrets.GITHUB_TOKEN }}
|
||||
conda-pytorchbot-token: ${{ secrets.CONDA_PYTORCHBOT_TOKEN }}
|
||||
conda-pytorchbot-token-test: ${{ secrets.CONDA_PYTORCHBOT_TOKEN_TEST }}
|
||||
uses: ./.github/workflows/_binary-upload.yml
|
||||
|
||||
manywheel-py3_10-cuda12_4-build:
|
||||
if: ${{ github.repository_owner == 'pytorch' }}
|
||||
uses: ./.github/workflows/_binary-build-linux.yml
|
||||
@ -2468,118 +2179,6 @@ jobs:
|
||||
conda-pytorchbot-token-test: ${{ secrets.CONDA_PYTORCHBOT_TOKEN_TEST }}
|
||||
uses: ./.github/workflows/_binary-upload.yml
|
||||
|
||||
manywheel-py3_10-xpu-build:
|
||||
if: ${{ github.repository_owner == 'pytorch' }}
|
||||
uses: ./.github/workflows/_binary-build-linux.yml
|
||||
with:
|
||||
PYTORCH_ROOT: /pytorch
|
||||
BUILDER_ROOT: /builder
|
||||
PACKAGE_TYPE: manywheel
|
||||
# TODO: This is a legacy variable that we eventually want to get rid of in
|
||||
# favor of GPU_ARCH_VERSION
|
||||
DESIRED_CUDA: xpu
|
||||
GPU_ARCH_TYPE: xpu
|
||||
DOCKER_IMAGE: pytorch/manylinux2_28-builder:xpu-main
|
||||
DESIRED_DEVTOOLSET: cxx11-abi
|
||||
DESIRED_PYTHON: "3.10"
|
||||
build_name: manywheel-py3_10-xpu
|
||||
build_environment: linux-binary-manywheel
|
||||
secrets:
|
||||
github-token: ${{ secrets.GITHUB_TOKEN }}
|
||||
manywheel-py3_10-xpu-test: # Testing
|
||||
if: ${{ github.repository_owner == 'pytorch' }}
|
||||
needs: manywheel-py3_10-xpu-build
|
||||
runs-on: linux.idc.xpu
|
||||
timeout-minutes: 240
|
||||
env:
|
||||
PYTORCH_ROOT: /pytorch
|
||||
BUILDER_ROOT: /builder
|
||||
PACKAGE_TYPE: manywheel
|
||||
# TODO: This is a legacy variable that we eventually want to get rid of in
|
||||
# favor of GPU_ARCH_VERSION
|
||||
DESIRED_CUDA: xpu
|
||||
GPU_ARCH_TYPE: xpu
|
||||
SKIP_ALL_TESTS: 1
|
||||
DOCKER_IMAGE: pytorch/manylinux2_28-builder:xpu-main
|
||||
DESIRED_DEVTOOLSET: cxx11-abi
|
||||
DESIRED_PYTHON: "3.10"
|
||||
permissions:
|
||||
id-token: write
|
||||
contents: read
|
||||
steps:
|
||||
- name: Setup XPU
|
||||
uses: ./.github/actions/setup-xpu
|
||||
- name: configure aws credentials
|
||||
id: aws_creds
|
||||
uses: aws-actions/configure-aws-credentials@v1.7.0
|
||||
with:
|
||||
role-to-assume: arn:aws:iam::308535385114:role/gha_workflow_s3_and_ecr_read_only
|
||||
aws-region: us-east-1
|
||||
- name: Login to Amazon ECR
|
||||
id: login-ecr
|
||||
uses: aws-actions/amazon-ecr-login@v2
|
||||
- uses: actions/download-artifact@v3
|
||||
name: Download Build Artifacts
|
||||
with:
|
||||
name: manywheel-py3_10-xpu
|
||||
path: "${{ runner.temp }}/artifacts/"
|
||||
- name: Checkout PyTorch
|
||||
uses: malfet/checkout@silent-checkout
|
||||
with:
|
||||
ref: ${{ github.event_name == 'pull_request' && github.event.pull_request.head.sha || github.sha }}
|
||||
submodules: recursive
|
||||
path: pytorch
|
||||
quiet-checkout: true
|
||||
- name: Clean PyTorch checkout
|
||||
run: |
|
||||
# Remove any artifacts from the previous checkouts
|
||||
git clean -fxd
|
||||
working-directory: pytorch
|
||||
- name: Checkout pytorch/builder
|
||||
uses: malfet/checkout@silent-checkout
|
||||
with:
|
||||
ref: main
|
||||
submodules: recursive
|
||||
repository: pytorch/builder
|
||||
path: builder
|
||||
quiet-checkout: true
|
||||
- name: Clean pytorch/builder checkout
|
||||
run: |
|
||||
# Remove any artifacts from the previous checkouts
|
||||
git clean -fxd
|
||||
working-directory: builder
|
||||
- name: Pull Docker image
|
||||
uses: pytorch/test-infra/.github/actions/pull-docker-image@main
|
||||
with:
|
||||
docker-image: pytorch/manylinux2_28-builder:xpu-main
|
||||
- name: Test Pytorch binary
|
||||
uses: ./pytorch/.github/actions/test-pytorch-binary
|
||||
- name: Teardown XPU
|
||||
uses: ./.github/actions/teardown-xpu
|
||||
manywheel-py3_10-xpu-upload: # Uploading
|
||||
if: ${{ github.repository_owner == 'pytorch' }}
|
||||
permissions:
|
||||
id-token: write
|
||||
contents: read
|
||||
needs: manywheel-py3_10-xpu-test
|
||||
with:
|
||||
PYTORCH_ROOT: /pytorch
|
||||
BUILDER_ROOT: /builder
|
||||
PACKAGE_TYPE: manywheel
|
||||
# TODO: This is a legacy variable that we eventually want to get rid of in
|
||||
# favor of GPU_ARCH_VERSION
|
||||
DESIRED_CUDA: xpu
|
||||
GPU_ARCH_TYPE: xpu
|
||||
DOCKER_IMAGE: pytorch/manylinux2_28-builder:xpu-main
|
||||
DESIRED_DEVTOOLSET: cxx11-abi
|
||||
DESIRED_PYTHON: "3.10"
|
||||
build_name: manywheel-py3_10-xpu
|
||||
secrets:
|
||||
github-token: ${{ secrets.GITHUB_TOKEN }}
|
||||
conda-pytorchbot-token: ${{ secrets.CONDA_PYTORCHBOT_TOKEN }}
|
||||
conda-pytorchbot-token-test: ${{ secrets.CONDA_PYTORCHBOT_TOKEN_TEST }}
|
||||
uses: ./.github/workflows/_binary-upload.yml
|
||||
|
||||
manywheel-py3_11-cpu-build:
|
||||
if: ${{ github.repository_owner == 'pytorch' }}
|
||||
uses: ./.github/workflows/_binary-build-linux.yml
|
||||
@ -3294,118 +2893,6 @@ jobs:
|
||||
conda-pytorchbot-token-test: ${{ secrets.CONDA_PYTORCHBOT_TOKEN_TEST }}
|
||||
uses: ./.github/workflows/_binary-upload.yml
|
||||
|
||||
manywheel-py3_11-xpu-build:
|
||||
if: ${{ github.repository_owner == 'pytorch' }}
|
||||
uses: ./.github/workflows/_binary-build-linux.yml
|
||||
with:
|
||||
PYTORCH_ROOT: /pytorch
|
||||
BUILDER_ROOT: /builder
|
||||
PACKAGE_TYPE: manywheel
|
||||
# TODO: This is a legacy variable that we eventually want to get rid of in
|
||||
# favor of GPU_ARCH_VERSION
|
||||
DESIRED_CUDA: xpu
|
||||
GPU_ARCH_TYPE: xpu
|
||||
DOCKER_IMAGE: pytorch/manylinux2_28-builder:xpu-main
|
||||
DESIRED_DEVTOOLSET: cxx11-abi
|
||||
DESIRED_PYTHON: "3.11"
|
||||
build_name: manywheel-py3_11-xpu
|
||||
build_environment: linux-binary-manywheel
|
||||
secrets:
|
||||
github-token: ${{ secrets.GITHUB_TOKEN }}
|
||||
manywheel-py3_11-xpu-test: # Testing
|
||||
if: ${{ github.repository_owner == 'pytorch' }}
|
||||
needs: manywheel-py3_11-xpu-build
|
||||
runs-on: linux.idc.xpu
|
||||
timeout-minutes: 240
|
||||
env:
|
||||
PYTORCH_ROOT: /pytorch
|
||||
BUILDER_ROOT: /builder
|
||||
PACKAGE_TYPE: manywheel
|
||||
# TODO: This is a legacy variable that we eventually want to get rid of in
|
||||
# favor of GPU_ARCH_VERSION
|
||||
DESIRED_CUDA: xpu
|
||||
GPU_ARCH_TYPE: xpu
|
||||
SKIP_ALL_TESTS: 1
|
||||
DOCKER_IMAGE: pytorch/manylinux2_28-builder:xpu-main
|
||||
DESIRED_DEVTOOLSET: cxx11-abi
|
||||
DESIRED_PYTHON: "3.11"
|
||||
permissions:
|
||||
id-token: write
|
||||
contents: read
|
||||
steps:
|
||||
- name: Setup XPU
|
||||
uses: ./.github/actions/setup-xpu
|
||||
- name: configure aws credentials
|
||||
id: aws_creds
|
||||
uses: aws-actions/configure-aws-credentials@v1.7.0
|
||||
with:
|
||||
role-to-assume: arn:aws:iam::308535385114:role/gha_workflow_s3_and_ecr_read_only
|
||||
aws-region: us-east-1
|
||||
- name: Login to Amazon ECR
|
||||
id: login-ecr
|
||||
uses: aws-actions/amazon-ecr-login@v2
|
||||
- uses: actions/download-artifact@v3
|
||||
name: Download Build Artifacts
|
||||
with:
|
||||
name: manywheel-py3_11-xpu
|
||||
path: "${{ runner.temp }}/artifacts/"
|
||||
- name: Checkout PyTorch
|
||||
uses: malfet/checkout@silent-checkout
|
||||
with:
|
||||
ref: ${{ github.event_name == 'pull_request' && github.event.pull_request.head.sha || github.sha }}
|
||||
submodules: recursive
|
||||
path: pytorch
|
||||
quiet-checkout: true
|
||||
- name: Clean PyTorch checkout
|
||||
run: |
|
||||
# Remove any artifacts from the previous checkouts
|
||||
git clean -fxd
|
||||
working-directory: pytorch
|
||||
- name: Checkout pytorch/builder
|
||||
uses: malfet/checkout@silent-checkout
|
||||
with:
|
||||
ref: main
|
||||
submodules: recursive
|
||||
repository: pytorch/builder
|
||||
path: builder
|
||||
quiet-checkout: true
|
||||
- name: Clean pytorch/builder checkout
|
||||
run: |
|
||||
# Remove any artifacts from the previous checkouts
|
||||
git clean -fxd
|
||||
working-directory: builder
|
||||
- name: Pull Docker image
|
||||
uses: pytorch/test-infra/.github/actions/pull-docker-image@main
|
||||
with:
|
||||
docker-image: pytorch/manylinux2_28-builder:xpu-main
|
||||
- name: Test Pytorch binary
|
||||
uses: ./pytorch/.github/actions/test-pytorch-binary
|
||||
- name: Teardown XPU
|
||||
uses: ./.github/actions/teardown-xpu
|
||||
manywheel-py3_11-xpu-upload: # Uploading
|
||||
if: ${{ github.repository_owner == 'pytorch' }}
|
||||
permissions:
|
||||
id-token: write
|
||||
contents: read
|
||||
needs: manywheel-py3_11-xpu-test
|
||||
with:
|
||||
PYTORCH_ROOT: /pytorch
|
||||
BUILDER_ROOT: /builder
|
||||
PACKAGE_TYPE: manywheel
|
||||
# TODO: This is a legacy variable that we eventually want to get rid of in
|
||||
# favor of GPU_ARCH_VERSION
|
||||
DESIRED_CUDA: xpu
|
||||
GPU_ARCH_TYPE: xpu
|
||||
DOCKER_IMAGE: pytorch/manylinux2_28-builder:xpu-main
|
||||
DESIRED_DEVTOOLSET: cxx11-abi
|
||||
DESIRED_PYTHON: "3.11"
|
||||
build_name: manywheel-py3_11-xpu
|
||||
secrets:
|
||||
github-token: ${{ secrets.GITHUB_TOKEN }}
|
||||
conda-pytorchbot-token: ${{ secrets.CONDA_PYTORCHBOT_TOKEN }}
|
||||
conda-pytorchbot-token-test: ${{ secrets.CONDA_PYTORCHBOT_TOKEN_TEST }}
|
||||
uses: ./.github/workflows/_binary-upload.yml
|
||||
|
||||
  manywheel-py3_12-cpu-build:
    if: ${{ github.repository_owner == 'pytorch' }}
    uses: ./.github/workflows/_binary-build-linux.yml
@ -4120,118 +3607,6 @@ jobs:
      conda-pytorchbot-token-test: ${{ secrets.CONDA_PYTORCHBOT_TOKEN_TEST }}
    uses: ./.github/workflows/_binary-upload.yml
  manywheel-py3_12-xpu-build:
    if: ${{ github.repository_owner == 'pytorch' }}
    uses: ./.github/workflows/_binary-build-linux.yml
    with:
      PYTORCH_ROOT: /pytorch
      BUILDER_ROOT: /builder
      PACKAGE_TYPE: manywheel
      # TODO: This is a legacy variable that we eventually want to get rid of in
      #       favor of GPU_ARCH_VERSION
      DESIRED_CUDA: xpu
      GPU_ARCH_TYPE: xpu
      DOCKER_IMAGE: pytorch/manylinux2_28-builder:xpu-main
      DESIRED_DEVTOOLSET: cxx11-abi
      DESIRED_PYTHON: "3.12"
      build_name: manywheel-py3_12-xpu
      build_environment: linux-binary-manywheel
    secrets:
      github-token: ${{ secrets.GITHUB_TOKEN }}
  manywheel-py3_12-xpu-test:  # Testing
    if: ${{ github.repository_owner == 'pytorch' }}
    needs: manywheel-py3_12-xpu-build
    runs-on: linux.idc.xpu
    timeout-minutes: 240
    env:
      PYTORCH_ROOT: /pytorch
      BUILDER_ROOT: /builder
      PACKAGE_TYPE: manywheel
      # TODO: This is a legacy variable that we eventually want to get rid of in
      #       favor of GPU_ARCH_VERSION
      DESIRED_CUDA: xpu
      GPU_ARCH_TYPE: xpu
      SKIP_ALL_TESTS: 1
      DOCKER_IMAGE: pytorch/manylinux2_28-builder:xpu-main
      DESIRED_DEVTOOLSET: cxx11-abi
      DESIRED_PYTHON: "3.12"
    permissions:
      id-token: write
      contents: read
    steps:
      - name: Setup XPU
        uses: ./.github/actions/setup-xpu
      - name: configure aws credentials
        id: aws_creds
        uses: aws-actions/configure-aws-credentials@v1.7.0
        with:
          role-to-assume: arn:aws:iam::308535385114:role/gha_workflow_s3_and_ecr_read_only
          aws-region: us-east-1
      - name: Login to Amazon ECR
        id: login-ecr
        uses: aws-actions/amazon-ecr-login@v2
      - uses: actions/download-artifact@v3
        name: Download Build Artifacts
        with:
          name: manywheel-py3_12-xpu
          path: "${{ runner.temp }}/artifacts/"
      - name: Checkout PyTorch
        uses: malfet/checkout@silent-checkout
        with:
          ref: ${{ github.event_name == 'pull_request' && github.event.pull_request.head.sha || github.sha }}
          submodules: recursive
          path: pytorch
          quiet-checkout: true
      - name: Clean PyTorch checkout
        run: |
          # Remove any artifacts from the previous checkouts
          git clean -fxd
        working-directory: pytorch
      - name: Checkout pytorch/builder
        uses: malfet/checkout@silent-checkout
        with:
          ref: main
          submodules: recursive
          repository: pytorch/builder
          path: builder
          quiet-checkout: true
      - name: Clean pytorch/builder checkout
        run: |
          # Remove any artifacts from the previous checkouts
          git clean -fxd
        working-directory: builder
      - name: Pull Docker image
        uses: pytorch/test-infra/.github/actions/pull-docker-image@main
        with:
          docker-image: pytorch/manylinux2_28-builder:xpu-main
      - name: Test Pytorch binary
        uses: ./pytorch/.github/actions/test-pytorch-binary
      - name: Teardown XPU
        uses: ./.github/actions/teardown-xpu
  manywheel-py3_12-xpu-upload:  # Uploading
    if: ${{ github.repository_owner == 'pytorch' }}
    permissions:
      id-token: write
      contents: read
    needs: manywheel-py3_12-xpu-test
    with:
      PYTORCH_ROOT: /pytorch
      BUILDER_ROOT: /builder
      PACKAGE_TYPE: manywheel
      # TODO: This is a legacy variable that we eventually want to get rid of in
      #       favor of GPU_ARCH_VERSION
      DESIRED_CUDA: xpu
      GPU_ARCH_TYPE: xpu
      DOCKER_IMAGE: pytorch/manylinux2_28-builder:xpu-main
      DESIRED_DEVTOOLSET: cxx11-abi
      DESIRED_PYTHON: "3.12"
      build_name: manywheel-py3_12-xpu
    secrets:
      github-token: ${{ secrets.GITHUB_TOKEN }}
      conda-pytorchbot-token: ${{ secrets.CONDA_PYTORCHBOT_TOKEN }}
      conda-pytorchbot-token-test: ${{ secrets.CONDA_PYTORCHBOT_TOKEN_TEST }}
    uses: ./.github/workflows/_binary-upload.yml

  manywheel-py3_13-cpu-build:
    if: ${{ github.repository_owner == 'pytorch' }}
    uses: ./.github/workflows/_binary-build-linux.yml
6 .github/workflows/nightly.yml vendored
@ -39,10 +39,10 @@ jobs:
  update-vision-commit-hash:
    runs-on: ubuntu-latest
    environment: update-commit-hash
    if: ${{ github.event_name == 'schedule' }}
    steps:
      - name: update-vision-commit-hash
        uses: pytorch/test-infra/.github/actions/update-commit-hash@main
        if: ${{ github.event_name == 'schedule' }}
        with:
          repo-name: vision
          branch: main
@ -54,10 +54,10 @@ jobs:
  update-audio-commit-hash:
    runs-on: ubuntu-latest
    environment: update-commit-hash
    if: ${{ github.event_name == 'schedule' }}
    steps:
      - name: update-audio-commit-hash
        uses: pytorch/test-infra/.github/actions/update-commit-hash@main
        if: ${{ github.event_name == 'schedule' }}
        with:
          repo-name: audio
          branch: main
@ -69,10 +69,10 @@ jobs:
  update-executorch-commit-hash:
    runs-on: ubuntu-latest
    environment: update-commit-hash
    if: ${{ github.event_name == 'schedule' }}
    steps:
      - name: update-executorch-commit-hash
        uses: pytorch/test-infra/.github/actions/update-commit-hash@main
        if: ${{ github.event_name == 'schedule' }}
        with:
          repo-name: executorch
          branch: main
@ -1,42 +0,0 @@
name: runner-determinator

on:
  workflow_dispatch:
  pull_request:
    branches: [main]
    paths:
      - .github/workflows/_runner-determinator.yaml
      - .github/workflows/_runner_determinator_script_sync.yaml
      - .github/workflows/scripts/runner_determinator.py

jobs:
  python-script-sync-check:
    runs-on: ubuntu-latest

    steps:
      - uses: actions/checkout@v4
        with:
          sparse-checkout: |
            .github

      - name: Extract the script from runner_determinator
        run: |
          # Runner determinator files
          RUNNER_DETERMINATOR_WORKFLOW_FILE=.github/workflows/_runner-determinator.yml
          RUNNER_DETERMINATOR_PYTHON_SCRIPT_FILE=.github/scripts/runner_determinator.py

          # Parse the job file, extract the script and run it, up to the final EOF,
          # to generate the python file in the local folder
          yq '.jobs.runner-determinator.steps[] | select(.id == "hardcode-script") | .run' \
            "${RUNNER_DETERMINATOR_WORKFLOW_FILE}" | sed '/^EOF$/q' | bash

          set +e
          DIFF="$(diff "$(basename ${RUNNER_DETERMINATOR_PYTHON_SCRIPT_FILE})" ${RUNNER_DETERMINATOR_PYTHON_SCRIPT_FILE})"
          IS_DIFF=$?
          set -e
          if [ $IS_DIFF -eq 0 ]; then
            echo "Scripts are in sync! ^_^";
          else
            echo -e "Scripts are *NOT* in sync:\n ${DIFF}";
            exit 1
          fi
19 .github/workflows/trunk.yml vendored
@ -203,14 +203,25 @@ jobs:
      cuda-version: cpu
      test-matrix: ${{ needs.win-vs2019-cpu-py3-build.outputs.test-matrix }}

  win-vs2019-cuda12_1-py3-build:
    name: win-vs2019-cuda12.1-py3
  win-vs2019-cuda11_8-py3-build:
    name: win-vs2019-cuda11.8-py3
    uses: ./.github/workflows/_win-build.yml
    needs: get-label-type
    with:
      build-environment: win-vs2019-cuda12.1-py3
      cuda-version: "12.1"
      build-environment: win-vs2019-cuda11.8-py3
      cuda-version: "11.8"
      sync-tag: win-cuda-build
      runner: "${{ needs.get-label-type.outputs.label-type }}windows.4xlarge.nonephemeral"
      test-matrix: |
        { include: [
          { config: "default", shard: 1, num_shards: 6, runner: "${{ needs.get-label-type.outputs.label-type }}windows.g5.4xlarge.nvidia.gpu" },
          { config: "default", shard: 2, num_shards: 6, runner: "${{ needs.get-label-type.outputs.label-type }}windows.g5.4xlarge.nvidia.gpu" },
          { config: "default", shard: 3, num_shards: 6, runner: "${{ needs.get-label-type.outputs.label-type }}windows.g5.4xlarge.nvidia.gpu" },
          { config: "default", shard: 4, num_shards: 6, runner: "${{ needs.get-label-type.outputs.label-type }}windows.g5.4xlarge.nvidia.gpu" },
          { config: "default", shard: 5, num_shards: 6, runner: "${{ needs.get-label-type.outputs.label-type }}windows.g5.4xlarge.nvidia.gpu" },
          { config: "default", shard: 6, num_shards: 6, runner: "${{ needs.get-label-type.outputs.label-type }}windows.g5.4xlarge.nvidia.gpu" },
          { config: "force_on_cpu", shard: 1, num_shards: 1, runner: "${{ needs.get-label-type.outputs.label-type }}windows.4xlarge.nonephemeral" },
        ]}

  linux-focal-rocm6_1-py3_8-build:
    name: linux-focal-rocm6.1-py3.8
@ -38,7 +38,7 @@ init_command = [
    '--dry-run={{DRYRUN}}',
    'flake8==6.1.0',
    'flake8-bugbear==23.3.23',
    'flake8-comprehensions==3.15.0',
    'flake8-comprehensions==3.12.0',
    'flake8-executable==2.1.3',
    'flake8-logging-format==0.9.0',
    'flake8-pyi==23.3.1',
@ -1531,6 +1531,10 @@ exclude_patterns = [
    'torch/signal/__init__.py',
    'torch/signal/windows/__init__.py',
    'torch/signal/windows/windows.py',
    'torch/sparse/__init__.py',
    'torch/sparse/_semi_structured_conversions.py',
    'torch/sparse/_triton_ops.py',
    'torch/sparse/semi_structured.py',
    'torch/special/__init__.py',
    'torch/testing/_internal/__init__.py',
    'torch/testing/_internal/autocast_test_lists.py',
@ -1779,7 +1783,7 @@ init_command = [
    'python3',
    'tools/linter/adapters/pip_init.py',
    '--dry-run={{DRYRUN}}',
    'ruff==0.5.2',
    'ruff==0.5.0',
]
is_formatter = true

@ -208,6 +208,7 @@ endif()
include(CMakeDependentOption)
option(ATEN_NO_TEST "Do not build ATen test binaries" OFF)
option(BUILD_BINARY "Build C++ binaries" OFF)
option(BUILD_DOCS "Build Caffe2 documentation" OFF)
option(BUILD_CUSTOM_PROTOBUF
       "Build and use Caffe2's own protobuf under third_party" ON)
option(BUILD_PYTHON "Build Python binaries" ON)
@ -749,6 +750,7 @@ if(NOT TORCH_BUILD_VERSION)
      CACHE STRING "Torch build version" FORCE)
endif()
caffe2_parse_version_str(TORCH ${TORCH_BUILD_VERSION})
caffe2_parse_version_str(CAFFE2 ${TORCH_BUILD_VERSION})
set(TORCH_SOVERSION "${TORCH_VERSION_MAJOR}.${TORCH_VERSION_MINOR}")

# ---[ CMake scripts + modules
@ -1221,6 +1223,45 @@ endif()
add_subdirectory(c10)
add_subdirectory(caffe2)

# --[ Documentation
if(BUILD_DOCS)
  # check if Doxygen is installed
  find_package(Doxygen)
  if(DOXYGEN_FOUND)
    message("Generating documentation")

    set(DOXYGEN_C_IN ${CMAKE_CURRENT_SOURCE_DIR}/docs/caffe2/.Doxyfile-c)
    set(DOXYGEN_C_OUT ${CMAKE_CURRENT_SOURCE_DIR}/docs/caffe2/Doxyfile-c)
    set(DOXYGEN_P_IN ${CMAKE_CURRENT_SOURCE_DIR}/docs/caffe2/.Doxyfile-python)
    set(DOXYGEN_P_OUT ${CMAKE_CURRENT_SOURCE_DIR}/docs/caffe2/Doxyfile-python)

    if(EXISTS ${CMAKE_CURRENT_BINARY_DIR}/docs)
      file(REMOVE_RECURSE ${CMAKE_CURRENT_BINARY_DIR}/docs)
    endif()

    file(MAKE_DIRECTORY ${CMAKE_CURRENT_BINARY_DIR}/docs)
    configure_file(${DOXYGEN_C_IN} ${DOXYGEN_C_OUT} @ONLY)
    configure_file(${DOXYGEN_P_IN} ${DOXYGEN_P_OUT} @ONLY)

    add_custom_target(
      doc_doxygen_c ALL
      COMMAND ${DOXYGEN_EXECUTABLE} ${DOXYGEN_C_OUT}
      WORKING_DIRECTORY ${CMAKE_CURRENT_SOURCE_DIR}
      COMMENT "Generating C++ API documentation with Doxygen"
      VERBATIM)

    add_custom_target(
      doc_doxygen_python ALL
      COMMAND ${DOXYGEN_EXECUTABLE} ${DOXYGEN_P_OUT}
      WORKING_DIRECTORY ${CMAKE_CURRENT_SOURCE_DIR}
      COMMENT "Generating Python API documentation with Doxygen"
      VERBATIM)
  else()
    message(
      FATAL_ERROR "Doxygen needs to be installed to generate the documentation")
  endif()
endif()
# ---[ CMake related files
# Uninstall option.
if(NOT TARGET caffe2_uninstall)
  configure_file(
18 CODEOWNERS
@ -156,12 +156,12 @@ caffe2/utils/hip @jeffdaily @jithunnair-amd
/torch/csrc/jit/python/init.cpp @mikaylagawarecki

# CUDA and CUDA math libraries
aten/src/ATen/cuda/ @eqy @syed-ahmed
aten/src/ATen/cudnn/ @eqy @syed-ahmed
aten/src/ATen/native/cuda/ @eqy @syed-ahmed
aten/src/ATen/native/cudnn/ @eqy @syed-ahmed
c10/cuda @eqy @syed-ahmed
torch/cuda/ @eqy @syed-ahmed
torch/csrc/cuda/ @eqy @syed-ahmed
torch/backends/cuda/ @eqy @syed-ahmed
torch/backends/cudnn/ @eqy @syed-ahmed
aten/src/ATen/cuda/ @eqy
aten/src/ATen/cudnn/ @eqy
aten/src/ATen/native/cuda/ @eqy
aten/src/ATen/native/cudnn/ @eqy
c10/cuda @eqy
torch/cuda/ @eqy
torch/csrc/cuda/ @eqy
torch/backends/cuda/ @eqy
torch/backends/cudnn/ @eqy
@ -3,7 +3,6 @@ from typing import Dict, List, Optional, Tuple
import torch
from torch import Tensor


OUTPUT_DIR = "src/androidTest/assets/"

@ -119,7 +119,7 @@ class PytorchJni : public facebook::jni::HybridClass<PytorchJni> {
    }
    deviceType_ = deviceJniCodeToDeviceType(device);
    module_ = torch::jit::load(
        std::move(modelPath->toStdString()), std::nullopt, extra_files);
        std::move(modelPath->toStdString()), c10::nullopt, extra_files);
    if (has_extra) {
      static auto putMethod =
          facebook::jni::JMap<facebook::jni::JString, facebook::jni::JString>::

@ -84,9 +84,9 @@ class PytorchJni : public facebook::jni::HybridClass<PytorchJni> {
    }
    deviceType_ = deviceJniCodeToDeviceType(device);
    module_ = torch::jit::_load_for_mobile(
        std::move(modelPath->toStdString()), std::nullopt, extra_files);
        std::move(modelPath->toStdString()), c10::nullopt, extra_files);
    torch::jit::_load_extra_only_for_mobile(
        std::move(modelPath->toStdString()), std::nullopt, extra_files);
        std::move(modelPath->toStdString()), c10::nullopt, extra_files);
    if (has_extra) {
      static auto putMethod =
          facebook::jni::JMap<facebook::jni::JString, facebook::jni::JString>::
@ -2,7 +2,6 @@ from torchvision import models

import torch


print(torch.version.__version__)

resnet18 = models.resnet18(weights=models.ResNet18_Weights.IMAGENET1K_V1)

@ -9,7 +9,6 @@ from torchvision import models

import torch


# Download and trace the model.
model = models.mobilenet_v2(weights=models.MobileNet_V2_Weights.IMAGENET1K_V1)
model.eval()
@ -82,7 +82,6 @@ using acc_type = typename AccumulateType<T, is_cuda>::type;
    using type = acc_t;                        \
  };
#define MPS_ACC_TYPE(t, acc_t) ACC_TYPE(t, acc_t, c10::DeviceType::MPS)
#define XPU_ACC_TYPE(t, acc_t) ACC_TYPE(t, acc_t, c10::DeviceType::XPU)
#define CUDA_ACC_TYPE(t, acc_t) ACC_TYPE(t, acc_t, c10::DeviceType::CUDA)
#define CPU_ACC_TYPE(t, acc_t) ACC_TYPE(t, acc_t, c10::DeviceType::CPU)

@ -105,25 +104,6 @@ MPS_ACC_TYPE(c10::complex<Half>, c10::complex<float>);
MPS_ACC_TYPE(c10::complex<float>, c10::complex<float>);
MPS_ACC_TYPE(c10::complex<double>, c10::complex<float>);

XPU_ACC_TYPE(BFloat16, float);
XPU_ACC_TYPE(Half, float);
XPU_ACC_TYPE(Float8_e5m2, float);
XPU_ACC_TYPE(Float8_e4m3fn, float);
XPU_ACC_TYPE(Float8_e5m2fnuz, float);
XPU_ACC_TYPE(Float8_e4m3fnuz, float);
XPU_ACC_TYPE(float, float);
XPU_ACC_TYPE(double, double);
XPU_ACC_TYPE(int8_t, int64_t);
XPU_ACC_TYPE(uint8_t, int64_t);
XPU_ACC_TYPE(char, int64_t);
XPU_ACC_TYPE(int16_t, int64_t);
XPU_ACC_TYPE(int32_t, int64_t);
XPU_ACC_TYPE(int64_t, int64_t);
XPU_ACC_TYPE(bool, bool);
XPU_ACC_TYPE(c10::complex<Half>, c10::complex<float>);
XPU_ACC_TYPE(c10::complex<float>, c10::complex<float>);
XPU_ACC_TYPE(c10::complex<double>, c10::complex<double>);

#if defined(__CUDACC__) || defined(__HIPCC__)
CUDA_ACC_TYPE(half, float);
#endif
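Note: the ACC_TYPE tables above feed at::acc_type<T, is_cuda>, which kernels use to accumulate in a wider type than the storage type. A minimal sketch of the pattern; the widened_sum name is illustrative, not from the tree:

#include <ATen/AccumulateType.h>
#include <cstdint>

template <typename T>
auto widened_sum(const T* data, int64_t n) {
  // Per the tables above, uint8_t resolves to int64_t and Half/BFloat16
  // resolve to float, so the running sum does not overflow or lose precision.
  using acc_t = at::acc_type<T, /*is_cuda=*/false>;
  acc_t acc = acc_t(0);
  for (int64_t i = 0; i < n; ++i) {
    acc += static_cast<acc_t>(data[i]);  // accumulate in the wider type
  }
  return acc;
}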
@ -283,7 +283,7 @@ at::BlasBackend Context::blasPreferredBackend() {
  if (blas_preferred_backend == at::BlasBackend::Cublaslt) {
    static const bool hipblaslt_unsupported = []() {
      static const std::vector<std::string> archs = {"gfx90a", "gfx940", "gfx941", "gfx942"};
      for (auto index: c10::irange(getNumGPUs())) {
      for (auto index = 0; index < at::getNumGPUs(); index++) {
        if (!detail::getCUDAHooks().isGPUArch(index, archs)) {
          TORCH_WARN_ONCE(
              "Attempting to use hipBLASLt on an unsupported architecture! "
@ -73,8 +73,6 @@ class TORCH_API Context {
      return at::detail::getPrivateUse1Hooks();
    } else if (device_type == at::kMTIA) {
      return at::detail::getMTIAHooks();
    } else if (device_type == at::kHIP) {
      return at::detail::getHIPHooks();
    } else {
      AT_ERROR(
          c10::DeviceTypeName(device_type), " device type not an accelerator.");
@ -96,22 +94,8 @@ class TORCH_API Context {
      AT_ERROR(c10::DeviceTypeName(device_type), " device type not enabled.");
    }
  }
  bool isPinnedPtr(
      const void* data,
      std::optional<DeviceType> device_type = std::nullopt) {
    auto opt_device_type =
        device_type.has_value() ? device_type.value() : at::getAccelerator();
    if (!opt_device_type.has_value() || // there is no accelerator
        !at::isAccelerator(
            opt_device_type.value())) { // passed device not an accelerator
      return false;
    }
    return getAcceleratorHooksInterface(opt_device_type.value())
        .isPinnedPtr(data);
  }
  Allocator* getPinnedMemoryAllocator(
      std::optional<DeviceType> device_type = std::nullopt) {
    return getAcceleratorHooksInterface(device_type).getPinnedMemoryAllocator();
  static bool isPinnedPtr(const void* data) {
    return detail::getCUDAHooks().isPinnedPtr(data);
  }
  static bool hasOpenMP();
  static bool hasMKL();
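Note: the two isPinnedPtr variants in this hunk differ in dispatch. One resolves the active accelerator and routes through its hooks; the other asks the CUDA hooks unconditionally. A hedged sketch of the device-generic behavior, assuming the signatures shown above (the probably_pinned wrapper is illustrative):

#include <ATen/Context.h>
#include <ATen/DeviceAccelerator.h>

bool probably_pinned(const void* data) {
  // Device-generic form: with no accelerator present, nothing can be pinned.
  auto acc = at::getAccelerator(/*checked=*/false);
  if (!acc.has_value()) {
    return false;
  }
  // Otherwise defer to the active accelerator's hooks via the context.
  return at::globalContext().isPinnedPtr(data);
}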
@ -432,73 +416,73 @@ class TORCH_API Context {

TORCH_API Context& globalContext();

inline void init() {
static inline void init() {
  globalContext();
}

TORCH_API Allocator* getCPUAllocator();

inline DeprecatedTypeProperties& getDeprecatedTypeProperties(
static inline DeprecatedTypeProperties& getDeprecatedTypeProperties(
    Backend p,
    ScalarType s) {
  return globalDeprecatedTypePropertiesRegistry().getDeprecatedTypeProperties(
      p, s);
}

inline DeprecatedTypeProperties& CPU(ScalarType s) {
static inline DeprecatedTypeProperties& CPU(ScalarType s) {
  return globalDeprecatedTypePropertiesRegistry().getDeprecatedTypeProperties(
      Backend::CPU, s);
}

inline DeprecatedTypeProperties& CUDA(ScalarType s) {
static inline DeprecatedTypeProperties& CUDA(ScalarType s) {
  return globalDeprecatedTypePropertiesRegistry().getDeprecatedTypeProperties(
      Backend::CUDA, s);
}

inline DeprecatedTypeProperties& HIP(ScalarType s) {
static inline DeprecatedTypeProperties& HIP(ScalarType s) {
  return globalDeprecatedTypePropertiesRegistry().getDeprecatedTypeProperties(
      Backend::HIP, s);
}

inline DeprecatedTypeProperties& MPS(ScalarType s) {
static inline DeprecatedTypeProperties& MPS(ScalarType s) {
  return globalDeprecatedTypePropertiesRegistry().getDeprecatedTypeProperties(
      Backend::MPS, s);
}

inline bool hasCUDA() {
static inline bool hasCUDA() {
  return globalContext().hasCUDA();
}

inline bool hasMTIA() {
static inline bool hasMTIA() {
  return globalContext().hasMTIA();
}

inline bool hasHIP() {
static inline bool hasHIP() {
  return globalContext().hasHIP();
}

inline bool hasIPU() {
static inline bool hasIPU() {
  return globalContext().hasIPU();
}

inline bool hasXLA() {
static inline bool hasXLA() {
  return globalContext().hasXLA();
}

inline bool hasMPS() {
static inline bool hasMPS() {
  return globalContext().hasMPS();
}

inline bool hasMAIA() {
static inline bool hasMAIA() {
  return globalContext().hasMAIA();
}

inline bool hasXPU() {
static inline bool hasXPU() {
  return globalContext().hasXPU();
}

// Despite its name, this function returns the number of *CUDA* GPUs.
inline size_t getNumGPUs() {
static inline size_t getNumGPUs() {
  // WARNING: DO NOT ADD LOGIC TO HANDLE OTHER DEVICE TYPES TO THIS
  // FUNCTION. If you are interested in interrogating the number of
  // devices for a specific device type, add that function to the
@ -517,27 +501,27 @@ inline size_t getNumGPUs() {
  }
}

inline bool hasOpenMP() {
static inline bool hasOpenMP() {
  return globalContext().hasOpenMP();
}

inline bool hasMKL() {
static inline bool hasMKL() {
  return globalContext().hasMKL();
}

inline bool hasLAPACK() {
static inline bool hasLAPACK() {
  return globalContext().hasLAPACK();
}

inline bool hasMAGMA() {
static inline bool hasMAGMA() {
  return globalContext().hasMAGMA();
}

inline bool hasMKLDNN() {
static inline bool hasMKLDNN() {
  return globalContext().hasMKLDNN();
}

inline void manual_seed(uint64_t seed) {
static inline void manual_seed(uint64_t seed) {
  auto gen = globalContext().defaultGenerator(c10::DeviceType::CPU);
  {
    // See Note [Acquire lock when using random generators]
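Note: the free functions in this hunk are thin wrappers over the singleton context; a typical call site looks like the following sketch (the configure_cpu_math name is illustrative):

#include <ATen/Context.h>

void configure_cpu_math() {
  at::init();           // force creation of the global context
  at::manual_seed(42);  // seeds the default CPU generator under its mutex
  if (at::hasMKL() && at::hasOpenMP()) {
    // fast CPU paths are available; nothing else to configure here
  }
}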
@ -2,7 +2,7 @@
#include <ATen/DeviceAccelerator.h>
namespace at {

std::optional<DeviceType> getAccelerator(bool checked) {
C10_API std::optional<DeviceType> getAccelerator(bool checked) {
#define DETECT_AND_ASSIGN_ACCELERATOR(device_name) \
  if (at::has##device_name()) {                    \
    device_type = k##device_name;                  \
@ -25,8 +25,6 @@ std::optional<DeviceType> getAccelerator(bool checked) {
  DETECT_AND_ASSIGN_ACCELERATOR(CUDA)
  DETECT_AND_ASSIGN_ACCELERATOR(MTIA)
  DETECT_AND_ASSIGN_ACCELERATOR(XPU)
  DETECT_AND_ASSIGN_ACCELERATOR(HIP)
  DETECT_AND_ASSIGN_ACCELERATOR(MPS)
  if (checked) {
    TORCH_CHECK(
        device_type, "Cannot access accelerator device when none is available.")
@ -36,18 +34,4 @@ std::optional<DeviceType> getAccelerator(bool checked) {
#undef DETECT_AND_ASSIGN_ACCELERATOR
}

bool isAccelerator(c10::DeviceType d) {
  switch (d) {
    case at::kCUDA:
    case at::kMTIA:
    case at::kXPU:
    case at::kHIP:
    case at::kMPS:
    case at::kPrivateUse1:
      return true;
    default:
      return false;
  }
}

} // namespace at
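Note: a short usage sketch for the getAccelerator/isAccelerator pair above; the print_accelerator function and its output text are illustrative:

#include <ATen/DeviceAccelerator.h>
#include <c10/core/DeviceType.h>
#include <iostream>

void print_accelerator() {
  // Unchecked query: an empty optional means "CPU only".
  if (auto dev = at::getAccelerator(/*checked=*/false)) {
    // Any device detected via DETECT_AND_ASSIGN_ACCELERATOR also satisfies
    // isAccelerator(), which additionally admits kPrivateUse1.
    std::cout << "accelerator: " << c10::DeviceTypeName(*dev) << '\n';
  } else {
    std::cout << "no accelerator detected\n";
  }
}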
@ -13,7 +13,9 @@
// - It provides a set of common APIs as defined by AcceleratorHooksInterface
//
// As of today, accelerator devices are (in no particular order):
// CUDA, MTIA, XPU, HIP, MPS, PrivateUse1
// CUDA, MTIA, XPU, PrivateUse1
// We want to add once all the proper APIs are supported and tested:
// HIP, MPS

namespace at {

@ -22,6 +24,4 @@ namespace at {
// When checked is true, the returned optional always has a value.
TORCH_API std::optional<c10::DeviceType> getAccelerator(bool checked = false);

TORCH_API bool isAccelerator(c10::DeviceType d);

} // namespace at
@ -499,7 +499,7 @@ inline Tensor sum_to(
  return _sum_to(std::move(tensor), shape, always_return_non_view);
}

inline bool is_expandable_to(
static inline bool is_expandable_to(
    SymIntArrayRef shape,
    c10::SymIntArrayRef desired) {
  size_t ndim = shape.size();
@ -517,7 +517,7 @@ inline bool is_expandable_to(
  return true;
}

inline bool is_expandable_to(IntArrayRef shape, IntArrayRef desired) {
static inline bool is_expandable_to(IntArrayRef shape, IntArrayRef desired) {
  auto sym_shape = c10::SymIntArrayRef(
      reinterpret_cast<const c10::SymInt*>(shape.data()), shape.size());
  auto sym_desired = c10::SymIntArrayRef(
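Note: the broadcasting rule behind is_expandable_to is easy to restate outside ATen. A self-contained sketch with the same trailing-dimension alignment (illustrative only, written against std::vector rather than ArrayRef):

#include <cstdint>
#include <vector>

bool is_expandable_to_sketch(const std::vector<int64_t>& shape,
                             const std::vector<int64_t>& desired) {
  size_t ndim = shape.size();
  size_t target_dim = desired.size();
  if (ndim > target_dim) {
    return false;  // expansion can add leading dims but never drop dims
  }
  for (size_t i = 0; i < ndim; ++i) {
    int64_t size = shape[ndim - i - 1];  // align from the right
    int64_t target = desired[target_dim - i - 1];
    if (size != target && size != 1) {
      return false;  // only size-1 dims broadcast
    }
  }
  return true;
}
// is_expandable_to_sketch({3, 1}, {2, 3, 4}) == true   (1 -> 4, leading 2 added)
// is_expandable_to_sketch({3, 2}, {3, 4})    == false  (2 cannot become 4)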
@ -303,7 +303,7 @@ Tensor FunctionalInverses::_nested_view_from_buffer_inverse(const Tensor& base,
  return Tensor();
}

Tensor FunctionalInverses::_nested_view_from_jagged_inverse(const Tensor& base, const Tensor& mutated_view, InverseReturnMode inverse_return_mode, const Tensor& offsets, const Tensor& dummy, const std::optional<Tensor>& lengths, int64_t ragged_idx, const std::optional<Tensor>& min_seqlen, const std::optional<Tensor>& max_seqlen) {
Tensor FunctionalInverses::_nested_view_from_jagged_inverse(const Tensor& base, const Tensor& mutated_view, InverseReturnMode inverse_return_mode, const Tensor& offsets, const Tensor& dummy, const std::optional<Tensor>& lengths, int64_t ragged_idx, const c10::optional<Tensor>& min_seqlen, const c10::optional<Tensor>& max_seqlen) {
  auto values = at::_nested_get_values(mutated_view);
  if (inverse_return_mode != InverseReturnMode::NeverView) {
    return values;
@ -321,8 +321,8 @@ Tensor FunctionalInverses::_nested_get_values_inverse(const Tensor& base, const
  auto max_seqlen = at::_nested_get_max_seqlen(base);
  auto nt = at::_nested_view_from_jagged(
      mutated_view, offsets, dummy, lengths, ragged_idx,
      (min_seqlen.defined() ? std::optional<Tensor>(min_seqlen) : std::nullopt),
      (max_seqlen.defined() ? std::optional<Tensor>(max_seqlen) : std::nullopt));
      (min_seqlen.defined() ? c10::optional<Tensor>(min_seqlen) : std::nullopt),
      (max_seqlen.defined() ? c10::optional<Tensor>(max_seqlen) : std::nullopt));

  if (inverse_return_mode != InverseReturnMode::NeverView) {
    return nt;
@ -62,7 +62,7 @@ static bool is_allowed_dim_on_scalar_tensor(int64_t dim) {
  return dim == 0 || dim == -1;
}

Tensor sum_batching_rule(const Tensor& self, OptionalIntArrayRef opt_dims, bool keepdim, std::optional<ScalarType> dtype) {
Tensor sum_batching_rule(const Tensor& self, OptionalIntArrayRef opt_dims, bool keepdim, optional<ScalarType> dtype) {
  if (opt_dims.has_value()) {
    auto dims = opt_dims.value();
    // PyTorch has a special case where sum(scalar_tensor, dim=0) does not fail
@ -198,7 +198,7 @@ std::vector<Tensor> chunk_batching_rule(const Tensor& self, int64_t chunks, int6
  return result;
}

Tensor clamp_batching_rule(const Tensor& self, const std::optional<Scalar>& min, const std::optional<Scalar>& max) {
Tensor clamp_batching_rule(const Tensor& self, const optional<Scalar>& min, const optional<Scalar>& max) {
  auto self_physical = MultiBatchVmapTransform::logicalToPhysical(self);
  auto result = at::clamp(self_physical.tensor(), min, max);
  return self_physical.getPhysicalToLogicalMap().apply(result);
@ -508,11 +508,11 @@ static void checkBatchDimsAtFrontInLayout(IntArrayRef physical_strides, int64_t
// given (sizes, strides, storage_offset) returns the maximum location that
// can be indexed (or nullopt if such a location doesn't exist, e.g., tensors
// with zero-size dims).
static std::optional<int64_t> maximum_indexable_location(
static optional<int64_t> maximum_indexable_location(
    IntArrayRef sizes, IntArrayRef strides, int64_t storage_offset) {
  auto result = native::storage_size_for(sizes, strides);
  if (result == 0) {
    return std::nullopt;
    return nullopt;
  }
  return result + storage_offset;
}
@ -526,7 +526,7 @@ static void checkBasicAsStridedValidForSlice(
    int64_t num_batch_dims,
    IntArrayRef sizes,
    IntArrayRef strides,
    std::optional<int64_t> maybe_storage_offset) {
    optional<int64_t> maybe_storage_offset) {
  auto slice_sizes = physical_tensor.sizes().slice(num_batch_dims);
  auto slice_strides = physical_tensor.strides().slice(num_batch_dims);
  auto base_offset = physical_tensor.storage_offset();
@ -614,7 +614,7 @@ Tensor as_strided_batching_rule(
    const Tensor& tensor,
    IntArrayRef sizes,
    IntArrayRef strides,
    std::optional<int64_t> storage_offset) {
    optional<int64_t> storage_offset) {
  auto physical_view = at::MultiBatchVmapTransform::logicalToPhysical(tensor);
  auto num_batch_dims = physical_view.numBatchDims();
  auto physical_sizes = physical_view.getPhysicalShape(sizes);
@ -763,7 +763,7 @@ Tensor pow_scalar_Tensor_batching_rule(const Scalar& other, const Tensor& self)
  return makeBatched(output_physical, BatchDims(old_bdims.begin(), old_bdims.end()));
}

Tensor clone_batching_rule(const Tensor& self, std::optional<MemoryFormat> memory_format) {
Tensor clone_batching_rule(const Tensor& self, optional<MemoryFormat> memory_format) {
  // Memory format support is a little tricky because vmap is allowed to move
  // around batch dimensions and some memory formats are rank-dependent.
  // Another weird case is:
@ -958,12 +958,12 @@ Tensor stack_batching_rule(TensorList tensors, int64_t dim) {
// unwrap_and_call<..., at::to> because at::to takes TensorOptions& (!!)
Tensor to_dtype_layout_batching_rule(
    const Tensor& self,
    std::optional<ScalarType> dtype,
    std::optional<Layout> layout,
    std::optional<Device> device,
    std::optional<bool> pin_memory,
    optional<ScalarType> dtype,
    optional<Layout> layout,
    optional<Device> device,
    optional<bool> pin_memory,
    bool non_blocking, bool copy,
    std::optional<MemoryFormat> memory_format) {
    optional<MemoryFormat> memory_format) {
  auto options = TensorOptions()
      .dtype(dtype)
      .layout(layout)
@ -978,10 +978,10 @@ Tensor to_dtype_layout_batching_rule(
Tensor new_zeros_batching_rule(
    const Tensor& self,
    IntArrayRef size,
    std::optional<ScalarType> dtype,
    std::optional<Layout> layout,
    std::optional<Device> device,
    std::optional<bool> pin_memory) {
    optional<ScalarType> dtype,
    optional<Layout> layout,
    optional<Device> device,
    optional<bool> pin_memory) {
  auto physical_view = MultiBatchVmapTransform::logicalToPhysical(self);
  auto physical_size = physical_view.getPhysicalShape(size);
  auto options = TensorOptions()
@ -1010,10 +1010,10 @@ Tensor new_empty_strided_batching_rule(
    const Tensor& self,
    IntArrayRef size,
    IntArrayRef stride,
    std::optional<ScalarType> dtype,
    std::optional<Layout> layout,
    std::optional<Device> device,
    std::optional<bool> pin_memory) {
    optional<ScalarType> dtype,
    optional<Layout> layout,
    optional<Device> device,
    optional<bool> pin_memory) {
  auto physical_view = MultiBatchVmapTransform::logicalToPhysical(self);
  auto physical_size = physical_view.getPhysicalShape(size);

@ -1181,9 +1181,9 @@ TORCH_LIBRARY_IMPL(aten, Batched, m) {
    m.impl(name, unwrap_and_call_method< \
        to_type, &Tensor::to, __VA_ARGS__>);\
  }
  TO_BATCHING_RULE("to.device", Device, ScalarType, bool, bool, std::optional<MemoryFormat>)
  TO_BATCHING_RULE("to.dtype", ScalarType, bool, bool, std::optional<MemoryFormat>)
  TO_BATCHING_RULE("to.other", const Tensor&, bool, bool, std::optional<MemoryFormat>)
  TO_BATCHING_RULE("to.device", Device, ScalarType, bool, bool, optional<MemoryFormat>)
  TO_BATCHING_RULE("to.dtype", ScalarType, bool, bool, optional<MemoryFormat>)
  TO_BATCHING_RULE("to.other", const Tensor&, bool, bool, optional<MemoryFormat>)
  m.impl("to.dtype_layout", to_dtype_layout_batching_rule);
#undef TO_BATCHING_RULE
  m.impl("clone", clone_batching_rule);
@ -33,15 +33,15 @@ namespace at {
  _(==, x.eq(y), y.eq(x)) \
  _(!=, x.ne(y), y.ne(x))

#define DEFINE_OPERATOR(op, body, reverse_scalar_body)                  \
  inline Tensor operator op(const Tensor& x, const Tensor& y) {         \
    return body;                                                        \
  }                                                                     \
  inline Tensor operator op(const Tensor& x, const Scalar& y) {         \
    return body;                                                        \
  }                                                                     \
  inline Tensor operator op(const Scalar& x, const Tensor& y) {         \
    return reverse_scalar_body;                                         \
#define DEFINE_OPERATOR(op, body, reverse_scalar_body)                  \
  static inline Tensor operator op(const Tensor& x, const Tensor& y) {  \
    return body;                                                        \
  }                                                                     \
  static inline Tensor operator op(const Tensor& x, const Scalar& y) {  \
    return body;                                                        \
  }                                                                     \
  static inline Tensor operator op(const Scalar& x, const Tensor& y) {  \
    return reverse_scalar_body;                                         \
  }

AT_FORALL_BINARY_OPS(DEFINE_OPERATOR)
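Note: expanded, the macro block gives each binary op Tensor/Tensor, Tensor/Scalar, and Scalar/Tensor overloads; the == and != entries shown above pair x.eq(y) with y.eq(x) and x.ne(y) with y.ne(x). A usage sketch (the operator_demo name is illustrative, and the add-based bodies for + are assumed from the rest of the table):

#include <ATen/ATen.h>

void operator_demo() {
  at::Tensor t = at::ones({2, 2});
  at::Tensor u = t + 1;  // Tensor op Scalar overload
  at::Tensor v = 1 + t;  // Scalar op Tensor overload (reverse_scalar_body)
  at::Tensor w = (t == t);  // expands to t.eq(t)
}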
@ -65,9 +65,7 @@ inline bool areAnyOptionalTensorSubclassLike(
  if (c10::impl::dispatch_mode_enabled())
    return true;
  return std::any_of(
      tensors.begin(),
      tensors.end(),
      [](const std::optional<Tensor>& opt_tensor) {
      tensors.begin(), tensors.end(), [](const optional<Tensor>& opt_tensor) {
        return (
            opt_tensor.has_value() && isTensorSubclassLike(opt_tensor.value()));
      });
@ -113,12 +113,12 @@

namespace at::tracer::impl {

inline bool is_dispatch_enabled() {
static inline bool is_dispatch_enabled() {
  return c10::impl::tls_is_dispatch_key_included(at::DispatchKey::Tracer) &&
      !c10::impl::tls_is_dispatch_key_excluded(at::DispatchKey::Tracer);
}

inline void set_dispatch_enabled(bool enabled) {
static inline void set_dispatch_enabled(bool enabled) {
  TORCH_INTERNAL_ASSERT(
      !c10::impl::tls_is_dispatch_key_excluded(at::DispatchKey::Tracer),
      "Cannot enable tracing within the scope of NoTracerDispatchMode!");
@ -29,7 +29,7 @@ TORCH_API int _crash_if_asan(int);
// Converts a TensorList (i.e. ArrayRef<Tensor> to vector of TensorImpl*)
// NB: This is ONLY used by legacy TH bindings, and ONLY used by cat.
// Once cat is ported entirely to ATen this can be deleted!
inline std::vector<TensorImpl*> checked_dense_tensor_list_unwrap(
static inline std::vector<TensorImpl*> checked_dense_tensor_list_unwrap(
    ArrayRef<Tensor> tensors,
    const char* name,
    int pos,
@ -42,70 +42,70 @@ TORCH_LIBRARY_IMPL(aten, VmapMode, m) {
#define TENSOROPTIONS std::optional<c10::ScalarType>, std::optional<c10::Layout>, std::optional<c10::Device>, std::optional<bool>

  // random operations (out-of-place)
  m.impl("bernoulli", unsupportedRandomOp<const Tensor&, std::optional<Generator>>);
  m.impl("bernoulli.out", unsupportedRandomOp_<const Tensor&, std::optional<Generator>, Tensor&>);
  m.impl("bernoulli.p", unsupportedRandomOp<const Tensor&, double, std::optional<Generator>>);
  m.impl("bernoulli_.Tensor", unsupportedRandomOp_<Tensor&, const Tensor&, std::optional<Generator>>);
  m.impl("bernoulli_.float", unsupportedRandomOp_<Tensor&, double, std::optional<Generator>>);
  m.impl("bernoulli", unsupportedRandomOp<const Tensor&, optional<Generator>>);
  m.impl("bernoulli.out", unsupportedRandomOp_<const Tensor&, optional<Generator>, Tensor&>);
  m.impl("bernoulli.p", unsupportedRandomOp<const Tensor&, double, optional<Generator>>);
  m.impl("bernoulli_.Tensor", unsupportedRandomOp_<Tensor&, const Tensor&, optional<Generator>>);
  m.impl("bernoulli_.float", unsupportedRandomOp_<Tensor&, double, optional<Generator>>);

  m.impl("cauchy_", unsupportedRandomOp_<Tensor&, double, double, std::optional<Generator>>);
  m.impl("exponential_", unsupportedRandomOp_<Tensor&, double, std::optional<Generator>>);
  m.impl("geometric_", unsupportedRandomOp_<Tensor&, double, std::optional<Generator>>);
  m.impl("log_normal_", unsupportedRandomOp_<Tensor&, double, double, std::optional<Generator>>);
  m.impl("multinomial", unsupportedRandomOp<const Tensor&, int64_t, bool, std::optional<Generator>>);
  m.impl("multinomial.out", unsupportedRandomOp_<const Tensor&, int64_t, bool, std::optional<Generator>, Tensor&>);
  m.impl("cauchy_", unsupportedRandomOp_<Tensor&, double, double, optional<Generator>>);
  m.impl("exponential_", unsupportedRandomOp_<Tensor&, double, optional<Generator>>);
  m.impl("geometric_", unsupportedRandomOp_<Tensor&, double, optional<Generator>>);
  m.impl("log_normal_", unsupportedRandomOp_<Tensor&, double, double, optional<Generator>>);
  m.impl("multinomial", unsupportedRandomOp<const Tensor&, int64_t, bool, optional<Generator>>);
  m.impl("multinomial.out", unsupportedRandomOp_<const Tensor&, int64_t, bool, optional<Generator>, Tensor&>);

  m.impl("normal.Tensor_float", unsupportedRandomOp<const Tensor&, double, std::optional<Generator>>);
  m.impl("normal.Tensor_float_out", unsupportedRandomOp_<const Tensor&, double, std::optional<Generator>, Tensor&>);
  m.impl("normal.float_Tensor_out", unsupportedRandomOp_<double, const Tensor&, std::optional<Generator>, Tensor&>);
  m.impl("normal.float_Tensor", unsupportedRandomOp<double, const Tensor&, std::optional<Generator>>);
  m.impl("normal.Tensor_Tensor", unsupportedRandomOp<const Tensor&, const Tensor&, std::optional<Generator>>);
  m.impl("normal.Tensor_Tensor_out", unsupportedRandomOp_<const Tensor&, const Tensor&, std::optional<Generator>, Tensor&>);
  m.impl("normal.float_float", unsupportedRandomOp<double, double, IntArrayRef, std::optional<Generator>, TENSOROPTIONS>);
  m.impl("normal.float_float_out", unsupportedRandomOp_<double, double, IntArrayRef, std::optional<Generator>, Tensor&>);
  m.impl("normal_", unsupportedRandomOp_<Tensor&, double, double, std::optional<Generator>>);
  m.impl("normal.Tensor_float", unsupportedRandomOp<const Tensor&, double, optional<Generator>>);
  m.impl("normal.Tensor_float_out", unsupportedRandomOp_<const Tensor&, double, optional<Generator>, Tensor&>);
  m.impl("normal.float_Tensor_out", unsupportedRandomOp_<double, const Tensor&, optional<Generator>, Tensor&>);
  m.impl("normal.float_Tensor", unsupportedRandomOp<double, const Tensor&, optional<Generator>>);
  m.impl("normal.Tensor_Tensor", unsupportedRandomOp<const Tensor&, const Tensor&, optional<Generator>>);
  m.impl("normal.Tensor_Tensor_out", unsupportedRandomOp_<const Tensor&, const Tensor&, optional<Generator>, Tensor&>);
  m.impl("normal.float_float", unsupportedRandomOp<double, double, IntArrayRef, optional<Generator>, TENSOROPTIONS>);
  m.impl("normal.float_float_out", unsupportedRandomOp_<double, double, IntArrayRef, optional<Generator>, Tensor&>);
  m.impl("normal_", unsupportedRandomOp_<Tensor&, double, double, optional<Generator>>);

  m.impl("poisson", unsupportedRandomOp<const Tensor&, std::optional<Generator>>);
  m.impl("poisson", unsupportedRandomOp<const Tensor&, optional<Generator>>);

  m.impl("random_.from", unsupportedRandomOp_<Tensor&, int64_t, std::optional<int64_t>, std::optional<Generator>>);
  m.impl("random_.to", unsupportedRandomOp_<Tensor&, int64_t, std::optional<Generator>>);
  m.impl("random_", unsupportedRandomOp_<Tensor&, std::optional<Generator>>);
  m.impl("random_.from", unsupportedRandomOp_<Tensor&, int64_t, optional<int64_t>, optional<Generator>>);
  m.impl("random_.to", unsupportedRandomOp_<Tensor&, int64_t, optional<Generator>>);
  m.impl("random_", unsupportedRandomOp_<Tensor&, optional<Generator>>);

  m.impl("rand_like", unsupportedRandomOp<const Tensor&, TENSOROPTIONS, std::optional<MemoryFormat>>);
  m.impl("randn_like", unsupportedRandomOp<const Tensor&, TENSOROPTIONS, std::optional<MemoryFormat>>);
  m.impl("rand_like", unsupportedRandomOp<const Tensor&, TENSOROPTIONS, optional<MemoryFormat>>);
  m.impl("randn_like", unsupportedRandomOp<const Tensor&, TENSOROPTIONS, optional<MemoryFormat>>);

  m.impl("randint_like", unsupportedRandomOp<const Tensor&, int64_t, TENSOROPTIONS, std::optional<MemoryFormat>>);
  m.impl("randint_like.low_dtype", unsupportedRandomOp<const Tensor&, int64_t, int64_t, TENSOROPTIONS, std::optional<MemoryFormat>>);
  m.impl("randint_like", unsupportedRandomOp<const Tensor&, int64_t, TENSOROPTIONS, optional<MemoryFormat>>);
  m.impl("randint_like.low_dtype", unsupportedRandomOp<const Tensor&, int64_t, int64_t, TENSOROPTIONS, optional<MemoryFormat>>);

  m.impl("rand", unsupportedRandomOp<IntArrayRef, TENSOROPTIONS>);
  m.impl("rand.generator", unsupportedRandomOp<IntArrayRef, std::optional<Generator>, TENSOROPTIONS>);
  m.impl("rand.names", unsupportedRandomOp<IntArrayRef, std::optional<DimnameList>, TENSOROPTIONS>);
  m.impl("rand.generator_with_names", unsupportedRandomOp<IntArrayRef, std::optional<Generator>, std::optional<DimnameList>, TENSOROPTIONS>);
  m.impl("rand.generator", unsupportedRandomOp<IntArrayRef, optional<Generator>, TENSOROPTIONS>);
  m.impl("rand.names", unsupportedRandomOp<IntArrayRef, optional<DimnameList>, TENSOROPTIONS>);
  m.impl("rand.generator_with_names", unsupportedRandomOp<IntArrayRef, optional<Generator>, optional<DimnameList>, TENSOROPTIONS>);
  m.impl("rand.out", unsupportedRandomOp_<IntArrayRef, Tensor&>);
  m.impl("rand.generator_out", unsupportedRandomOp_<IntArrayRef, std::optional<Generator>, Tensor&>);
  m.impl("rand.generator_out", unsupportedRandomOp_<IntArrayRef, optional<Generator>, Tensor&>);

  m.impl("randn", unsupportedRandomOp<IntArrayRef, TENSOROPTIONS>);
  m.impl("randn.generator", unsupportedRandomOp<IntArrayRef, std::optional<Generator>, TENSOROPTIONS>);
  m.impl("randn.names", unsupportedRandomOp<IntArrayRef, std::optional<DimnameList>, TENSOROPTIONS>);
  m.impl("randn.generator_with_names", unsupportedRandomOp<IntArrayRef, std::optional<Generator>, std::optional<DimnameList>, TENSOROPTIONS>);
  m.impl("randn.generator", unsupportedRandomOp<IntArrayRef, optional<Generator>, TENSOROPTIONS>);
  m.impl("randn.names", unsupportedRandomOp<IntArrayRef, optional<DimnameList>, TENSOROPTIONS>);
  m.impl("randn.generator_with_names", unsupportedRandomOp<IntArrayRef, optional<Generator>, optional<DimnameList>, TENSOROPTIONS>);
  m.impl("randn.out", unsupportedRandomOp_<IntArrayRef, Tensor&>);
  m.impl("randn.generator_out", unsupportedRandomOp_<IntArrayRef, std::optional<Generator>, Tensor&>);
  m.impl("randn.generator_out", unsupportedRandomOp_<IntArrayRef, optional<Generator>, Tensor&>);

  m.impl("randperm", unsupportedRandomOp<int64_t, TENSOROPTIONS>);
  m.impl("randperm.generator", unsupportedRandomOp<int64_t, std::optional<Generator>, TENSOROPTIONS>);
  m.impl("randperm.generator", unsupportedRandomOp<int64_t, optional<Generator>, TENSOROPTIONS>);
  m.impl("randperm.out", unsupportedRandomOp_<int64_t, Tensor&>);
  m.impl("randperm.generator_out", unsupportedRandomOp_<int64_t, std::optional<Generator>, Tensor&>);
  m.impl("randperm.generator_out", unsupportedRandomOp_<int64_t, optional<Generator>, Tensor&>);

  m.impl("randint", unsupportedRandomOp<int64_t, IntArrayRef, TENSOROPTIONS>);
  m.impl("randint.generator", unsupportedRandomOp<int64_t, IntArrayRef, std::optional<Generator>, TENSOROPTIONS>);
  m.impl("randint.generator", unsupportedRandomOp<int64_t, IntArrayRef, optional<Generator>, TENSOROPTIONS>);
  m.impl("randint.low", unsupportedRandomOp<int64_t, int64_t, IntArrayRef, TENSOROPTIONS>);
  m.impl("randint.low_generator", unsupportedRandomOp<int64_t, int64_t, IntArrayRef, std::optional<Generator>, TENSOROPTIONS>);
  m.impl("randint.low_generator", unsupportedRandomOp<int64_t, int64_t, IntArrayRef, optional<Generator>, TENSOROPTIONS>);
  m.impl("randint.out", unsupportedRandomOp_<int64_t, IntArrayRef, Tensor&>);
  m.impl("randint.generator_out", unsupportedRandomOp_<int64_t, IntArrayRef, std::optional<Generator>, Tensor&>);
  m.impl("randint.generator_out", unsupportedRandomOp_<int64_t, IntArrayRef, optional<Generator>, Tensor&>);
  m.impl("randint.low_out", unsupportedRandomOp_<int64_t, int64_t, IntArrayRef, Tensor&>);
  m.impl("randint.low_generator_out", unsupportedRandomOp_<int64_t, int64_t, IntArrayRef, std::optional<Generator>, Tensor&>);
  m.impl("randint.low_generator_out", unsupportedRandomOp_<int64_t, int64_t, IntArrayRef, optional<Generator>, Tensor&>);

  m.impl("uniform_", unsupportedRandomOp_<Tensor&, double, double, std::optional<Generator>>);
  m.impl("uniform_", unsupportedRandomOp_<Tensor&, double, double, optional<Generator>>);

#undef TENSOROPTIONS
}
@ -13,7 +13,7 @@ namespace at {

constexpr size_t dim_bitset_size = 64;

inline std::bitset<dim_bitset_size> dim_list_to_bitset(
static inline std::bitset<dim_bitset_size> dim_list_to_bitset(
    OptionalIntArrayRef opt_dims,
    size_t ndims) {
  TORCH_CHECK(
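Note: dim_list_to_bitset turns an optional dim list into a 64-bit membership mask. A standalone sketch of the usual semantics (no dims given means all dims; negative dims wrap); the function name and the pointer-as-optional convention are illustrative:

#include <bitset>
#include <cstdint>
#include <vector>

std::bitset<64> dims_to_bitset_sketch(const std::vector<int64_t>* opt_dims,
                                      size_t ndims) {
  std::bitset<64> seen;
  if (opt_dims == nullptr || opt_dims->empty()) {
    for (size_t d = 0; d < ndims; ++d) {
      seen[d] = true;  // reduce over every dimension
    }
    return seen;
  }
  for (int64_t dim : *opt_dims) {
    if (dim < 0) {
      dim += static_cast<int64_t>(ndims);  // wrap negative indices
    }
    seen[static_cast<size_t>(dim)] = true;
  }
  return seen;
}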
@ -296,7 +296,7 @@ TORCH_API Tensor cached_cast(
    const Tensor& arg,
    c10::DeviceType device_type = c10::DeviceType::CUDA);

// Overload to process std::optional<Tensor>
// Overload to process optional<Tensor>
inline std::optional<Tensor> cached_cast(
    at::ScalarType to_type,
    const std::optional<Tensor>& arg,
@ -364,7 +364,7 @@ public:
  bool is(const Dict& rhs) const;

  // private API for now because the return type will change to TypePtr
  // instead of std::optional<TypePtr> once types are mandatory.
  // instead of optional<TypePtr> once types are mandatory.
  TypePtr keyType() const;
  TypePtr valueType() const;

@ -18,7 +18,7 @@ TORCH_API std::ostream& print(
    std::ostream& stream,
    const Tensor& tensor,
    int64_t linesize);
inline std::ostream& operator<<(std::ostream & out, const Tensor & t) {
static inline std::ostream& operator<<(std::ostream & out, const Tensor & t) {
  return print(out,t,80);
}
TORCH_API void print(const Tensor & t, int64_t linesize=80);
@ -159,7 +159,7 @@ class IListRefTagImpl<IListRefTag::Unboxed, at::OptionalTensorRef>

template <>
class IListRefTagImpl<IListRefTag::Boxed, at::OptionalTensorRef>
    : public IListRefTagImplBase<IListRefTag::Boxed, at::OptionalTensorRef, std::optional<at::Tensor>> {
    : public IListRefTagImplBase<IListRefTag::Boxed, at::OptionalTensorRef, optional<at::Tensor>> {

 public:
  /*
@ -18,11 +18,11 @@ static std::vector<at::Tensor> get_tensor_vector() {
  return tensors;
}

static std::vector<std::optional<at::Tensor>> get_boxed_opt_tensor_vector() {
  std::vector<std::optional<at::Tensor>> optional_tensors;
static std::vector<optional<at::Tensor>> get_boxed_opt_tensor_vector() {
  std::vector<optional<at::Tensor>> optional_tensors;
  const size_t SIZE = 5;
  for (size_t i = 0; i < SIZE * 2; i++) {
    auto opt_tensor = (i % 2 == 0) ? std::optional<at::Tensor>(at::empty({0})) : nullopt;
    auto opt_tensor = (i % 2 == 0) ? optional<at::Tensor>(at::empty({0})) : nullopt;
    optional_tensors.emplace_back(opt_tensor);
  }
  return optional_tensors;
@ -234,7 +234,7 @@ TEST(ITensorListRefIteratorTest, Unboxed_Iterate) {

TEST(IOptTensorListRefTest, Boxed_Iterate) {
  auto vec = get_boxed_opt_tensor_vector();
  const List<std::optional<at::Tensor>> boxed(vec);
  const List<optional<at::Tensor>> boxed(vec);
  at::IOptTensorListRef list(boxed);
  size_t i = 0;
  for (const auto t : list) {
@ -16,7 +16,7 @@ void NamesMode::set_enabled(bool enabled) {
  c10::impl::tls_set_dispatch_key_excluded(DispatchKey::Named, !enabled);
}

const TensorBase& internal_set_names_inplace(const TensorBase& tensor, std::optional<DimnameList> names) {
const TensorBase& internal_set_names_inplace(const TensorBase& tensor, optional<DimnameList> names) {
  impl::internal_set_names_inplace(tensor.unsafeGetTensorImpl(), names, /*validate_names=*/true);
  return tensor;
}
@ -84,7 +84,7 @@ void check_names_valid_for(TensorImpl* impl, DimnameList names) {
  check_names_valid_for(impl->dim(), names);
}

void internal_set_names_inplace(TensorImpl* impl, std::optional<DimnameList> names, bool validate_names) {
void internal_set_names_inplace(TensorImpl* impl, optional<DimnameList> names, bool validate_names) {
  TORCH_CHECK(impl->layout() == Layout::Strided,
      "NYI: named tensors only support strided layout");
  TORCH_CHECK(impl->device().is_cpu() || impl->device().is_cuda() || impl->device().is_xpu() || impl->device().is_privateuseone(),
@ -130,7 +130,7 @@ void internal_set_names_inplace(TensorImpl* impl, std::vector<Dimname>&& names,
optional<DimnameList> get_opt_names(const TensorImpl* impl) {
  const auto* meta = get_named_tensor_meta(impl);
  if (meta == nullptr) {
    return std::nullopt;
    return nullopt;
  } else {
    return meta->names();
  }
@ -16,7 +16,7 @@ class TensorBase;
// actually exists outside of c10 and needs to be moved in.

// TensorImpl has a unique_ptr<NamedTensorMetaInterface> field.
// XXX: Ideally we would just put std::optional<vector<Dimname>> into TensorImpl.
// XXX: Ideally we would just put optional<vector<Dimname>> into TensorImpl.
//
// This class has an important invariant: there must be at least ONE
// non-wildcard
@ -93,7 +93,7 @@ torch::jit::Stack boxArgs(Args... args) {
}

template <class T>
inline constexpr size_t boxed_size_one() {
static inline constexpr size_t boxed_size_one() {
  static_assert(!std::is_same<std::decay_t<T>, c10::TensorOptions>::value, "need to patch this path to support TensorOptions passed by reference");
  return 1;
}
@ -393,9 +393,9 @@ namespace impl {
  };
  template<class T, bool AllowDeprecatedTypes>
  struct ivalue_to_arg<optional<ArrayRef<T>>, AllowDeprecatedTypes> final {
    // If an argument is std::optional<ArrayRef<T>>, convert the IValue to an std::optional<std::vector<T>> and pass that
    // to the operator. OptionalArray<T> is basically a std::optional<std::vector<T>> but implicitly convertible
    // to std::optional<ArrayRef<T>>.
    // If an argument is optional<ArrayRef<T>>, convert the IValue to an optional<std::vector<T>> and pass that
    // to the operator. OptionalArray<T> is basically a optional<std::vector<T>> but implicitly convertible
    // to optional<ArrayRef<T>>.
    static OptionalArray<T> call(IValue& v) {
      return ivalue_to_arg<OptionalArray<T>, AllowDeprecatedTypes>::call(v);
    }
@ -404,8 +404,8 @@ namespace impl {
  template<class T, bool AllowDeprecatedTypes>
  struct ivalue_to_arg<OptionalArrayRef<T>, AllowDeprecatedTypes> final {
    // If an argument is OptionalArrayRef<T>, convert the IValue to an
    // std::optional<std::vector<T>> and pass that to the operator. OptionalArray<T>
    // is basically a std::optional<std::vector<T>> but implicitly convertible to
    // optional<std::vector<T>> and pass that to the operator. OptionalArray<T>
    // is basically a optional<std::vector<T>> but implicitly convertible to
    // OptionalArrayRef<T>
    static OptionalArray<T> call(IValue& v) {
      return ivalue_to_arg<OptionalArray<T>, AllowDeprecatedTypes>::call(v);
@ -325,7 +325,7 @@ struct TORCH_API FunctionSchema {
  std::optional<AliasAnalysisKind> alias_kind_;

  template <typename T>
  void checkArg(const IValue& value, const Argument& argument, std::optional<size_t> pos) const;
  void checkArg(const IValue& value, const Argument& argument, optional<size_t> pos) const;

  void checkSchema() const {
    bool seen_default_arg = false;

@ -328,7 +328,7 @@ template<typename T>
inline void FunctionSchema::checkArg(
    const IValue& value,
    const Argument& argument,
    std::optional<size_t> pos) const {
    optional<size_t> pos) const {
  if (value.isTensor() && argument.type() == TensorType::get()) {
    // Fast-path for the common case
    return;
@ -87,7 +87,7 @@ struct StreamData3Holder : c10::intrusive_ptr_target {
} // namespace ivalue

// This is an owning wrapper for a std::optional<std::vector<T>>
// that can be implicitly converted to a (non-owning) std::optional<ArrayRef<T>>.
// that can be implicitly converted to a (non-owning) optional<ArrayRef<T>>.
// Its purpose is to be used in generated code to keep the vector alive
// either until the end of a statement (as a temporary), or as a saved arg
// in autograd.
@ -120,14 +120,14 @@ struct OptionalArray {

  operator std::optional<c10::ArrayRef<T>>() {
    if (!list) {
      return std::nullopt;
      return nullopt;
    }
    return *list;
  }

  operator c10::OptionalArrayRef<T>() {
    if (!list) {
      return std::nullopt;
      return nullopt;
    }
    return *list;
  }
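Note: the owning-wrapper trick above can be shown in isolation: hold an optional vector and convert on demand to a non-owning optional view. A simplified sketch; the real struct is templated and lives in ivalue.h, and the OptionalArraySketch name is illustrative:

#include <optional>
#include <vector>
#include <c10/util/ArrayRef.h>

struct OptionalArraySketch {
  std::optional<std::vector<int64_t>> list;

  // Non-owning view; only valid while *this (and its vector) is alive.
  operator std::optional<c10::ArrayRef<int64_t>>() {
    if (!list) {
      return std::nullopt;
    }
    return c10::ArrayRef<int64_t>(*list);
  }
};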
@ -1021,9 +1021,9 @@ struct TORCH_API IValue final {
  // ToOptional: convert a IValue to the Optional obj that accepts both T and
  // None
  template <typename T>
  std::optional<T> toOptional();
  optional<T> toOptional();
  template <typename T>
  std::optional<T> toOptional() const;
  optional<T> toOptional() const;

  /// @private [doxygen private]
  /// this is a shallow comparison of two IValues to test the object identity

@ -1375,7 +1375,7 @@ struct C10_EXPORT ivalue::Future final : c10::intrusive_ptr_target {
  // The device that was current when markCompleted was called, which we'll
  // restore when invoking callbacks. It's optional because we'll only store it
  // if the future completes successfully.
  std::optional<c10::Device> currentDevice_;
  optional<c10::Device> currentDevice_;

  // The events that correspond to the completion of the async I/O kernels. They
  // are recorded on the appropriate streams when the future is marked completed

@ -1748,7 +1748,7 @@ template <class T>
struct _fake_type {};

// generic_to<T> converts an IValue from a generic list or generic dict
// to a concrete list/dict type like List<T>, Dict<...> or std::optional<T>.
// to a concrete list/dict type like List<T>, Dict<...> or optional<T>.
// Note that in the case of lists, this only works for IValue-based lists,
// i.e. not for int64_t, double, ...
// generic_to<T> is an implementation detail of IValue::to<T> and not
@@ -1949,7 +1949,7 @@ inline T IValue::to() && {
 template <>
 inline std::optional<c10::string_view> IValue::to() && {
   // In the default implementation, the IValue is destroyed with std::move.
-  // But if the unboxed type is std::optional<string_view> we cannot destroy
+  // But if the unboxed type is optional<string_view> we cannot destroy
   // the IValue.
   return generic_to(*this, _fake_type<std::optional<c10::string_view>>{});
 }

@@ -2366,7 +2366,7 @@ inline std::optional<std::reference_wrapper<const std::string>> IValue::
   if (isNone()) {
     return std::nullopt;
   }
-  AT_ASSERT(isString(), "Expected std::optional<string> but got ", tagKind());
+  AT_ASSERT(isString(), "Expected optional<string> but got ", tagKind());
   TORCH_INTERNAL_ASSERT_DEBUG_ONLY(
       payload.u.as_intrusive_ptr != c10::UndefinedTensorImpl::singleton(),
       "called toOptionalStringRef on null intrusive_ptr IValue");

@@ -2390,17 +2390,17 @@ inline PyObject* IValue::toPyObject() const {
 }

 template <typename T>
-inline std::optional<T> IValue::toOptional() {
+inline optional<T> IValue::toOptional() {
   if (this->isNone()) {
-    return std::nullopt;
+    return nullopt;
   }
   return this->to<T>();
 }

 template <typename T>
-inline std::optional<T> IValue::toOptional() const {
+inline optional<T> IValue::toOptional() const {
   if (this->isNone()) {
-    return std::nullopt;
+    return nullopt;
   }
   return this->to<T>();
 }

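Reviewer sketch: the toOptional contract above (None becomes an empty optional, anything else unboxes to T) in a self-contained form; ToyValue is an assumed stand-in for IValue.

#include <cstdint>
#include <iostream>
#include <optional>
#include <variant>

struct None {};
using ToyValue = std::variant<None, int64_t, double>;  // stand-in for IValue

template <typename T>
std::optional<T> toOptional(const ToyValue& v) {
  if (std::holds_alternative<None>(v)) {
    return std::nullopt;  // None maps to an empty optional
  }
  return std::get<T>(v);  // otherwise unbox to the requested type
}

int main() {
  ToyValue a = int64_t{42};
  ToyValue b = None{};
  std::cout << toOptional<int64_t>(a).value_or(-1) << "\n";  // prints 42
  std::cout << toOptional<int64_t>(b).value_or(-1) << "\n";  // prints -1
}
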
@@ -2043,7 +2043,7 @@ template <class T, bool fake>
 struct getMaybeFakeTypePtr_<std::optional<T>, fake> final {
   static const auto& call() {
     static auto inner_type = getMaybeFakeTypePtr_<T, fake>::call();
-    // The "per std::optional<T>" static singleton needs to live in a .cpp file,
+    // The "per optional<T>" static singleton needs to live in a .cpp file,
     // otherwise we'll end up with one singleton instance per shared library.
     static auto type = OptionalType::get(inner_type);
     return type;

@@ -2055,7 +2055,7 @@ template<>
 struct getTypePtr_<at::OptionalIntArrayRef> final {
   static const auto& call() {
     static auto inner_type = getMaybeFakeTypePtr_<IntArrayRef, false>::call();
-    // The "per std::optional<T>" static singleton needs to live in a .cpp file,
+    // The "per optional<T>" static singleton needs to live in a .cpp file,
     // otherwise we'll end up with one singleton instance per shared library.
     static auto type = OptionalType::get(inner_type);
     return type;

@@ -2065,7 +2065,7 @@ struct getTypePtr_<at::OptionalIntArrayRef> final {
 template <bool fake>
 struct getMaybeFakeTypePtr_<at::OptionalSymIntArrayRef, fake> final {
   static const auto& call() {
-    // The "per std::optional<T>" static singleton needs to live in a .cpp file,
+    // The "per optional<T>" static singleton needs to live in a .cpp file,
     // otherwise we'll end up with one singleton instance per shared library.
     static auto inner_type = getMaybeFakeTypePtr_<SymIntArrayRef, fake>::call();
     static auto type = OptionalType::get(inner_type);

@@ -455,7 +455,7 @@ struct TORCH_API Type {
   // this method.
   std::string annotation_str(const TypePrinter& printer) const {
     if (printer) {
-      // the printer can return std::nullopt to fall through to the default impl
+      // the printer can return nullopt to fall through to the default impl
       if (auto renamed = printer(*this)) {
         return *renamed;
       }

@@ -9,11 +9,11 @@
 * [Note: hacky wrapper removal for optional tensor]
 *
 * The kernel implementation takes an optional tensor marked in the schema as
-* Tensor? but the C++ function takes Tensor instead of the std::optional<Tensor>
+* Tensor? but the C++ function takes Tensor instead of the optional<Tensor>
 * expected by the dispatcher.
 *
 * To remove the hacky wrapper, the C++ function is changed to take
-* std::optional<Tensor> and unwrap the Tensor value at the beginning of
+* optional<Tensor> and unwrap the Tensor value at the beginning of
 * the function, e.g.:
 * > c10::MaybeOwned<Tensor> weight_maybe_owned =
 * >     at::borrow_from_optional_tensor(weight_opt);

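Reviewer sketch of the unwrap pattern this note describes. Tensor and the borrow helper below are simplified stand-ins; the real at::borrow_from_optional_tensor returns c10::MaybeOwned<Tensor> rather than a plain reference.

#include <iostream>
#include <optional>

struct Tensor { bool defined = false; };  // dummy stand-in

// Borrow the caller's tensor if present, otherwise fall back to a shared
// undefined tensor, so the kernel body never touches the optional again.
const Tensor& borrow_from_optional_tensor(const std::optional<Tensor>& opt) {
  static const Tensor undefined{};
  return opt.has_value() ? *opt : undefined;
}

void kernel(const std::optional<Tensor>& weight_opt) {
  const Tensor& weight = borrow_from_optional_tensor(weight_opt);
  std::cout << "weight defined: " << weight.defined << "\n";
  // ... the rest of the kernel uses `weight` unconditionally ...
}

int main() {
  kernel(std::nullopt);   // weight defined: 0
  kernel(Tensor{true});   // weight defined: 1
}
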
@@ -62,7 +62,7 @@ inline void check_and_update_common_device(optional<Device>& common_device, cons
   }
 }

-inline void check_and_update_common_device(optional<Device>& common_device, const std::optional<at::Tensor>& tensor, at::CheckedFrom methodName, at::CheckedFrom argName) {
+inline void check_and_update_common_device(optional<Device>& common_device, const optional<at::Tensor>& tensor, at::CheckedFrom methodName, at::CheckedFrom argName) {
   if (tensor.has_value()) {
     check_and_update_common_device(common_device, tensor.value(), methodName, argName);
   }

@@ -434,7 +434,7 @@ public:
   std::optional<std::variant<OperatorName, FunctionSchema>> schemaOrName_;

   std::vector<KernelRegistrationConfig> kernels;
-  std::optional<AliasAnalysisKind> aliasAnalysisKind_;
+  optional<AliasAnalysisKind> aliasAnalysisKind_;
   friend class RegisterOperators;
   friend class Library;
 };

@@ -133,6 +133,32 @@ struct VecConvert<int32_t, 1, uint8_t, 1> {
   }
 };

+
+template <>
+struct VecConvert<int32_t, 1, float, 1> {
+  static inline VectorizedN<int32_t, 1> apply(
+      const VectorizedN<float, 1>& src) {
+    return Vectorized<int32_t>(_mm256_cvttps_epi32(src[0]));
+  }
+};
+
+template <>
+struct VecConvert<float, 1, int32_t, 1> {
+  static inline VectorizedN<float, 1> apply(
+      const VectorizedN<int32_t, 1>& src) {
+    return Vectorized<float>(_mm256_cvtepi32_ps(src[0]));
+  }
+};
+
+template <>
+struct VecConvert<int16_t, 1, uint8_t, 1> {
+  static inline VectorizedN<int16_t, 1> apply(
+      const VectorizedN<uint8_t, 1>& src) {
+    auto src128 = _mm256_castsi256_si128(src[0]);
+    return Vectorized<int16_t>(_mm256_cvtepu8_epi16(src128));
+  }
+};
+
 template <typename dst_t, typename src_t>
 struct VecConvert<
     dst_t,

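Reviewer note: _mm256_cvttps_epi32 in the new float-to-int32 specialization truncates toward zero rather than rounding to nearest, and out-of-range inputs become 0x80000000. A scalar model of the round trip:

#include <cassert>
#include <cmath>
#include <cstdint>

// Scalar model: float -> int32 truncates toward zero (the vector op yields
// 0x80000000 for out-of-range inputs, not modeled here); int32 -> float is
// exact for magnitudes up to 2^24.
int32_t cvtt_f32_i32(float x) { return static_cast<int32_t>(std::trunc(x)); }
float cvt_i32_f32(int32_t v) { return static_cast<float>(v); }

int main() {
  assert(cvtt_f32_i32(2.9f) == 2);    // truncation, not rounding
  assert(cvtt_f32_i32(-2.9f) == -2);  // toward zero for negatives too
  assert(cvt_i32_f32(7) == 7.0f);
}
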
@@ -246,6 +246,12 @@ public:
     return _mm256_floor_pd(values);
   }
   Vectorized<double> frac() const;
+  double reduce_add() const {
+    return values[0];
+  }
+  double reduce_max() const {
+    return values[0];
+  }
   Vectorized<double> neg() const {
     return _mm256_xor_pd(_mm256_set1_pd(-0.), values);
   }

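Reviewer note: these AVX2 reduce_add/reduce_max bodies return only lane 0, which reads like a placeholder on this branch (the AVX512 counterparts later in the diff use real reduce intrinsics). AVX2 has no single reduce instruction, so a full horizontal sum looks roughly like this sketch:

#include <cstdio>
#include <immintrin.h>

// Horizontal sum of a __m256d (compile with -mavx).
double hsum_pd(__m256d v) {
  __m128d lo = _mm256_castpd256_pd128(v);      // lanes 0,1
  __m128d hi = _mm256_extractf128_pd(v, 1);    // lanes 2,3
  lo = _mm_add_pd(lo, hi);                     // {0+2, 1+3}
  __m128d shuf = _mm_unpackhi_pd(lo, lo);      // broadcast upper lane
  return _mm_cvtsd_f64(_mm_add_sd(lo, shuf));  // (0+2) + (1+3)
}

int main() {
  __m256d v = _mm256_set_pd(4.0, 3.0, 2.0, 1.0);
  std::printf("%f\n", hsum_pd(v));  // 10.0
}
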
@@ -342,6 +342,12 @@ public:
     }
     return loadu(tmp);
   }
+  float reduce_add() const {
+    return values[0];
+  }
+  float reduce_max() const {
+    return values[0];
+  }
   Vectorized<float> neg() const {
     return _mm256_xor_ps(_mm256_set1_ps(-0.f), values);
   }

@@ -241,6 +241,12 @@ public:
   Vectorized<int32_t> abs() const {
     return _mm256_abs_epi32(values);
   }
+  int32_t reduce_add() const {
+    return values[0];
+  }
+  int32_t reduce_max() const {
+    return values[0];
+  }
   Vectorized<int32_t> real() const {
     return *this;
   }

@@ -11,6 +11,7 @@
 #define SLEEF_STATIC_LIBS
 #include <sleef.h>
 #endif
+#include <iostream>

 namespace at {
 namespace vec {

@@ -43,6 +44,9 @@ static inline void cvtbf16_fp32(const __m512i& a, __m512& o1, __m512& o2) {
 }

 static inline __m256i cvtfp32_bf16(const __m512& src) {
+  // #if defined(CPU_CAPABILITY_AVX512_BF16)
+  //   return reinterpret_cast<__m256i>(_mm512_cvtneps_pbh(src));
+  // #else
   __m512i value = _mm512_castps_si512(src);
   __m512i nan = _mm512_set1_epi32(0xffff);
   auto mask_value = _mm512_cmp_ps_mask(src, src, _CMP_ORD_Q);

@@ -59,6 +63,7 @@ static inline __m256i cvtfp32_bf16(const __m512& src) {
   // Check NaN before converting back to bf16
   t_value = _mm512_mask_blend_epi32(mask_value, nan, t_value);
   return _mm512_cvtusepi32_epi16(t_value);
+  // #endif
 }

 static inline __m512i cvtfp32_bf16(const __m512& a, const __m512& b) {

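Reviewer sketch: the non-BF16 path above masks NaN to a canonical 0xffff and narrows fp32 to bf16; a scalar sketch of the standard bias-and-truncate round-to-nearest-even conversion that such code implements (the exact bias step is in the elided middle of the hunk, so this is an assumption about it):

#include <cmath>
#include <cstdint>
#include <cstdio>
#include <cstring>

uint16_t fp32_to_bf16(float f) {
  uint32_t bits;
  std::memcpy(&bits, &f, sizeof(bits));
  if (std::isnan(f)) {
    return 0xFFFF;                    // canonical NaN, as in the masked blend
  }
  uint32_t lsb = (bits >> 16) & 1;    // round to nearest, ties to even
  bits += 0x7FFF + lsb;
  return static_cast<uint16_t>(bits >> 16);  // keep the high half
}

int main() {
  std::printf("%04x\n", fp32_to_bf16(1.0f));  // 3f80
  std::printf("%04x\n", fp32_to_bf16(NAN));   // ffff
}
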
@@ -117,6 +117,49 @@ struct VecConvert<int32_t, 1, uint8_t, 1> {
   }
 };

+template <>
+struct VecConvert<int32_t, 1, float, 1> {
+  static inline VectorizedN<int32_t, 1> apply(
+      const VectorizedN<float, 1>& src) {
+    return Vectorized<int32_t>(_mm512_cvttps_epi32(src[0]));
+  }
+};
+
+template <>
+struct VecConvert<float, 1, int32_t, 1> {
+  static inline VectorizedN<float, 1> apply(
+      const VectorizedN<int32_t, 1>& src) {
+    return Vectorized<float>(_mm512_cvtepi32_ps(src[0]));
+  }
+};
+
+template <>
+struct VecConvert<int16_t, 1, uint8_t, 1> {
+  static inline VectorizedN<int16_t, 1> apply(
+      const VectorizedN<uint8_t, 1>& src) {
+    auto src256 = _mm512_castsi512_si256(src[0]);
+    return Vectorized<int16_t>(_mm512_cvtepu8_epi16(src256));
+  }
+};
+
+template <>
+struct VecConvert<int8_t, 1, int32_t, 1> {
+  static inline VectorizedN<int8_t, 1> apply(
+      const VectorizedN<int32_t, 1>& src) {
+    auto src128 = _mm512_cvtepi32_epi8(src[0]);
+    return Vectorized<int8_t>(_mm512_castsi128_si512(src128));
+  }
+};
+
+template <>
+struct VecConvert<int8_t, 1, int16_t, 1> {
+  static inline VectorizedN<int8_t, 1> apply(
+      const VectorizedN<int16_t, 1>& src) {
+    auto src256 = _mm512_cvtepi16_epi8(src[0]);
+    return Vectorized<int8_t>(_mm512_castsi256_si512(src256));
+  }
+};
+
 template <typename dst_t, typename src_t>
 struct VecConvert<
     dst_t,

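Reviewer note: the new int8 specializations narrow with plain truncation (VPMOVDB / VPMOVWB keep each element's low byte), not saturation. A scalar model of what happens to out-of-range values:

#include <cassert>
#include <cstdint>

int8_t narrow_i32_i8(int32_t v) {
  return static_cast<int8_t>(v & 0xFF);  // keep the low 8 bits, like VPMOVDB
}

int main() {
  assert(narrow_i32_i8(300) == 44);   // 300 = 0x12C, low byte 0x2C = 44
  assert(narrow_i32_i8(-1) == -1);    // 0xFF reinterpreted as -1
  assert(narrow_i32_i8(127) == 127);  // in-range values pass through
}
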
@@ -255,6 +255,12 @@ public:
     return _mm512_floor_pd(values);
   }
   Vectorized<double> frac() const;
+  double reduce_add() const {
+    return values[0];
+  }
+  double reduce_max() const {
+    return values[0];
+  }
   Vectorized<double> neg() const {
     return _mm512_xor_pd(_mm512_set1_pd(-0.), values);
   }

@@ -236,27 +236,27 @@ public:
   }
   Vectorized<float> exp_u20() const {
     // A faster version of exp with ULP=20
-    static __m512 vec_factorial_1 =
+    const __m512 vec_factorial_1 =
         _mm512_set1_ps(0.999999701f); // 1/factorial(1)
-    static __m512 vec_factorial_2 =
+    const __m512 vec_factorial_2 =
         _mm512_set1_ps(0.499991506f); // 1/factorial(2)
-    static __m512 vec_factorial_3 =
+    const __m512 vec_factorial_3 =
         _mm512_set1_ps(0.166676521f); // 1/factorial(3)
-    static __m512 vec_factorial_4 =
+    const __m512 vec_factorial_4 =
         _mm512_set1_ps(0.0418978221f); // 1/factorial(4)
-    static __m512 vec_factorial_5 =
+    const __m512 vec_factorial_5 =
         _mm512_set1_ps(0.00828929059f); // 1/factorial(5)
-    static __m512 vec_exp_log2ef =
+    const __m512 vec_exp_log2ef =
         _mm512_castsi512_ps(_mm512_set1_epi32(0x3fb8aa3b)); // log2(e)
-    static __m512 vec_half = _mm512_set1_ps(0.5f);
-    static __m512 vec_one = _mm512_set1_ps(1.f);
-    static __m512 vec_zero = _mm512_set1_ps(0.f);
-    static __m512 vec_two = _mm512_set1_ps(2.f);
-    static __m512 vec_ln2f = _mm512_castsi512_ps(_mm512_set1_epi32(0x3f317218)); // ln(2)
-    static __m512 vec_ln_flt_min = _mm512_castsi512_ps(_mm512_set1_epi32(0xc2aeac50));
-    static __m512 vec_ln_flt_max = _mm512_castsi512_ps(_mm512_set1_epi32(0x42b17218));
-    static __m512i vec_127 = _mm512_set1_epi32(0x0000007f);
-    static int n_mantissa_bits = 23;
+    const __m512 vec_half = _mm512_set1_ps(0.5f);
+    const __m512 vec_one = _mm512_set1_ps(1.f);
+    const __m512 vec_zero = _mm512_set1_ps(0.f);
+    const __m512 vec_two = _mm512_set1_ps(2.f);
+    const __m512 vec_ln2f = _mm512_castsi512_ps(_mm512_set1_epi32(0x3f317218)); // ln(2)
+    const __m512 vec_ln_flt_min = _mm512_castsi512_ps(_mm512_set1_epi32(0xc2aeac50));
+    const __m512 vec_ln_flt_max = _mm512_castsi512_ps(_mm512_set1_epi32(0x42b17218));
+    const __m512i vec_127 = _mm512_set1_epi32(0x0000007f);
+    const int n_mantissa_bits = 23;

     // exp(x) =
     // = exp(n * ln(2) + r) // divide x by ln(2) and get quot and rem

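Reviewer sketch: the constants above implement the range reduction the trailing comment starts to spell out: exp(x) = 2^n * exp(r), n = round(x / ln 2), with exp(r) from the 1/k! polynomial. A scalar version of that scheme (the vector code additionally clamps to [ln_flt_min, ln_flt_max], which the sketch omits):

#include <cmath>
#include <cstdio>

float exp_u20_scalar(float x) {
  const float log2e = 1.442695041f;
  const float ln2 = 0.6931471806f;
  float n = std::floor(x * log2e + 0.5f);  // quotient of x / ln(2), rounded
  float r = x - n * ln2;                   // small remainder
  // exp(r) ~= 1 + r/1! + r^2/2! + r^3/3! + r^4/4! + r^5/5!  (Horner form)
  float p = 0.00828929059f;                // 1/factorial(5)
  p = p * r + 0.0418978221f;               // 1/factorial(4)
  p = p * r + 0.166676521f;                // 1/factorial(3)
  p = p * r + 0.499991506f;                // 1/factorial(2)
  p = p * r + 0.999999701f;                // 1/factorial(1)
  p = p * r + 1.0f;
  return std::ldexp(p, static_cast<int>(n));  // scale by 2^n
}

int main() {
  std::printf("%f vs %f\n", exp_u20_scalar(1.0f), std::exp(1.0f));
}
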
@@ -364,6 +364,12 @@ public:
     }
     return loadu(tmp);
   }
+  float reduce_add() const {
+    return _mm512_reduce_add_ps(values);
+  }
+  float reduce_max() const {
+    return _mm512_reduce_max_ps(values);
+  }
   Vectorized<float> neg() const {
     return _mm512_xor_ps(_mm512_set1_ps(-0.f), values);
   }

@@ -473,26 +479,26 @@ inline Vectorized<float> Vectorized<float>::frac() const {
 // either input is a NaN.
 template <>
 Vectorized<float> inline maximum(const Vectorized<float>& a, const Vectorized<float>& b) {
-  auto zero_vec = _mm512_set1_epi32(0);
-  auto max = _mm512_max_ps(a, b);
-  auto isnan_mask = _mm512_cmp_ps_mask(a, b, _CMP_UNORD_Q);
-  auto isnan = _mm512_castsi512_ps(_mm512_mask_set1_epi32(zero_vec, isnan_mask,
-                                                          0xFFFFFFFF));
-  // Exploit the fact that all-ones is a NaN.
-  return _mm512_or_ps(max, isnan);
+  // auto zero_vec = _mm512_set1_epi32(0);
+  return _mm512_max_ps(a, b);
+  // auto isnan_mask = _mm512_cmp_ps_mask(a, b, _CMP_UNORD_Q);
+  // auto isnan = _mm512_castsi512_ps(_mm512_mask_set1_epi32(zero_vec, isnan_mask,
+  //                                                         0xFFFFFFFF));
+  // // Exploit the fact that all-ones is a NaN.
+  // return _mm512_or_ps(max, isnan);
 }

 // Implements the IEEE 754 201X `minimum` operation, which propagates NaN if
 // either input is a NaN.
 template <>
 Vectorized<float> inline minimum(const Vectorized<float>& a, const Vectorized<float>& b) {
-  auto zero_vec = _mm512_set1_epi32(0);
-  auto min = _mm512_min_ps(a, b);
-  auto isnan_mask = _mm512_cmp_ps_mask(a, b, _CMP_UNORD_Q);
-  auto isnan = _mm512_castsi512_ps(_mm512_mask_set1_epi32(zero_vec, isnan_mask,
-                                                          0xFFFFFFFF));
+  // auto zero_vec = _mm512_set1_epi32(0);
+  return _mm512_min_ps(a, b);
+  // auto isnan_mask = _mm512_cmp_ps_mask(a, b, _CMP_UNORD_Q);
+  // auto isnan = _mm512_castsi512_ps(_mm512_mask_set1_epi32(zero_vec, isnan_mask,
+  //                                                         0xFFFFFFFF));
   // Exploit the fact that all-ones is a NaN.
-  return _mm512_or_ps(min, isnan);
+  // return _mm512_or_ps(min, isnan);
 }

 template <>

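Reviewer note: the commented-out lines were exactly what made maximum/minimum propagate NaN. _mm512_max_ps alone returns its second operand when either input is NaN, and the OR with an all-ones mask (all-ones is a NaN encoding) restored IEEE behavior; as committed, this branch's maximum/minimum drop NaN propagation. The invariant being lost, in scalar form:

#include <cassert>
#include <cmath>

float ieee_maximum(float a, float b) {
  if (std::isnan(a) || std::isnan(b)) {
    return NAN;  // propagate NaN, per IEEE 754-201x maximum
  }
  return a > b ? a : b;
}

int main() {
  assert(ieee_maximum(1.0f, 2.0f) == 2.0f);
  assert(std::isnan(ieee_maximum(NAN, 2.0f)));  // plain max_ps would give 2.0
}
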
@@ -267,6 +267,12 @@ public:
   Vectorized<int32_t> abs() const {
     return _mm512_abs_epi32(values);
   }
+  int32_t reduce_add() const {
+    return _mm512_reduce_add_epi32(values);
+  }
+  int32_t reduce_max() const {
+    return _mm512_reduce_max_epi32(values);
+  }
   Vectorized<int32_t> real() const {
     return *this;
   }

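Reviewer note: _mm512_reduce_add_epi32 and _mm512_reduce_max_epi32 are sequence intrinsics that the compiler expands into a shuffle-and-combine tree. A minimal standalone use matching the new methods:

#include <cstdio>
#include <immintrin.h>

int main() {
  // Lanes 1..16 (compile with -mavx512f).
  __m512i v = _mm512_set_epi32(16, 15, 14, 13, 12, 11, 10, 9,
                               8, 7, 6, 5, 4, 3, 2, 1);
  std::printf("sum=%d max=%d\n",
              _mm512_reduce_add_epi32(v),   // 1 + 2 + ... + 16 = 136
              _mm512_reduce_max_epi32(v));  // 16
}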