mirror of
https://github.com/pytorch/pytorch.git
synced 2025-10-21 21:49:24 +08:00
Compare commits
124 Commits
viable/str
...
cpp-docs-d
Author | SHA1 | Date | |
---|---|---|---|
bad5269194 | |||
7bc13c802b | |||
d872529792 | |||
0d39ecb2ce | |||
90c0825e2d | |||
fd4bde430a | |||
b5e93ffdcf | |||
f8d0d65ddc | |||
f46ddb1e65 | |||
20082d7136 | |||
7158aa22e8 | |||
2035f6b2e6 | |||
2b58adc3bd | |||
322091d8d8 | |||
2bb4e6876c | |||
56ef7743fc | |||
64108bdbed | |||
c855f8632e | |||
12d2ef557f | |||
65aa62d50d | |||
6a09f9306c | |||
19bf67be32 | |||
1927783aa3 | |||
184817c7a8 | |||
da903b6a8b | |||
f76fdcaaf8 | |||
608792153f | |||
086dec3235 | |||
ad7b2bebc6 | |||
d444384003 | |||
3040a5d294 | |||
97463d4cf3 | |||
c813617c53 | |||
e659661ffa | |||
41808b2ba9 | |||
c0510dc447 | |||
9ec10dc26a | |||
43fc859625 | |||
f713abab16 | |||
bd3b98a8a5 | |||
e98c4e835b | |||
7b15534434 | |||
c32118dc3e | |||
e3ae80fc03 | |||
483f4e0db9 | |||
d1a62c8036 | |||
6861a27062 | |||
955f21dc2c | |||
9f5e1beaf3 | |||
2e027e8742 | |||
1e42fde45e | |||
f505caa71b | |||
65f10becdf | |||
df640df68a | |||
4c3c0ef2f1 | |||
bc33b10202 | |||
2855a045b3 | |||
9ecd092bd9 | |||
078d475d3b | |||
f37a6523ef | |||
b13cd141b3 | |||
5e47b4dd60 | |||
ee5389d520 | |||
ab01a0d7d3 | |||
801e282f39 | |||
87c9fbda22 | |||
3cc8af2d67 | |||
1fb072ac2a | |||
cac5e13e13 | |||
68350660ee | |||
ef7e2ca77e | |||
cdaaf3e4a3 | |||
0ea59c3c55 | |||
8f705d019a | |||
4bcc05777e | |||
2a6cdba6e5 | |||
53f6cc7529 | |||
ac901bf79a | |||
c965d6dbb2 | |||
ac08556f67 | |||
5fe7f29b9e | |||
ded099ecbf | |||
63fcc3e6c4 | |||
fd3e15c14f | |||
ff5faa744a | |||
4725871a81 | |||
bcd96cc6ff | |||
50e077beaa | |||
56d66ac0d7 | |||
49f7d8d19d | |||
afee8062d5 | |||
e89d12bf5d | |||
d4752bc7f6 | |||
44a5d41993 | |||
361c5d362c | |||
1fc71d1b57 | |||
8f54e27e5d | |||
8c0bc879b9 | |||
746fe78ecd | |||
b63bbe1661 | |||
3912ba3e94 | |||
cfc5cc17dc | |||
fdc8ccc5bc | |||
48b54b45d6 | |||
6861fa43e5 | |||
c1f40d33c8 | |||
7e7ac2039d | |||
23ab6a45e5 | |||
b558c986e8 | |||
415e641572 | |||
11f5f65686 | |||
af32d16a71 | |||
ba480d6bf7 | |||
4a6abba0d9 | |||
96181d6f76 | |||
2164b66121 | |||
bde18c445d | |||
f3e43ff2d7 | |||
39d0c06ed0 | |||
4ab847bbc7 | |||
4bd1505f84 | |||
1f9614cef8 | |||
35f66b83f8 | |||
4a39820e5e |
@ -37,9 +37,9 @@ case ${DOCKER_TAG_PREFIX} in
|
||||
rocm*)
|
||||
BASE_TARGET=rocm
|
||||
PYTORCH_ROCM_ARCH="gfx900;gfx906;gfx908;gfx90a;gfx942;gfx1030;gfx1100;gfx1101;gfx1102;gfx1200;gfx1201"
|
||||
# add gfx950 conditionally starting in ROCm 7.0
|
||||
# add gfx950, gfx115x conditionally starting in ROCm 7.0
|
||||
if [[ "$ROCM_VERSION" == *"7.0"* ]]; then
|
||||
PYTORCH_ROCM_ARCH="${PYTORCH_ROCM_ARCH};gfx950"
|
||||
PYTORCH_ROCM_ARCH="${PYTORCH_ROCM_ARCH};gfx950;gfx1150;gfx1151"
|
||||
fi
|
||||
EXTRA_BUILD_ARGS="${EXTRA_BUILD_ARGS} --build-arg PYTORCH_ROCM_ARCH=${PYTORCH_ROCM_ARCH}"
|
||||
;;
|
||||
|
@ -344,7 +344,7 @@ docker build \
|
||||
--build-arg "NINJA_VERSION=${NINJA_VERSION:-}" \
|
||||
--build-arg "KATEX=${KATEX:-}" \
|
||||
--build-arg "ROCM_VERSION=${ROCM_VERSION:-}" \
|
||||
--build-arg "PYTORCH_ROCM_ARCH=${PYTORCH_ROCM_ARCH:-gfx90a;gfx942}" \
|
||||
--build-arg "PYTORCH_ROCM_ARCH=${PYTORCH_ROCM_ARCH:-gfx90a;gfx942;gfx1100}" \
|
||||
--build-arg "IMAGE_NAME=${IMAGE_NAME}" \
|
||||
--build-arg "UCX_COMMIT=${UCX_COMMIT}" \
|
||||
--build-arg "UCC_COMMIT=${UCC_COMMIT}" \
|
||||
|
@ -1 +1 @@
|
||||
deb42f2a8e48f5032b4a98ee781a15fa87a157cf
|
||||
e0dda9059d082537cee36be6c5e4fe3b18c880c0
|
||||
|
@ -46,9 +46,9 @@ case ${DOCKER_TAG_PREFIX} in
|
||||
BASE_TARGET=rocm
|
||||
GPU_IMAGE=rocm/dev-ubuntu-22.04:${GPU_ARCH_VERSION}-complete
|
||||
PYTORCH_ROCM_ARCH="gfx900;gfx906;gfx908;gfx90a;gfx942;gfx1030;gfx1100;gfx1101;gfx1102;gfx1200;gfx1201"
|
||||
# add gfx950 conditionally starting in ROCm 7.0
|
||||
# add gfx950, gfx115x conditionally starting in ROCm 7.0
|
||||
if [[ "$GPU_ARCH_VERSION" == *"7.0"* ]]; then
|
||||
PYTORCH_ROCM_ARCH="${PYTORCH_ROCM_ARCH};gfx950"
|
||||
PYTORCH_ROCM_ARCH="${PYTORCH_ROCM_ARCH};gfx950;gfx1150;gfx1151"
|
||||
fi
|
||||
DOCKER_GPU_BUILD_ARG="--build-arg PYTORCH_ROCM_ARCH=${PYTORCH_ROCM_ARCH} --build-arg ROCM_VERSION=${GPU_ARCH_VERSION}"
|
||||
;;
|
||||
|
@ -115,6 +115,9 @@ RUN env GRPC_PYTHON_BUILD_SYSTEM_OPENSSL=True pip3 install grpcio
|
||||
# cmake-3.28.0 from pip for onnxruntime
|
||||
RUN python3 -mpip install cmake==3.28.0
|
||||
|
||||
ADD ./common/patch_libstdc.sh patch_libstdc.sh
|
||||
RUN bash ./patch_libstdc.sh && rm patch_libstdc.sh
|
||||
|
||||
# build onnxruntime 1.21.0 from sources.
|
||||
# it is not possible to build it from sources using pip,
|
||||
# so just build it from upstream repository.
|
||||
|
@ -84,9 +84,9 @@ case ${image} in
|
||||
DEVTOOLSET_VERSION="11"
|
||||
GPU_IMAGE=rocm/dev-almalinux-8:${GPU_ARCH_VERSION}-complete
|
||||
PYTORCH_ROCM_ARCH="gfx900;gfx906;gfx908;gfx90a;gfx942;gfx1030;gfx1100;gfx1101;gfx1102;gfx1200;gfx1201"
|
||||
# add gfx950 conditionally starting in ROCm 7.0
|
||||
# add gfx950, gfx115x conditionally starting in ROCm 7.0
|
||||
if [[ "$GPU_ARCH_VERSION" == *"7.0"* ]]; then
|
||||
PYTORCH_ROCM_ARCH="${PYTORCH_ROCM_ARCH};gfx950"
|
||||
PYTORCH_ROCM_ARCH="${PYTORCH_ROCM_ARCH};gfx950;gfx1150;gfx1151"
|
||||
fi
|
||||
DOCKER_GPU_BUILD_ARG="--build-arg ROCM_VERSION=${GPU_ARCH_VERSION} --build-arg PYTORCH_ROCM_ARCH=${PYTORCH_ROCM_ARCH} --build-arg DEVTOOLSET_VERSION=${DEVTOOLSET_VERSION}"
|
||||
;;
|
||||
|
@ -120,9 +120,8 @@ ninja==1.11.1.4
|
||||
numba==0.55.2 ; python_version == "3.10" and platform_machine != "s390x"
|
||||
numba==0.60.0 ; python_version == "3.12" and platform_machine != "s390x"
|
||||
#Description: Just-In-Time Compiler for Numerical Functions
|
||||
#Pinned versions: 0.54.1, 0.49.0, <=0.49.1
|
||||
#Pinned versions: 0.55.2, 0.60.0
|
||||
#test that import: test_numba_integration.py
|
||||
#For numba issue see https://github.com/pytorch/pytorch/issues/51511
|
||||
#Need release > 0.61.2 for s390x due to https://github.com/numba/numba/pull/10073
|
||||
|
||||
#numpy
|
||||
@ -242,10 +241,9 @@ pygments==2.15.0
|
||||
#Pinned versions: 14.1.0
|
||||
#test that import:
|
||||
|
||||
scikit-image==0.19.3 ; python_version < "3.10"
|
||||
scikit-image==0.22.0 ; python_version >= "3.10"
|
||||
scikit-image==0.22.0
|
||||
#Description: image processing routines
|
||||
#Pinned versions:
|
||||
#Pinned versions: 0.22.0
|
||||
#test that import: test_nn.py
|
||||
|
||||
#scikit-learn
|
||||
|
@ -1,15 +1,11 @@
|
||||
sphinx==5.3.0
|
||||
sphinx==7.2.6
|
||||
#Description: This is used to generate PyTorch docs
|
||||
#Pinned versions: 5.3.0
|
||||
#Pinned versions: 7.2.6
|
||||
|
||||
standard-imghdr==3.13.0; python_version >= "3.13"
|
||||
#Description: This is needed by Sphinx, so it needs to be added here.
|
||||
# The reasons are as follows:
|
||||
# 1) This module has been removed from the Python standard library since Python 3.13(https://peps.python.org/pep-0594/#imghdr);
|
||||
# 2) The current version of Sphinx (5.3.0) is not compatible with Python 3.13.
|
||||
# Once Sphinx is upgraded to a version compatible with Python 3.13 or later, we can remove this dependency.
|
||||
pytorch_sphinx_theme2==0.1.0
|
||||
#Description: This is needed to generate PyTorch docs
|
||||
#Pinned versions: 0.1.0
|
||||
|
||||
-e git+https://github.com/pytorch/pytorch_sphinx_theme.git@71e55749be14ceb56e7f8211a9fb649866b87ad4#egg=pytorch_sphinx_theme2
|
||||
# TODO: sphinxcontrib.katex 0.9.0 adds a local KaTeX server to speed up pre-rendering
|
||||
# but it doesn't seem to work and hangs around idly. The initial thought is that it is probably
|
||||
# something related to Docker setup. We can investigate this later.
|
||||
@ -36,17 +32,17 @@ tensorboard==2.18.0 ; python_version >= "3.13"
|
||||
#Description: This is used to generate PyTorch docs
|
||||
#Pinned versions: 2.13.0
|
||||
|
||||
breathe==4.34.0
|
||||
breathe==4.36.0
|
||||
#Description: This is used to generate PyTorch C++ docs
|
||||
#Pinned versions: 4.34.0
|
||||
#Pinned versions: 4.36.0
|
||||
|
||||
exhale==0.2.3
|
||||
exhale==0.3.7
|
||||
#Description: This is used to generate PyTorch C++ docs
|
||||
#Pinned versions: 0.2.3
|
||||
#Pinned versions: 0.3.7
|
||||
|
||||
docutils==0.16
|
||||
docutils==0.20
|
||||
#Description: This is used to generate PyTorch C++ docs
|
||||
#Pinned versions: 0.16
|
||||
#Pinned versions: 0.20
|
||||
|
||||
bs4==0.0.1
|
||||
#Description: This is used to generate PyTorch C++ docs
|
||||
@ -56,13 +52,13 @@ IPython==8.12.0
|
||||
#Description: This is used to generate PyTorch functorch docs
|
||||
#Pinned versions: 8.12.0
|
||||
|
||||
myst-nb==0.17.2
|
||||
myst-nb==1.3.0
|
||||
#Description: This is used to generate PyTorch functorch and torch.compile docs.
|
||||
#Pinned versions: 0.17.2
|
||||
#Pinned versions: 1.3.0
|
||||
|
||||
# The following are required to build torch.distributed.elastic.rendezvous.etcd* docs
|
||||
python-etcd==0.4.5
|
||||
sphinx-copybutton==0.5.0
|
||||
sphinx-design==0.4.0
|
||||
sphinx-design==0.6.1
|
||||
sphinxcontrib-mermaid==1.0.0
|
||||
myst-parser==0.18.1
|
||||
myst-parser==4.0.1
|
||||
|
@ -5,7 +5,7 @@ DESIRED_ROCM ?= 7.0
|
||||
DESIRED_ROCM_SHORT = $(subst .,,$(DESIRED_ROCM))
|
||||
PACKAGE_NAME = magma-rocm
|
||||
# inherit this from underlying docker image, do not pass this env var to docker
|
||||
#PYTORCH_ROCM_ARCH ?= gfx900;gfx906;gfx908;gfx90a;gfx942;gfx950;gfx1030;gfx1100;gfx1101;gfx1102;gfx1200;gfx1201
|
||||
#PYTORCH_ROCM_ARCH ?= gfx900;gfx906;gfx908;gfx90a;gfx942;gfx950;gfx1030;gfx1100;gfx1101;gfx1102;gfx1150;gfx1151;gfx1200;gfx1201
|
||||
|
||||
DOCKER_RUN = set -eou pipefail; ${DOCKER_CMD} run --rm -i \
|
||||
-v $(shell git rev-parse --show-toplevel)/.ci:/builder \
|
||||
@ -18,7 +18,6 @@ DOCKER_RUN = set -eou pipefail; ${DOCKER_CMD} run --rm -i \
|
||||
.PHONY: all
|
||||
all: magma-rocm70
|
||||
all: magma-rocm64
|
||||
all: magma-rocm63
|
||||
|
||||
.PHONY:
|
||||
clean:
|
||||
@ -34,8 +33,3 @@ magma-rocm70:
|
||||
magma-rocm64: DESIRED_ROCM := 6.4
|
||||
magma-rocm64:
|
||||
$(DOCKER_RUN)
|
||||
|
||||
.PHONY: magma-rocm63
|
||||
magma-rocm63: DESIRED_ROCM := 6.3
|
||||
magma-rocm63:
|
||||
$(DOCKER_RUN)
|
||||
|
@ -67,7 +67,7 @@ fi
|
||||
# wheels with cxx11-abi
|
||||
|
||||
echo "Checking that the gcc ABI is what we expect"
|
||||
if [[ "$(uname)" != 'Darwin' && "$(uname -m)" != "s390x" ]]; then
|
||||
if [[ "$(uname)" != 'Darwin' ]]; then
|
||||
# We also check that there are cxx11 symbols in libtorch
|
||||
#
|
||||
echo "Checking that symbols in libtorch.so have the right gcc abi"
|
||||
|
@ -102,8 +102,18 @@ if [ "$is_main_doc" = true ]; then
|
||||
echo coverage output not found
|
||||
exit 1
|
||||
elif [ $undocumented -gt 0 ]; then
|
||||
echo undocumented objects found:
|
||||
echo "======================================"
|
||||
echo "ERROR: $undocumented undocumented objects found!"
|
||||
echo "======================================"
|
||||
echo ""
|
||||
echo "Full coverage report:"
|
||||
cat build/coverage/python.txt
|
||||
echo ""
|
||||
echo "======================================"
|
||||
echo "Undocumented modules/objects (lines after TOTAL):"
|
||||
tail -n +$((lines - undocumented + 1)) build/coverage/python.txt
|
||||
echo "======================================"
|
||||
echo ""
|
||||
echo "Make sure you've updated relevant .rsts in docs/source!"
|
||||
echo "You can reproduce locally by running 'cd docs && make coverage && cat build/coverage/python.txt'"
|
||||
exit 1
|
||||
|
@ -886,7 +886,7 @@ test_inductor_torchbench_smoketest_perf() {
|
||||
done
|
||||
|
||||
# Perform some "warm-start" runs for a few huggingface models.
|
||||
for test in AlbertForQuestionAnswering AllenaiLongformerBase DistilBertForMaskedLM DistillGPT2 GoogleFnet YituTechConvBert; do
|
||||
for test in AllenaiLongformerBase DistilBertForMaskedLM DistillGPT2 GoogleFnet YituTechConvBert; do
|
||||
python benchmarks/dynamo/huggingface.py --accuracy --training --amp --inductor --device cuda --warm-start-latency \
|
||||
--only $test --output "$TEST_REPORTS_DIR/inductor_warm_start_smoketest_$test.csv"
|
||||
python benchmarks/dynamo/check_accuracy.py \
|
||||
|
@ -38,7 +38,7 @@ if [[ "$BUILD_ENVIRONMENT" == *cuda* ]]; then
|
||||
fi
|
||||
|
||||
# TODO: Move this to .ci/docker/requirements-ci.txt
|
||||
python -m pip install "psutil==5.9.1" "pynvml==11.4.1" "pytest-shard==0.1.2"
|
||||
python -m pip install "psutil==5.9.1" nvidia-ml-py "pytest-shard==0.1.2"
|
||||
|
||||
run_tests() {
|
||||
# Run nvidia-smi if available
|
||||
|
BIN
.github/scripts/drci_mocks.json.gz
vendored
BIN
.github/scripts/drci_mocks.json.gz
vendored
Binary file not shown.
1
.github/scripts/github_utils.py
vendored
1
.github/scripts/github_utils.py
vendored
@ -18,6 +18,7 @@ class GitHubComment:
|
||||
body_text: str
|
||||
created_at: str
|
||||
author_login: str
|
||||
author_url: Optional[str]
|
||||
author_association: str
|
||||
editor_login: Optional[str]
|
||||
database_id: int
|
||||
|
BIN
.github/scripts/gql_mocks.json.gz
vendored
BIN
.github/scripts/gql_mocks.json.gz
vendored
Binary file not shown.
2
.github/scripts/test_check_labels.py
vendored
2
.github/scripts/test_check_labels.py
vendored
@ -38,6 +38,7 @@ def mock_get_comments() -> list[GitHubComment]:
|
||||
body_text="mock_body_text",
|
||||
created_at="",
|
||||
author_login="",
|
||||
author_url=None,
|
||||
author_association="",
|
||||
editor_login=None,
|
||||
database_id=1,
|
||||
@ -48,6 +49,7 @@ def mock_get_comments() -> list[GitHubComment]:
|
||||
body_text=" #" + LABEL_ERR_MSG_TITLE.replace("`", ""),
|
||||
created_at="",
|
||||
author_login=BOT_AUTHORS[1],
|
||||
author_url=None,
|
||||
author_association="",
|
||||
editor_login=None,
|
||||
database_id=2,
|
||||
|
18
.github/scripts/test_trymerge.py
vendored
18
.github/scripts/test_trymerge.py
vendored
@ -32,6 +32,7 @@ from trymerge import (
|
||||
main as trymerge_main,
|
||||
MandatoryChecksMissingError,
|
||||
MergeRule,
|
||||
PostCommentError,
|
||||
RE_GHSTACK_DESC,
|
||||
read_merge_rules,
|
||||
remove_job_name_suffix,
|
||||
@ -588,6 +589,23 @@ class TestTryMerge(TestCase):
|
||||
self.assertEqual(mock_merge_base, pr.get_merge_base())
|
||||
mocked_gh_fetch_merge_base.assert_called_once()
|
||||
|
||||
def test_app_can_revert(self, *args: Any) -> None:
|
||||
pr = GitHubPR("pytorch", "pytorch", 164660)
|
||||
repo = DummyGitRepo()
|
||||
app_comment_id, impostor_comment_id = 3375785595, 3377647892
|
||||
# Check that app can revert
|
||||
self.assertIsNotNone(validate_revert(repo, pr, comment_id=app_comment_id))
|
||||
# But impostor can not
|
||||
self.assertRaises(
|
||||
PostCommentError,
|
||||
lambda: validate_revert(repo, pr, comment_id=impostor_comment_id),
|
||||
)
|
||||
# Despite its name being the name of the bot
|
||||
self.assertEqual(
|
||||
pr.get_comment_by_id(impostor_comment_id).author_login,
|
||||
"pytorch-auto-revert",
|
||||
)
|
||||
|
||||
|
||||
@mock.patch("trymerge.gh_graphql", side_effect=mocked_gh_graphql)
|
||||
@mock.patch("trymerge.gh_fetch_merge_base", return_value="")
|
||||
|
7
.github/scripts/trymerge.py
vendored
7
.github/scripts/trymerge.py
vendored
@ -234,6 +234,7 @@ query ($owner: String!, $name: String!, $number: Int!) {
|
||||
createdAt
|
||||
author {
|
||||
login
|
||||
url
|
||||
}
|
||||
authorAssociation
|
||||
editor {
|
||||
@ -1093,6 +1094,7 @@ class GitHubPR:
|
||||
body_text=node["bodyText"],
|
||||
created_at=node["createdAt"] if "createdAt" in node else "",
|
||||
author_login=node["author"]["login"],
|
||||
author_url=node["author"].get("url", None),
|
||||
author_association=node["authorAssociation"],
|
||||
editor_login=editor["login"] if editor else None,
|
||||
database_id=node["databaseId"],
|
||||
@ -2029,6 +2031,11 @@ def validate_revert(
|
||||
# For some reason, one can not be a member of private repo, only CONTRIBUTOR
|
||||
if pr.is_base_repo_private():
|
||||
allowed_reverters.append("CONTRIBUTOR")
|
||||
# Special case the pytorch-auto-revert app, which does not have an association
|
||||
# But should be able to issue revert command
|
||||
if comment.author_url == "https://github.com/apps/pytorch-auto-revert":
|
||||
allowed_reverters.append("NONE")
|
||||
|
||||
if author_association not in allowed_reverters:
|
||||
raise PostCommentError(
|
||||
f"Will not revert as @{author_login} is not one of "
|
||||
|
@ -63,6 +63,7 @@ jobs:
|
||||
# Same as the build job
|
||||
python-version: 3.12.7
|
||||
test-matrix: ${{ needs.macos-perf-py3-arm64-build.outputs.test-matrix }}
|
||||
timeout-minutes: 300
|
||||
disable-monitor: false
|
||||
monitor-log-interval: 15
|
||||
monitor-data-collect-interval: 4
|
||||
|
6
.github/workflows/periodic.yml
vendored
6
.github/workflows/periodic.yml
vendored
@ -213,9 +213,9 @@ jobs:
|
||||
docker-image-name: ci-image:pytorch-linux-jammy-rocm-n-py3
|
||||
test-matrix: |
|
||||
{ include: [
|
||||
{ config: "distributed", shard: 1, num_shards: 3, runner: "linux.rocm.gpu.4", owners: ["module:rocm", "oncall:distributed"] },
|
||||
{ config: "distributed", shard: 2, num_shards: 3, runner: "linux.rocm.gpu.4", owners: ["module:rocm", "oncall:distributed"] },
|
||||
{ config: "distributed", shard: 3, num_shards: 3, runner: "linux.rocm.gpu.4", owners: ["module:rocm", "oncall:distributed"] },
|
||||
{ config: "distributed", shard: 1, num_shards: 3, runner: "linux.rocm.gpu.mi250.4", owners: ["module:rocm", "oncall:distributed"] },
|
||||
{ config: "distributed", shard: 2, num_shards: 3, runner: "linux.rocm.gpu.mi250.4", owners: ["module:rocm", "oncall:distributed"] },
|
||||
{ config: "distributed", shard: 3, num_shards: 3, runner: "linux.rocm.gpu.mi250.4", owners: ["module:rocm", "oncall:distributed"] },
|
||||
]}
|
||||
secrets: inherit
|
||||
|
||||
|
26
.github/workflows/rocm.yml
vendored
26
.github/workflows/rocm.yml
vendored
@ -59,3 +59,29 @@ jobs:
|
||||
docker-image: ${{ needs.linux-jammy-rocm-py3_10-build.outputs.docker-image }}
|
||||
test-matrix: ${{ needs.linux-jammy-rocm-py3_10-build.outputs.test-matrix }}
|
||||
secrets: inherit
|
||||
|
||||
linux-jammy-rocm-py3_10-gfx1100-test:
|
||||
if: ${{ github.event_name == 'push' && github.ref == 'refs/heads/main' }}
|
||||
permissions:
|
||||
id-token: write
|
||||
contents: read
|
||||
name: linux-jammy-rocm-py3_10-gfx1100
|
||||
uses: ./.github/workflows/_rocm-test.yml
|
||||
needs:
|
||||
- linux-jammy-rocm-py3_10-build
|
||||
- target-determination
|
||||
with:
|
||||
build-environment: linux-jammy-rocm-py3.10
|
||||
docker-image: ${{ needs.linux-jammy-rocm-py3_10-build.outputs.docker-image }}
|
||||
test-matrix: |
|
||||
{ include: [
|
||||
{ config: "default", shard: 1, num_shards: 2, runner: "linux.rocm.gpu.gfx1100" },
|
||||
{ config: "default", shard: 2, num_shards: 2, runner: "linux.rocm.gpu.gfx1100" },
|
||||
]}
|
||||
tests-to-include: >
|
||||
test_nn test_torch test_cuda test_ops test_unary_ufuncs test_binary_ufuncs
|
||||
test_autograd inductor/test_torchinductor inductor/test_kernel_benchmark
|
||||
inductor/test_pad_mm inductor/test_benchmark_fusion inductor/test_aot_inductor
|
||||
inductor/test_torchinductor inductor/test_decompose_mem_bound_mm
|
||||
inductor/test_flex_attention inductor/test_max_autotune
|
||||
secrets: inherit
|
||||
|
2
.gitignore
vendored
2
.gitignore
vendored
@ -88,7 +88,7 @@ torch_compile_debug/
|
||||
# Listed manually because some files in this directory are not generated
|
||||
torch/testing/_internal/generated/annotated_fn_args.py
|
||||
torch/testing/_internal/data/*.pt
|
||||
torch/csrc/api/include/torch/version.h
|
||||
torch/headeronly/version.h
|
||||
torch/csrc/cudnn/cuDNN.cpp
|
||||
torch/csrc/generated
|
||||
torch/csrc/generic/TensorMethods.cpp
|
||||
|
@ -28,7 +28,7 @@ exclude_patterns = [
|
||||
'torch/lib/**',
|
||||
'venv/**',
|
||||
'**/*.pyi',
|
||||
"tools/experimental/dynamic_shapes/torchfuzz/**",
|
||||
"tools/experimental/torchfuzz/**",
|
||||
'tools/test/test_selective_build.py',
|
||||
]
|
||||
command = [
|
||||
@ -198,7 +198,7 @@ exclude_patterns = [
|
||||
'tools/test/gen_operators_yaml_test.py',
|
||||
'tools/test/gen_oplist_test.py',
|
||||
'tools/test/test_selective_build.py',
|
||||
'tools/experimental/dynamic_shapes/torchfuzz/**',
|
||||
'tools/experimental/torchfuzz/**',
|
||||
]
|
||||
command = [
|
||||
'python3',
|
||||
@ -1573,6 +1573,7 @@ exclude_patterns = [
|
||||
'torch/_inductor/fx_passes/serialized_patterns/**',
|
||||
'torch/_inductor/autoheuristic/artifacts/**',
|
||||
'test/dynamo/cpython/**',
|
||||
'test/test_torchfuzz_repros.py',
|
||||
'scripts/**',
|
||||
'third_party/**',
|
||||
'fb/**',
|
||||
|
@ -13,6 +13,9 @@ load(":build_variables.bzl", "jit_core_sources", "lazy_tensor_ts_sources", "libt
|
||||
load(":ufunc_defs.bzl", "aten_ufunc_generated_cpu_kernel_sources", "aten_ufunc_generated_cpu_sources", "aten_ufunc_generated_cuda_sources")
|
||||
load("//:tools/bazel.bzl", "rules")
|
||||
|
||||
# Export files for use by torch/headeronly (where version.h generation now lives)
|
||||
exports_files(["version.txt"])
|
||||
|
||||
define_targets(rules = rules)
|
||||
|
||||
COMMON_COPTS = [
|
||||
@ -690,7 +693,9 @@ cc_library(
|
||||
"torch/csrc/*/generated/*.h",
|
||||
"torch/csrc/jit/serialization/mobile_bytecode_generated.h",
|
||||
] + torch_cuda_headers,
|
||||
) + GENERATED_AUTOGRAD_CPP + [":version_h"],
|
||||
) + GENERATED_AUTOGRAD_CPP + [
|
||||
"//torch/headeronly:version_h",
|
||||
],
|
||||
includes = [
|
||||
"third_party/kineto/libkineto/include",
|
||||
"torch/csrc",
|
||||
|
@ -53,7 +53,7 @@ ARG CUDA_PATH=cu121
|
||||
ARG INSTALL_CHANNEL=whl/nightly
|
||||
# Automatically set by buildx
|
||||
# pinning version of conda here see: https://github.com/pytorch/pytorch/issues/164574
|
||||
RUN /opt/conda/bin/conda install -c "${INSTALL_CHANNEL}" -y python=${PYTHON_VERSION} conda=25.7.0
|
||||
RUN /opt/conda/bin/conda install -y python=${PYTHON_VERSION} conda=25.7.0
|
||||
|
||||
ARG TARGETPLATFORM
|
||||
|
||||
|
@ -483,8 +483,8 @@ at::BlasBackend Context::blasPreferredBackend() {
|
||||
#if ROCM_VERSION >= 60300
|
||||
"gfx1100", "gfx1101", "gfx1200", "gfx1201", "gfx908",
|
||||
#endif
|
||||
#if ROCM_VERSION >= 60500
|
||||
"gfx950"
|
||||
#if ROCM_VERSION >= 70000
|
||||
"gfx950", "gfx1150", "gfx1151"
|
||||
#endif
|
||||
};
|
||||
for (auto index: c10::irange(detail::getCUDAHooks().deviceCount())) {
|
||||
|
@ -229,14 +229,14 @@ struct TORCH_API SparseTensorImpl : public TensorImpl {
|
||||
}
|
||||
|
||||
void resize_(int64_t sparse_dim, int64_t dense_dim, ArrayRef<int64_t> size) {
|
||||
return _resize_(sparse_dim, dense_dim, size);
|
||||
_resize_(sparse_dim, dense_dim, size);
|
||||
}
|
||||
|
||||
void resize_(
|
||||
int64_t sparse_dim,
|
||||
int64_t dense_dim,
|
||||
ArrayRef<c10::SymInt> size) {
|
||||
return _resize_(sparse_dim, dense_dim, size);
|
||||
_resize_(sparse_dim, dense_dim, size);
|
||||
}
|
||||
|
||||
// NOTE: this function will resize the sparse tensor and also set `indices`
|
||||
|
@ -59,7 +59,7 @@ static inline void set_item(const Tensor& self, ArrayRef<TensorIndex> indices, c
|
||||
}
|
||||
}
|
||||
|
||||
return set_item(self, indices, value);
|
||||
set_item(self, indices, value);
|
||||
}
|
||||
|
||||
} // namespace indexing
|
||||
|
@ -765,7 +765,8 @@ void TensorIteratorBase::for_each(loop2d_t loop, int64_t grain_size) {
|
||||
if (numel == 0) {
|
||||
return;
|
||||
} else if (numel < grain_size || at::get_num_threads() == 1) {
|
||||
return serial_for_each(loop, {0, numel});
|
||||
serial_for_each(loop, {0, numel});
|
||||
return;
|
||||
} else {
|
||||
at::parallel_for(0, numel, grain_size, [&](int64_t begin, int64_t end) {
|
||||
serial_for_each(loop, {begin, end});
|
||||
|
@ -49,7 +49,7 @@ static void check_unique_names(DimnameList names) {
|
||||
}
|
||||
|
||||
void check_names_valid_for(const TensorBase& tensor, DimnameList names) {
|
||||
return impl::check_names_valid_for(tensor.unsafeGetTensorImpl(), names);
|
||||
impl::check_names_valid_for(tensor.unsafeGetTensorImpl(), names);
|
||||
}
|
||||
|
||||
void check_names_valid_for(size_t tensor_dim, DimnameList names) {
|
||||
|
@ -138,7 +138,7 @@ void Tensor::_backward(TensorList inputs,
|
||||
const std::optional<Tensor>& gradient,
|
||||
std::optional<bool> keep_graph,
|
||||
bool create_graph) const {
|
||||
return impl::GetVariableHooks()->_backward(*this, inputs, gradient, keep_graph, create_graph);
|
||||
impl::GetVariableHooks()->_backward(*this, inputs, gradient, keep_graph, create_graph);
|
||||
}
|
||||
|
||||
const TensorBase& TensorBase::requires_grad_(bool _requires_grad) const {
|
||||
|
@ -496,7 +496,7 @@ class TORCH_API OperatorHandle {
|
||||
}
|
||||
|
||||
void checkInvariants() const {
|
||||
return operatorDef_->op.checkInvariants();
|
||||
operatorDef_->op.checkInvariants();
|
||||
}
|
||||
|
||||
c10::ArrayRef<at::Tag> getTags() const {
|
||||
@ -932,7 +932,7 @@ inline void Dispatcher::redispatchBoxed(
|
||||
}
|
||||
#endif
|
||||
const auto& kernel = entry.lookup(dispatchKeySet);
|
||||
return kernel.callBoxed(op, dispatchKeySet, stack);
|
||||
kernel.callBoxed(op, dispatchKeySet, stack);
|
||||
}
|
||||
|
||||
} // namespace c10
|
||||
|
@ -1270,7 +1270,7 @@ void gemm_internal<float>(CUDABLAS_GEMM_ARGTYPES(float))
|
||||
}
|
||||
#if defined(USE_ROCM) && defined(USE_ROCM_CK_GEMM)
|
||||
else if (at::globalContext().blasPreferredBackend() == BlasBackend::Ck) {
|
||||
if (at::detail::getCUDAHooks().isGPUArch({"gfx1100"})) { //no CK GEMM version for gfx1100
|
||||
if (at::detail::getCUDAHooks().isGPUArch({"gfx11", "gfx12"})) { //no CK GEMM version
|
||||
gemm_internal_cublaslt<float>(CUDABLAS_GEMM_ARGS(float));
|
||||
} else{
|
||||
at::native::gemm_internal_ck<float>(CUDABLAS_GEMM_ARGS(float));
|
||||
|
@ -326,6 +326,23 @@ bool CUDAHooks::supportsBFloat16ConvolutionWithCuDNNv8() const {
|
||||
#endif
|
||||
}
|
||||
|
||||
bool CUDAHooks::supportsBFloat16RNNWithCuDNN() const {
|
||||
#if AT_CUDNN_ENABLED() && (CUDNN_VERSION >= 91300)
|
||||
if (!hasCUDA()) {
|
||||
return false;
|
||||
}
|
||||
cudaDeviceProp* prop = at::cuda::getCurrentDeviceProperties();
|
||||
// Check for compute capability >= 8.0 (Ampere or newer)
|
||||
if (prop->major >= 8) {
|
||||
return true;
|
||||
} else {
|
||||
return false;
|
||||
}
|
||||
#else
|
||||
return false;
|
||||
#endif
|
||||
}
|
||||
|
||||
long CUDAHooks::versionCuDNN() const {
|
||||
#if AT_CUDNN_ENABLED()
|
||||
return CUDNN_VERSION;
|
||||
|
@ -45,6 +45,7 @@ struct CUDAHooks : public at::CUDAHooksInterface {
|
||||
bool supportsDilatedConvolutionWithCuDNN() const override;
|
||||
bool supportsDepthwiseConvolutionWithCuDNN() const override;
|
||||
bool supportsBFloat16ConvolutionWithCuDNNv8() const override;
|
||||
bool supportsBFloat16RNNWithCuDNN() const override;
|
||||
bool hasCUDART() const override;
|
||||
long versionCUDART() const override;
|
||||
long versionCuDNN() const override;
|
||||
|
@ -166,6 +166,10 @@ struct TORCH_API CUDAHooksInterface : AcceleratorHooksInterface {
|
||||
return false;
|
||||
}
|
||||
|
||||
virtual bool supportsBFloat16RNNWithCuDNN() const {
|
||||
return false;
|
||||
}
|
||||
|
||||
virtual long versionCuDNN() const {
|
||||
TORCH_CHECK(false, "Cannot query cuDNN version without ATen_cuda library. ", CUDA_HELP);
|
||||
}
|
||||
|
@ -465,11 +465,11 @@ static void dynamicLayerBack(const c10::OperatorHandle& op, torch::jit::Stack* s
|
||||
|
||||
// used for functions that have aliasing operations but should be treated like they're out of place (i.e. lift_fresh)
|
||||
static void dynamicLayerBackGradSpecialCase(const c10::OperatorHandle& op, torch::jit::Stack* stack) {
|
||||
return dynamicLayerBack(op, stack, true);
|
||||
dynamicLayerBack(op, stack, true);
|
||||
}
|
||||
|
||||
static void dynamicLayerBackFallback(const c10::OperatorHandle& op, torch::jit::Stack* stack) {
|
||||
return dynamicLayerBack(op, stack, false);
|
||||
dynamicLayerBack(op, stack, false);
|
||||
}
|
||||
|
||||
TORCH_LIBRARY_IMPL(_, FuncTorchDynamicLayerFrontMode, m) {
|
||||
|
@ -375,7 +375,7 @@ static void bf16_gemv_trans(
|
||||
const at::BFloat16 beta,
|
||||
at::BFloat16* y,
|
||||
const int incy) {
|
||||
return bf16_gemv_trans_stub(kCPU, m, n, alpha, a, lda, x, incx, beta, y, incy);
|
||||
bf16_gemv_trans_stub(kCPU, m, n, alpha, a, lda, x, incx, beta, y, incy);
|
||||
}
|
||||
|
||||
template <>
|
||||
|
@ -70,7 +70,7 @@ inline void searchsorted_maybe_trim_input_tensors(
|
||||
const Tensor& raw_boundaries) {
|
||||
Tensor trimmed_sorter;
|
||||
Tensor raw_sorter;
|
||||
return searchsorted_maybe_trim_input_tensors(
|
||||
searchsorted_maybe_trim_input_tensors(
|
||||
trimmed_input,
|
||||
trimmed_boundaries,
|
||||
trimmed_sorter,
|
||||
|
@ -93,6 +93,12 @@ inline bool cond_cudnn_grid_sampler(
|
||||
const TensorBase& input,
|
||||
const TensorBase& grid
|
||||
) {
|
||||
auto st = input.scalar_type();
|
||||
if (!(st == kDouble || st == kFloat || st == kHalf))
|
||||
return false;
|
||||
st = grid.scalar_type();
|
||||
if (!(st == kDouble || st == kFloat || st == kHalf))
|
||||
return false;
|
||||
return (
|
||||
at::native::cudnn_is_acceptable(input) &&
|
||||
at::native::cudnn_is_acceptable(grid) &&
|
||||
|
@ -108,6 +108,13 @@ bool use_mkldnn(const Tensor& input, TensorList params, TensorList hx) {
|
||||
return false;
|
||||
}
|
||||
|
||||
bool use_cudnn(const Tensor& t) {
|
||||
bool acceptable = at::cudnn_is_acceptable(t);
|
||||
auto st = t.scalar_type();
|
||||
bool bfloat16_cond = st == kBFloat16 && at::detail::getCUDAHooks().supportsBFloat16RNNWithCuDNN();
|
||||
return acceptable && (bfloat16_cond || st == kDouble || st == kFloat || st == kHalf);
|
||||
}
|
||||
|
||||
template<typename T>
|
||||
using pair_of = std::pair<T, T>;
|
||||
|
||||
@ -1200,7 +1207,7 @@ std::tuple<Tensor, Tensor, Tensor, Tensor, Tensor> _thnn_fused_lstm_cell_backwar
|
||||
bool train, \
|
||||
bool bidirectional, \
|
||||
bool batch_first) { \
|
||||
if (at::cudnn_is_acceptable(_input)) { \
|
||||
if (use_cudnn(_input)) { \
|
||||
Tensor output, hy; \
|
||||
NAME##_cudnn_stub( \
|
||||
_input.device().type(), \
|
||||
@ -1262,7 +1269,7 @@ std::tuple<Tensor, Tensor, Tensor, Tensor, Tensor> _thnn_fused_lstm_cell_backwar
|
||||
double dropout_p, \
|
||||
bool train, \
|
||||
bool bidirectional) { \
|
||||
if (at::cudnn_is_acceptable(data)) { \
|
||||
if (use_cudnn(data)) { \
|
||||
Tensor output, hy; \
|
||||
NAME##_packed_cudnn_stub( \
|
||||
data.device().type(), \
|
||||
@ -1430,7 +1437,7 @@ std::tuple<Tensor, Tensor, Tensor> lstm(
|
||||
TensorList _params, bool has_biases,
|
||||
int64_t num_layers, double dropout_p, bool train, bool bidirectional, bool batch_first) {
|
||||
TORCH_CHECK(hx.size() == 2, "lstm expects two hidden states");
|
||||
if (at::cudnn_is_acceptable(_input)) {
|
||||
if (use_cudnn(_input)) {
|
||||
Tensor output, hy, cy;
|
||||
lstm_cudnn_stub(_input.device().type(), output, hy, cy, _input, hx, _params, has_biases,
|
||||
num_layers, dropout_p, train, bidirectional, batch_first);
|
||||
@ -1491,7 +1498,7 @@ std::tuple<Tensor, Tensor, Tensor> lstm(
|
||||
TensorList _params, bool has_biases,
|
||||
int64_t num_layers, double dropout_p, bool train, bool bidirectional) {
|
||||
TORCH_CHECK(hx.size() == 2, "lstm expects two hidden states");
|
||||
if (at::cudnn_is_acceptable(data)) {
|
||||
if (use_cudnn(data)) {
|
||||
Tensor output, hy, cy;
|
||||
lstm_packed_cudnn_stub(data.device().type(), output, hy, cy, data, batch_sizes, hx,
|
||||
_params, has_biases, num_layers, dropout_p, train, bidirectional);
|
||||
|
@ -23,14 +23,6 @@
|
||||
#include <ATen/Functions.h>
|
||||
#include <ATen/NativeFunctions.h>
|
||||
#else
|
||||
#include <ATen/ops/_cast_Byte_native.h>
|
||||
#include <ATen/ops/_cast_Char_native.h>
|
||||
#include <ATen/ops/_cast_Double_native.h>
|
||||
#include <ATen/ops/_cast_Float_native.h>
|
||||
#include <ATen/ops/_cast_Half_native.h>
|
||||
#include <ATen/ops/_cast_Int_native.h>
|
||||
#include <ATen/ops/_cast_Long_native.h>
|
||||
#include <ATen/ops/_cast_Short_native.h>
|
||||
#include <ATen/ops/_dim_arange_native.h>
|
||||
#include <ATen/ops/_efficientzerotensor_native.h>
|
||||
#include <ATen/ops/_empty_affine_quantized.h>
|
||||
|
@ -91,9 +91,6 @@ bool cudnn_is_acceptable(const TensorBase& self) {
|
||||
return false;
|
||||
if (!self.is_cuda())
|
||||
return false;
|
||||
auto st = self.scalar_type();
|
||||
if (!(st == kDouble || st == kFloat || st == kHalf))
|
||||
return false;
|
||||
if (!detail::getCUDAHooks().compiledWithCuDNN())
|
||||
return false;
|
||||
// cuDNN functions like grid_sampler returns CUDNN_STATUS_BAD_PARAM on empty
|
||||
|
@ -25,11 +25,11 @@
|
||||
namespace at::native {
|
||||
|
||||
void _backward(const Tensor& self, TensorList inputs, const std::optional<Tensor>& gradient_opt, std::optional<bool> keep_graph, bool create_graph) {
|
||||
return self._backward(inputs, gradient_opt, keep_graph, create_graph);
|
||||
self._backward(inputs, gradient_opt, keep_graph, create_graph);
|
||||
}
|
||||
|
||||
void set_data(Tensor& self, const Tensor& new_data) {
|
||||
return self.set_data(new_data);
|
||||
self.set_data(new_data);
|
||||
}
|
||||
|
||||
Tensor data(const Tensor& self) {
|
||||
@ -54,7 +54,7 @@ Tensor& requires_grad_(Tensor& self, bool _requires_grad) {
|
||||
}
|
||||
|
||||
void retain_grad(Tensor& self) {
|
||||
return self.retain_grad();
|
||||
self.retain_grad();
|
||||
}
|
||||
|
||||
bool retains_grad(const Tensor& self) {
|
||||
|
@ -300,7 +300,8 @@ void div_floor_kernel(TensorIteratorBase& iter) {
|
||||
// In the special case of unsigned integer division, floor division is
|
||||
// equivalent to truncation division (since the signs of the divisor and
|
||||
// dividend are always the same)
|
||||
return div_trunc_kernel(iter);
|
||||
div_trunc_kernel(iter);
|
||||
return;
|
||||
} else if (isIntegralType(dtype, /*includeBool*/ false)) {
|
||||
// There's no SIMD integer division, so don't try to vectorize it.
|
||||
AT_DISPATCH_INTEGRAL_TYPES(dtype, "div_floor_cpu", [&]() {
|
||||
|
@ -749,21 +749,29 @@ void flip_kernel(TensorIterator& iter, const bool quantized) {
|
||||
// });
|
||||
|
||||
if (iter_dtype == kByte) {
|
||||
return cpu_hflip_vec<uint8_t>(iter);
|
||||
cpu_hflip_vec<uint8_t>(iter);
|
||||
return;
|
||||
} else if (iter_dtype == kChar) {
|
||||
return cpu_hflip_vec<int8_t>(iter);
|
||||
cpu_hflip_vec<int8_t>(iter);
|
||||
return;
|
||||
} else if (iter_dtype == kInt) {
|
||||
return cpu_hflip_vec<int32_t>(iter);
|
||||
cpu_hflip_vec<int32_t>(iter);
|
||||
return;
|
||||
} else if (iter_dtype == kLong) {
|
||||
return cpu_hflip_vec<int64_t>(iter);
|
||||
cpu_hflip_vec<int64_t>(iter);
|
||||
return;
|
||||
} else if (iter_dtype == kShort) {
|
||||
return cpu_hflip_vec<int16_t>(iter);
|
||||
cpu_hflip_vec<int16_t>(iter);
|
||||
return;
|
||||
} else if (iter_dtype == kBool) {
|
||||
return cpu_hflip_vec<bool>(iter);
|
||||
cpu_hflip_vec<bool>(iter);
|
||||
return;
|
||||
} else if (iter_dtype == kFloat) {
|
||||
return cpu_hflip_vec<float>(iter);
|
||||
cpu_hflip_vec<float>(iter);
|
||||
return;
|
||||
} else if (iter_dtype == kDouble) {
|
||||
return cpu_hflip_vec<double>(iter);
|
||||
cpu_hflip_vec<double>(iter);
|
||||
return;
|
||||
}
|
||||
}
|
||||
// other dtypes (float16, bfloat16, complex) are handled by cpu_kernel_vec (see below)
|
||||
@ -778,10 +786,12 @@ void flip_kernel(TensorIterator& iter, const bool quantized) {
|
||||
c == input_strides_2[1] &&
|
||||
c == iter.element_size(0) * iter.shape()[0] // checks if dim=1 is contiguous as well
|
||||
) {
|
||||
return cpu_hflip_channels_last_vec(iter);
|
||||
cpu_hflip_channels_last_vec(iter);
|
||||
return;
|
||||
}
|
||||
// Special case: vertical flip using memcpy (faster than generic cpu_kernel_vec)
|
||||
return cpu_vflip_memcpy(iter);
|
||||
cpu_vflip_memcpy(iter);
|
||||
return;
|
||||
}
|
||||
|
||||
AT_DISPATCH_ALL_TYPES_AND_COMPLEX_AND3(kBool, kHalf, kBFloat16, iter.dtype(), "flip_cpu",
|
||||
|
@ -96,11 +96,14 @@ static void pow_tensor_scalar_kernel(
|
||||
dtype == kBFloat16 || isComplexType(dtype)) {
|
||||
// Dispatch to fast specialization for sqrt, rsqrt and reciprocal
|
||||
if (exp_scalar.equal(.5)) {
|
||||
return sqrt_kernel(iter);
|
||||
sqrt_kernel(iter);
|
||||
return;
|
||||
} else if (exp_scalar.equal(-0.5)) {
|
||||
return rsqrt_kernel(iter);
|
||||
rsqrt_kernel(iter);
|
||||
return;
|
||||
} else if (exp_scalar.equal(-1.0)) {
|
||||
return reciprocal_kernel(iter);
|
||||
reciprocal_kernel(iter);
|
||||
return;
|
||||
}
|
||||
}
|
||||
|
||||
|
@ -256,10 +256,10 @@ static void norm_kernel_tensor_iterator_impl(
|
||||
} else {
|
||||
if (iter.input_dtype() == kHalf && iter.dtype(0) == kFloat) {
|
||||
// type promotion that does cast and reduction in a single kernel
|
||||
return norm_kernel_cpu_impl<at::Half, float>(iter, val);
|
||||
norm_kernel_cpu_impl<at::Half, float>(iter, val); return;
|
||||
} else if (iter.input_dtype() == kBFloat16 && iter.dtype(0) == kFloat) {
|
||||
// type promotion that does cast and reduction in a single kernel
|
||||
return norm_kernel_cpu_impl<at::BFloat16, float>(iter, val);
|
||||
norm_kernel_cpu_impl<at::BFloat16, float>(iter, val); return;
|
||||
}
|
||||
|
||||
AT_DISPATCH_FLOATING_AND_COMPLEX_TYPES_AND3(kHalf, kBFloat16, kComplexHalf, iter.input_dtype(), "norm_cpu", [&] {
|
||||
|
@ -428,10 +428,11 @@ void fp16_gemv_trans(
|
||||
TORCH_INTERNAL_ASSERT_DEBUG_ONLY(incx == 1 && alpha == 1.0);
|
||||
#if !defined(__aarch64__) || defined(__ARM_FEATURE_FP16_SCALAR_ARITHMETIC)
|
||||
if (at::globalContext().allowFP16ReductionCPU()) {
|
||||
return fp16_gemv_trans_fp16_arith_by_dot_products(m, n, a, lda, x, beta, y, incy);
|
||||
fp16_gemv_trans_fp16_arith_by_dot_products(m, n, a, lda, x, beta, y, incy);
|
||||
return;
|
||||
}
|
||||
#endif
|
||||
return fp16_gemv_trans_fp32_arith_by_dot_products(m, n, a, lda, x, beta, y, incy);
|
||||
fp16_gemv_trans_fp32_arith_by_dot_products(m, n, a, lda, x, beta, y, incy);
|
||||
}
|
||||
|
||||
float bf16_dot_with_fp32_arith(const at::BFloat16* vec1, const at::BFloat16* vec2, int64_t len) {
|
||||
@ -465,7 +466,7 @@ void bf16_gemv_trans(
|
||||
at::BFloat16* y,
|
||||
const int incy) {
|
||||
TORCH_INTERNAL_ASSERT_DEBUG_ONLY(incx == 1 && alpha == 1.0 && beta == 0.0);
|
||||
return bf16_gemv_trans_fp32_arith_by_dot_products(m, n, a, lda, x, y, incy);
|
||||
bf16_gemv_trans_fp32_arith_by_dot_products(m, n, a, lda, x, y, incy);
|
||||
}
|
||||
|
||||
float fp16_dot(
|
||||
|
@ -285,8 +285,8 @@ static bool isSupportedHipLtROCmArch(int index) {
|
||||
#if ROCM_VERSION >= 60300
|
||||
"gfx1100", "gfx1101", "gfx1200", "gfx1201", "gfx908",
|
||||
#endif
|
||||
#if ROCM_VERSION >= 60500
|
||||
"gfx950"
|
||||
#if ROCM_VERSION >= 70000
|
||||
"gfx950", "gfx1150", "gfx1151"
|
||||
#endif
|
||||
};
|
||||
return at::detail::getCUDAHooks().isGPUArch(archs, index);
|
||||
|
@ -121,7 +121,7 @@ void cufft_set_plan_cache_max_size_impl(DeviceIndex device_index, int64_t max_si
|
||||
"cufft_set_plan_cache_max_size: expected 0 <= device_index < ",
|
||||
at::detail::getCUDAHooks().deviceCount(), "], but got device_index=",
|
||||
device_index);
|
||||
return cufft_get_plan_cache(device_index).resize(max_size);
|
||||
cufft_get_plan_cache(device_index).resize(max_size);
|
||||
}
|
||||
|
||||
int64_t cufft_get_plan_cache_size_impl(DeviceIndex device_index) {
|
||||
@ -137,7 +137,7 @@ void cufft_clear_plan_cache_impl(DeviceIndex device_index) {
|
||||
"cufft_clear_plan_cache: expected 0 <= device_index < ",
|
||||
at::detail::getCUDAHooks().deviceCount(), "], but got device_index=",
|
||||
device_index);
|
||||
return cufft_get_plan_cache(device_index).clear();
|
||||
cufft_get_plan_cache(device_index).clear();
|
||||
}
|
||||
|
||||
} // namespace at::native::detail
|
||||
|
@ -230,7 +230,7 @@ constexpr int BLOCK_THREADS = 256;
|
||||
constexpr int RADIX_BITS = 8;
|
||||
constexpr int RADIX_DIGITS = 1 << RADIX_BITS; // 2 ^ RADIX_BITS
|
||||
constexpr int RADIX_MASK = (RADIX_DIGITS - 1);
|
||||
static_assert(RADIX_DIGITS <= BLOCK_THREADS, "radixFindKthValues kernel requires RADIX_DIGITS <= BLOCK_THREADS");
|
||||
static_assert(RADIX_DIGITS <= BLOCK_THREADS, "RADIX_DIGITS must be <= BLOCK_THREADS");
|
||||
constexpr int MIN_ITEMS_PER_THREAD = 4;
|
||||
constexpr int MAX_ITEMS_PER_THREAD = 64;
|
||||
|
||||
@ -242,11 +242,10 @@ __global__ void fill(T* x, T value, IndexType size) {
|
||||
}
|
||||
}
|
||||
|
||||
// find the kth smallest value,
|
||||
// for largest topk, k_to_find = slice_size - k + 1
|
||||
// compute local histogram for each block
|
||||
template <typename T, typename IndexType, typename Bitwise, int Dim>
|
||||
C10_LAUNCH_BOUNDS_1(BLOCK_THREADS)
|
||||
__global__ void radixFindKthValues(
|
||||
__global__ void computeBlockDigitCounts(
|
||||
at::cuda::detail::TensorInfo<const T, IndexType> input,
|
||||
uint32_t slice_size,
|
||||
uint32_t* ks_to_find, // size: num_slices, unused arg but for mysterious reasons perf is better when it's present
|
||||
@ -321,12 +320,51 @@ __global__ void radixFindKthValues(
|
||||
}
|
||||
}
|
||||
|
||||
// compute global histogram and cumsum for each row
|
||||
__global__ void computeDigitCumSum(
|
||||
short* counts,
|
||||
uint32_t* digit_cum_sum,
|
||||
uint32_t blocks_per_slice) {
|
||||
int tidx = threadIdx.x + blockIdx.x * blockDim.x;
|
||||
int digit_idx = threadIdx.x;
|
||||
uint32_t slice_idx = blockIdx.x;
|
||||
|
||||
typedef cub::BlockScan<uint32_t, RADIX_DIGITS> BlockScan;
|
||||
__shared__ typename BlockScan::TempStorage scan_storage;
|
||||
// accumulates counters from multiple blocks
|
||||
uint32_t digit_count = 0;
|
||||
if (threadIdx.x < RADIX_DIGITS) {
|
||||
constexpr int HISTO_ACCUM_TILE = 4;
|
||||
uint32_t rounds = blocks_per_slice / HISTO_ACCUM_TILE;
|
||||
for (int iter = 0; iter < rounds; iter++) {
|
||||
int base = HISTO_ACCUM_TILE * iter;
|
||||
#pragma unroll
|
||||
for (int j = 0; j < HISTO_ACCUM_TILE; j++) {
|
||||
int blk = base + j;
|
||||
digit_count += counts[(slice_idx * blocks_per_slice + blk) * RADIX_DIGITS + digit_idx];
|
||||
}
|
||||
}
|
||||
for (int blk = HISTO_ACCUM_TILE * rounds; blk < blocks_per_slice; blk++) {
|
||||
digit_count += counts[(slice_idx * blocks_per_slice + blk) * RADIX_DIGITS + digit_idx];
|
||||
}
|
||||
|
||||
}
|
||||
// compute the block-wide inclusive prefix sum
|
||||
uint32_t digit_count_cumsum;
|
||||
BlockScan(scan_storage).InclusiveSum(digit_count, digit_count_cumsum);
|
||||
__syncthreads();
|
||||
if (threadIdx.x < RADIX_DIGITS) {
|
||||
digit_cum_sum[tidx] = digit_count_cumsum;
|
||||
}
|
||||
}
|
||||
|
||||
// Assumption: k can not be larger than UINT32_MAX
|
||||
template <typename Bitwise, typename T>
|
||||
C10_LAUNCH_BOUNDS_1(RADIX_DIGITS) // one thread per digit
|
||||
__global__ void computeBlockwiseWithinKCounts(
|
||||
Bitwise* desires_in, // size: num_slices
|
||||
short* counts, // size: num_slices * blocks_per_slice * radix_digits
|
||||
uint32_t* digit_cum_sum,
|
||||
uint32_t* ks_to_find_in, // size: num_slices
|
||||
uint32_t blocks_per_slice,
|
||||
int current_bit,
|
||||
@ -338,7 +376,7 @@ __global__ void computeBlockwiseWithinKCounts(
|
||||
Bitwise* desires_out,
|
||||
uint32_t num_blocks
|
||||
) {
|
||||
// This kernel should be launched with the same number of blocks as the `radixFindKthValues` kernel.
|
||||
// This kernel should be launched with the same number of blocks as the `computeBlockDigitCounts` kernel.
|
||||
int tidx = threadIdx.x;
|
||||
uint32_t block_idx = getLinearBlockId<uint32_t>();
|
||||
uint32_t slice_idx = block_idx / blocks_per_slice;
|
||||
@ -351,36 +389,15 @@ __global__ void computeBlockwiseWithinKCounts(
|
||||
if (block_idx >= num_blocks) {
|
||||
return;
|
||||
}
|
||||
typedef cub::BlockScan<uint32_t, BLOCK_THREADS> BlockScan;
|
||||
union __align__(16) TempStorage {
|
||||
uint32_t digit_count_cumsum[RADIX_DIGITS]; // only used if this it the last block for this slice
|
||||
typename BlockScan::TempStorage scan_storage;
|
||||
};
|
||||
__shared__ TempStorage temp_storage;
|
||||
|
||||
// accumulates counters from multiple blocks
|
||||
uint32_t digit_count = 0;
|
||||
if (tidx < RADIX_DIGITS) {
|
||||
for (int blk = 0; blk < blocks_per_slice; ++blk) {
|
||||
digit_count += counts[(slice_idx * blocks_per_slice + blk) * RADIX_DIGITS + tidx];
|
||||
}
|
||||
}
|
||||
|
||||
// compute the block-wide inclusive prefix sum
|
||||
uint32_t digit_count_cumsum;
|
||||
BlockScan(temp_storage.scan_storage).InclusiveSum(digit_count, digit_count_cumsum);
|
||||
__syncthreads();
|
||||
// every thread also need the perfix_sum of it's left value for comparison, so save a copy in shared mem
|
||||
if (tidx < RADIX_DIGITS) {
|
||||
temp_storage.digit_count_cumsum[tidx] = digit_count_cumsum;
|
||||
}
|
||||
__syncthreads();
|
||||
|
||||
__shared__ Bitwise desired;
|
||||
uint32_t k_to_find = ks_to_find_in[slice_idx];
|
||||
|
||||
if (tidx < RADIX_DIGITS) {
|
||||
uint32_t digit_count_cumsum_left = (tidx == 0) ? 0 : temp_storage.digit_count_cumsum[tidx - 1];
|
||||
uint32_t position = slice_idx * RADIX_DIGITS + tidx;
|
||||
uint32_t digit_count_cumsum = digit_cum_sum[position];
|
||||
uint32_t digit_count_cumsum_left = (tidx == 0) ? 0 : digit_cum_sum[position - 1];
|
||||
|
||||
// if not the last pass: update desired and ks_to_find
|
||||
// if last pass: write out the kth value
|
||||
@ -466,7 +483,7 @@ template <typename Bitwise>
|
||||
__global__ void computeBlockwiseKthCounts(
|
||||
Bitwise* desires, // size: num_slices
|
||||
short* counts, // size: num_slices * blocks_per_slice * radix_digits
|
||||
uint32_t num_blocks, // the number of blocks used by `radixFindKthValues` kernel
|
||||
uint32_t num_blocks, // the number of blocks used by `computeBlockDigitCounts` kernel
|
||||
uint32_t blocks_per_slice,
|
||||
// outputs:
|
||||
uint32_t* kthCounts // size: num_slices * blocks_per_slice == num_blocks
|
||||
@ -649,9 +666,7 @@ void launch(
|
||||
T* kthValues = reinterpret_cast<T*>(kthValues_buffer.get());
|
||||
|
||||
TORCH_CHECK(blocks_per_slice <= std::numeric_limits<uint32_t>::max(), "blocks_per_slice larger than uint32 maximum is not supported");
|
||||
auto semaphores_buffer = allocator.allocate(numInputSlices * sizeof(uint32_t));
|
||||
uint32_t* semaphores = reinterpret_cast<uint32_t*>(semaphores_buffer.get());
|
||||
AT_CUDA_CHECK(cudaMemsetAsync(semaphores, 0, numInputSlices * sizeof(uint32_t), stream));
|
||||
|
||||
|
||||
auto ks_to_find_buffer = allocator.allocate(2 * numInputSlices * sizeof(uint32_t));
|
||||
uint32_t* ks_to_find = reinterpret_cast<uint32_t*>(ks_to_find_buffer.get());
|
||||
@ -668,6 +683,10 @@ void launch(
|
||||
static_assert(MAX_ITEMS_PER_THREAD * BLOCK_THREADS < std::numeric_limits<short>::max(),
|
||||
"blockwise counter too large");
|
||||
|
||||
auto digit_cum_sum_buffer = allocator.allocate(numInputSlices * RADIX_DIGITS * sizeof(uint32_t));
|
||||
uint32_t* digit_cum_sum = reinterpret_cast<uint32_t*>(digit_cum_sum_buffer.get());
|
||||
AT_CUDA_CHECK(cudaMemsetAsync(digit_cum_sum, 0, numInputSlices * RADIX_DIGITS * sizeof(uint32_t), stream));
|
||||
|
||||
#if CUB_SUPPORTS_SCAN_BY_KEY()
|
||||
auto withinKCounts_buffer = allocator.allocate(num_blocks * sizeof(uint32_t));
|
||||
uint32_t* withinKCounts = reinterpret_cast<uint32_t*>(withinKCounts_buffer.get());
|
||||
@ -691,7 +710,7 @@ void launch(
|
||||
|
||||
// iterate radix bits for multiple passes
|
||||
for (int current_bit = sizeof(T) * 8 - RADIX_BITS; current_bit >= 0; current_bit -= RADIX_BITS) {
|
||||
radixFindKthValues<T, IndexType, Bitwise, Dim><<<grid, block, 0, stream>>>(
|
||||
computeBlockDigitCounts<T, IndexType, Bitwise, Dim><<<grid, block, 0, stream>>>(
|
||||
input,
|
||||
inputSliceSize,
|
||||
ks_to_find_in, // unused arg
|
||||
@ -704,10 +723,14 @@ void launch(
|
||||
desired_in,
|
||||
counts);
|
||||
C10_CUDA_KERNEL_LAUNCH_CHECK();
|
||||
|
||||
computeDigitCumSum<<<numInputSlices, RADIX_DIGITS, 0, stream>>>(counts, digit_cum_sum, blocks_per_slice);
|
||||
C10_CUDA_KERNEL_LAUNCH_CHECK();
|
||||
|
||||
// we unconditionally call this kernel to update desired/ks_to_find/kthValues
|
||||
// if cub supports scan_by_key we additionally do k counts
|
||||
computeBlockwiseWithinKCounts<Bitwise, T><<<grid, RADIX_DIGITS, 0, stream>>>(
|
||||
desired_in, counts, ks_to_find_in, blocks_per_slice, current_bit, largest, withinKCounts, kthValues, ks_to_find_out, desired_out, num_blocks);
|
||||
desired_in, counts, digit_cum_sum, ks_to_find_in, blocks_per_slice, current_bit, largest, withinKCounts, kthValues, ks_to_find_out, desired_out, num_blocks);
|
||||
C10_CUDA_KERNEL_LAUNCH_CHECK();
|
||||
// swap desired/ks_to_find in and out for next iter
|
||||
auto tmp_desired = desired_in;
|
||||
|
@ -1107,10 +1107,14 @@ void ldl_factor_kernel(
|
||||
auto preferred_backend = at::globalContext().linalgPreferredBackend();
|
||||
switch (preferred_backend) {
|
||||
case at::LinalgBackend::Cusolver:
|
||||
return ldl_factor_cusolver(
|
||||
{ ldl_factor_cusolver(
|
||||
LD, pivots, info, upper, hermitian);
|
||||
return;
|
||||
}
|
||||
case at::LinalgBackend::Magma:
|
||||
return ldl_factor_magma(LD, pivots, info, upper, hermitian);
|
||||
{ ldl_factor_magma(LD, pivots, info, upper, hermitian);
|
||||
return;
|
||||
}
|
||||
default:
|
||||
// By default use cusolver if available and magma otherwise.
|
||||
// If cusolver and magma 2.5.4+ are both available and hermitian=true,
|
||||
@ -1122,8 +1126,10 @@ void ldl_factor_kernel(
|
||||
LD, pivots, info, upper, hermitian);
|
||||
}
|
||||
#endif
|
||||
return ldl_factor_cusolver(
|
||||
LD, pivots, info, upper, hermitian);
|
||||
{ ldl_factor_cusolver(
|
||||
LD, pivots, info, upper, hermitian);
|
||||
return;
|
||||
}
|
||||
#else
|
||||
return ldl_factor_magma(LD, pivots, info, upper, hermitian);
|
||||
#endif
|
||||
@ -1839,11 +1845,14 @@ void geqrf_kernel(const Tensor& input, const Tensor& tau) {
|
||||
// For the benchmarks see
|
||||
// https://github.com/pytorch/pytorch/pull/56253#discussion_r622851107
|
||||
if (input.size(-2) <= 256 && batchCount(input) >= std::max<int64_t>(2, input.size(-2) / 16)) {
|
||||
return geqrf_batched_cublas(input, tau);
|
||||
geqrf_batched_cublas(input, tau);
|
||||
return;
|
||||
} else {
|
||||
return geqrf_cusolver(input, tau);
|
||||
geqrf_cusolver(input, tau);
|
||||
return;
|
||||
}
|
||||
return geqrf_batched_cublas(input, tau);
|
||||
geqrf_batched_cublas(input, tau);
|
||||
return;
|
||||
};
|
||||
|
||||
auto preferred_backend = at::globalContext().linalgPreferredBackend();
|
||||
@ -1856,10 +1865,14 @@ void geqrf_kernel(const Tensor& input, const Tensor& tau) {
|
||||
// - ?geqrf_gpu allows fast computation of Q via ?orgqr_gpu, but doesn't give R properly.
|
||||
// - ?geqrf2_gpu gives correct R, but doesn't allow computation of Q via ?orgqr_gpu
|
||||
case at::LinalgBackend::Magma:
|
||||
return geqrf_magma(input, tau);
|
||||
{ geqrf_magma(input, tau);
|
||||
return;
|
||||
}
|
||||
case at::LinalgBackend::Cusolver:
|
||||
default:
|
||||
return geqrf_cusolver_backend(input, tau);
|
||||
{ geqrf_cusolver_backend(input, tau);
|
||||
return;
|
||||
}
|
||||
}
|
||||
#else
|
||||
return geqrf_magma(input, tau);
|
||||
@ -2703,13 +2716,17 @@ void gels_looped(const Tensor& a, Tensor& b, Tensor& infos) {
|
||||
auto preferred_backend = at::globalContext().linalgPreferredBackend();
|
||||
switch (preferred_backend) {
|
||||
case at::LinalgBackend::Magma:
|
||||
return gels_magma(a, b, infos);
|
||||
{ gels_magma(a, b, infos);
|
||||
return;
|
||||
}
|
||||
case at::LinalgBackend::Cusolver:
|
||||
default:
|
||||
// linalg_lstsq_gels is a generic function that is implemented using
|
||||
// geqrf_stub, ormqr_stub, and triangular_solve_stub
|
||||
// It dispatches to cuSOLVER for CUDA inputs if USE_LINALG_SOLVER is defined
|
||||
return linalg_lstsq_gels(a, b, infos);
|
||||
{ linalg_lstsq_gels(a, b, infos);
|
||||
return;
|
||||
}
|
||||
}
|
||||
#else
|
||||
return gels_magma(a, b, infos);
|
||||
|
@ -1222,7 +1222,7 @@ cudnnRNNAlgo_t get_algo(
|
||||
}
|
||||
|
||||
cudnnDataType_t promote_rnn_math_type(cudnnDataType_t dtype) {
|
||||
if (dtype == CUDNN_DATA_HALF) {
|
||||
if (dtype == CUDNN_DATA_HALF || dtype == CUDNN_DATA_BFLOAT16) {
|
||||
return CUDNN_DATA_FLOAT;
|
||||
}
|
||||
return dtype;
|
||||
|
@ -772,13 +772,21 @@ void dispatch_bfloat16_gemm_wmma(CUDABLAS_GEMM_ARGTYPES(at::BFloat16)) {
|
||||
|
||||
template <>
|
||||
void gemm_internal_ck<at::BFloat16>(CUDABLAS_GEMM_ARGTYPES(at::BFloat16)) {
|
||||
auto dprops = at::cuda::getCurrentDeviceProperties();
|
||||
std::string_view arch(dprops->gcnArchName);
|
||||
if (arch == "gfx1100") {
|
||||
static const std::vector<std::string> wmma_archs = {
|
||||
"gfx1100", "gfx1101", "gfx1102", "gfx1200", "gfx1201",
|
||||
#if ROCM_VERSION >= 70000
|
||||
"gfx1150", "gfx1151"
|
||||
#endif
|
||||
};
|
||||
if (at::detail::getCUDAHooks().isGPUArch(wmma_archs)) {
|
||||
dispatch_bfloat16_gemm_wmma(CUDABLAS_GEMM_ARGS(at::BFloat16));
|
||||
} else{
|
||||
}
|
||||
else if (at::detail::getCUDAHooks().isGPUArch({"gfx9"})) {
|
||||
dispatch_bfloat16_gemm(CUDABLAS_GEMM_ARGS(at::BFloat16));
|
||||
}
|
||||
else {
|
||||
TORCH_CHECK(false, "gemm_internal_ck<at::BFloat16> unsupported gfx arch");
|
||||
}
|
||||
}
|
||||
|
||||
} // namespace at::native
|
||||
|
@ -599,11 +599,21 @@ void dispatch_half_gemm_wmma(CUDABLAS_GEMM_ARGTYPES(at::Half)) {
|
||||
|
||||
template <>
|
||||
void gemm_internal_ck<at::Half>(CUDABLAS_GEMM_ARGTYPES(at::Half)) {
|
||||
if (at::detail::getCUDAHooks().isGPUArch({"gfx1100"})) {
|
||||
static const std::vector<std::string> wmma_archs = {
|
||||
"gfx1100", "gfx1101", "gfx1102", "gfx1200", "gfx1201",
|
||||
#if ROCM_VERSION >= 70000
|
||||
"gfx1150", "gfx1151"
|
||||
#endif
|
||||
};
|
||||
if (at::detail::getCUDAHooks().isGPUArch(wmma_archs)) {
|
||||
dispatch_half_gemm_wmma(CUDABLAS_GEMM_ARGS(at::Half));
|
||||
} else{
|
||||
}
|
||||
else if (at::detail::getCUDAHooks().isGPUArch({"gfx9"})) {
|
||||
dispatch_half_gemm(CUDABLAS_GEMM_ARGS(at::Half));
|
||||
}
|
||||
else {
|
||||
TORCH_CHECK(false, "gemm_internal_ck<at::Half> unsupported gfx arch");
|
||||
}
|
||||
}
|
||||
|
||||
} // namespace at::native
|
||||
|
@ -373,59 +373,67 @@ void addmm_out_sparse_csr(
|
||||
if (mat2.layout() == kSparseCsr) {
|
||||
if (result.layout() == kStrided) {
|
||||
// TODO: Add native CSC support via cuSPARSE if supported.
|
||||
return addmm_dense_result(
|
||||
addmm_dense_result(
|
||||
mat2.transpose(0, 1).to_sparse_csr(),
|
||||
mat1.transpose(0, 1),
|
||||
beta,
|
||||
alpha,
|
||||
result.transpose(0, 1));
|
||||
return;
|
||||
}
|
||||
}
|
||||
if (mat2.layout() == kSparseCsc) {
|
||||
if (result.layout() == kStrided) {
|
||||
return addmm_dense_result(
|
||||
addmm_dense_result(
|
||||
mat2.transpose(-2, -1),
|
||||
mat1.transpose(-2, -1),
|
||||
beta,
|
||||
alpha,
|
||||
result.transpose(-2, -1));
|
||||
return;
|
||||
}
|
||||
}
|
||||
if (mat2.layout() == kSparseBsc) {
|
||||
if (result.layout() == kStrided) {
|
||||
return addmm_dense_result(
|
||||
addmm_dense_result(
|
||||
mat2.transpose(-2, -1),
|
||||
mat1.transpose(-2, -1),
|
||||
beta,
|
||||
alpha,
|
||||
result.transpose(-2, -1));
|
||||
return;
|
||||
}
|
||||
}
|
||||
}
|
||||
if (mat1.layout() == kSparseCsr) {
|
||||
if (mat2.layout() == kStrided) {
|
||||
if (result.layout() == kStrided) {
|
||||
return addmm_dense_result(mat1, mat2, beta, alpha, result);
|
||||
addmm_dense_result(mat1, mat2, beta, alpha, result);
|
||||
return;
|
||||
}
|
||||
}
|
||||
if (mat2.layout() == kSparseCsr) {
|
||||
if (result.layout() == kStrided) {
|
||||
return addmm_sparse_input_dense_result(mat1, mat2, beta, alpha, result);
|
||||
addmm_sparse_input_dense_result(mat1, mat2, beta, alpha, result);
|
||||
return;
|
||||
}
|
||||
if (result.layout() == kSparseCsr) {
|
||||
return addmm_sparse_result(mat1, mat2, beta, alpha, result);
|
||||
addmm_sparse_result(mat1, mat2, beta, alpha, result);
|
||||
return;
|
||||
}
|
||||
}
|
||||
if (mat2.layout() == kSparseCsc) {
|
||||
if (result.layout() == kStrided) {
|
||||
// TODO: CSR @ CSC kernel would be very fast due to format alignment
|
||||
return addmm_sparse_input_dense_result(
|
||||
mat1, mat2.to_sparse_csr(), beta, alpha, result);
|
||||
addmm_sparse_input_dense_result(
|
||||
mat1, mat2.to_sparse_csr(), beta, alpha, result);
|
||||
return;
|
||||
}
|
||||
if (result.layout() == kSparseCsr) {
|
||||
// TODO: CSR @ CSC kernel would be very fast due to format alignment
|
||||
return addmm_sparse_result(
|
||||
mat1, mat2.to_sparse_csr(), beta, alpha, result);
|
||||
addmm_sparse_result(
|
||||
mat1, mat2.to_sparse_csr(), beta, alpha, result);
|
||||
return;
|
||||
}
|
||||
}
|
||||
}
|
||||
@ -433,56 +441,62 @@ void addmm_out_sparse_csr(
|
||||
if (mat2.layout() == kStrided) {
|
||||
if (result.layout() == kStrided) {
|
||||
// TODO: avoid csc->csr conversion with native csc support
|
||||
return addmm_dense_result(
|
||||
mat1.to_sparse_csr(), mat2, beta, alpha, result);
|
||||
addmm_dense_result(
|
||||
mat1.to_sparse_csr(), mat2, beta, alpha, result);
|
||||
return;
|
||||
}
|
||||
}
|
||||
if (mat2.layout() == kSparseCsr) {
|
||||
if (result.layout() == kSparseCsr) {
|
||||
// TODO: avoid csc->csr conversion with native csc support
|
||||
return addmm_sparse_result(
|
||||
mat1.to_sparse_csr(), mat2, beta, alpha, result);
|
||||
addmm_sparse_result(
|
||||
mat1.to_sparse_csr(), mat2, beta, alpha, result);
|
||||
return;
|
||||
}
|
||||
}
|
||||
if (mat2.layout() == kSparseCsc) {
|
||||
if (result.layout() == kStrided) {
|
||||
return addmm_sparse_input_dense_result(
|
||||
mat2.transpose(-2, -1),
|
||||
mat1.transpose(-2, -1),
|
||||
beta,
|
||||
alpha,
|
||||
result.transpose(-2, -1));
|
||||
addmm_sparse_input_dense_result(
|
||||
mat2.transpose(-2, -1),
|
||||
mat1.transpose(-2, -1),
|
||||
beta,
|
||||
alpha,
|
||||
result.transpose(-2, -1));
|
||||
return;
|
||||
}
|
||||
if (result.layout() == kSparseCsr) {
|
||||
// TODO avoid csc->csr
|
||||
return addmm_sparse_result(
|
||||
mat1.to_sparse_csr(), mat2.to_sparse_csr(), beta, alpha, result);
|
||||
addmm_sparse_result(
|
||||
mat1.to_sparse_csr(), mat2.to_sparse_csr(), beta, alpha, result);
|
||||
return;
|
||||
}
|
||||
if (result.layout() == kSparseCsc) {
|
||||
return addmm_sparse_result(
|
||||
mat2.transpose(-2, -1),
|
||||
mat1.transpose(-2, -1),
|
||||
beta,
|
||||
alpha,
|
||||
result.transpose(-2, -1));
|
||||
addmm_sparse_result(
|
||||
mat2.transpose(-2, -1),
|
||||
mat1.transpose(-2, -1),
|
||||
beta,
|
||||
alpha,
|
||||
result.transpose(-2, -1));
|
||||
return;
|
||||
}
|
||||
}
|
||||
}
|
||||
if (mat1.layout() == kSparseBsr) {
|
||||
if (mat2.layout() == kStrided) {
|
||||
if (result.layout() == kStrided) {
|
||||
return addmm_dense_result(mat1, mat2, beta, alpha, result);
|
||||
addmm_dense_result(mat1, mat2, beta, alpha, result);
|
||||
return;
|
||||
}
|
||||
}
|
||||
}
|
||||
TORCH_CHECK(
|
||||
false,
|
||||
"addmm: computation on CPU is not implemented for ",
|
||||
result.layout(),
|
||||
" + ",
|
||||
mat1.layout(),
|
||||
" @ ",
|
||||
mat2.layout());
|
||||
false,
|
||||
"addmm: computation on CPU is not implemented for ",
|
||||
result.layout(),
|
||||
" + ",
|
||||
mat1.layout(),
|
||||
" @ ",
|
||||
mat2.layout());
|
||||
}
|
||||
|
||||
/*
|
||||
@ -496,16 +510,16 @@ void addmm_out_sparse_csr(
|
||||
[out] result of the operation.
|
||||
*/
|
||||
void addmv_out_sparse_csr(
|
||||
const Tensor& mat,
|
||||
const Tensor& vec,
|
||||
const Scalar& beta,
|
||||
const Scalar& alpha,
|
||||
const Tensor& result) {
|
||||
const Tensor& mat,
|
||||
const Tensor& vec,
|
||||
const Scalar& beta,
|
||||
const Scalar& alpha,
|
||||
const Tensor& result) {
|
||||
#if !AT_USE_MKL_SPARSE()
|
||||
TORCH_CHECK(
|
||||
false,
|
||||
"Calling addmv on a sparse CPU tensor requires Linux platform. ",
|
||||
"Please use PyTorch built with MKL on Linux.");
|
||||
false,
|
||||
"Calling addmv on a sparse CPU tensor requires Linux platform. ",
|
||||
"Please use PyTorch built with MKL on Linux.");
|
||||
#else
|
||||
c10::MaybeOwned<Tensor> result_ = prepare_dense_vector_for_mkl(result);
|
||||
c10::MaybeOwned<Tensor> vec_ = prepare_dense_vector_for_mkl(vec);
|
||||
|
@ -5,38 +5,6 @@
|
||||
# representing ScalarType's. They are now superseded by usage of
|
||||
# `aten::to()`. The ops remain here for backward compatibility purposes.
|
||||
|
||||
# DEPRECATED. DO NOT USE
|
||||
- func: _cast_Byte(Tensor self, bool non_blocking=False) -> Tensor
|
||||
variants: function
|
||||
|
||||
# DEPRECATED. DO NOT USE
|
||||
- func: _cast_Char(Tensor self, bool non_blocking=False) -> Tensor
|
||||
variants: function
|
||||
|
||||
# DEPRECATED. DO NOT USE
|
||||
- func: _cast_Double(Tensor self, bool non_blocking=False) -> Tensor
|
||||
variants: function
|
||||
|
||||
# DEPRECATED. DO NOT USE
|
||||
- func: _cast_Float(Tensor self, bool non_blocking=False) -> Tensor
|
||||
variants: function
|
||||
|
||||
# DEPRECATED. DO NOT USE
|
||||
- func: _cast_Int(Tensor self, bool non_blocking=False) -> Tensor
|
||||
variants: function
|
||||
|
||||
# DEPRECATED. DO NOT USE
|
||||
- func: _cast_Long(Tensor self, bool non_blocking=False) -> Tensor
|
||||
variants: function
|
||||
|
||||
# DEPRECATED. DO NOT USE
|
||||
- func: _cast_Short(Tensor self, bool non_blocking=False) -> Tensor
|
||||
variants: function
|
||||
|
||||
# DEPRECATED. DO NOT USE
|
||||
- func: _cast_Half(Tensor self, bool non_blocking=False) -> Tensor
|
||||
variants: function
|
||||
|
||||
# Computes the gradient of current tensor w.r.t. graph leaves.
|
||||
- func: _backward(Tensor self, Tensor[] inputs, Tensor? gradient=None, bool? retain_graph=None, bool create_graph=False) -> ()
|
||||
manual_cpp_binding: True
|
||||
|
@ -810,7 +810,8 @@ void addmm_out_sparse_csr(
|
||||
if (mat1.layout() == kSparseBsr) {
|
||||
if (mat2.layout() == kStrided) {
|
||||
if (result.layout() == kStrided)
|
||||
return block_sparse_mm(input, mat1, mat2, beta, alpha, result);
|
||||
{ block_sparse_mm(input, mat1, mat2, beta, alpha, result); return;
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
@ -819,13 +820,13 @@ void addmm_out_sparse_csr(
|
||||
if (result.layout() == kStrided) {
|
||||
auto result_t = result.transpose(-2, -1);
|
||||
auto input_t = (result.is_same(input) ? result_t : input.transpose(-2, -1));
|
||||
return block_sparse_mm(
|
||||
block_sparse_mm(
|
||||
input_t,
|
||||
mat2.transpose(-2, -1),
|
||||
mat1.transpose(-2, -1),
|
||||
beta,
|
||||
alpha,
|
||||
result_t);
|
||||
result_t); return;
|
||||
}
|
||||
}
|
||||
}
|
||||
@ -840,41 +841,41 @@ void addmm_out_sparse_csr(
|
||||
if (mat2.layout() == kSparseCsr) {
|
||||
if (result.layout() == kStrided) {
|
||||
// TODO: Add native CSC support via cuSPARSE if supported.
|
||||
return spmm(
|
||||
spmm(
|
||||
mat2.transpose(0, 1).to_sparse_csr(),
|
||||
mat1.transpose(0, 1),
|
||||
beta,
|
||||
alpha,
|
||||
result.transpose(0, 1));
|
||||
result.transpose(0, 1)); return;
|
||||
}
|
||||
}
|
||||
if (mat2.layout() == kSparseCsc) {
|
||||
if (result.layout() == kStrided) {
|
||||
return spmm(
|
||||
spmm(
|
||||
mat2.transpose(-2, -1),
|
||||
mat1.transpose(-2, -1),
|
||||
beta,
|
||||
alpha,
|
||||
result.transpose(-2, -1));
|
||||
result.transpose(-2, -1)); return;
|
||||
}
|
||||
}
|
||||
}
|
||||
if (mat1.layout() == kSparseCsr) {
|
||||
if (mat2.layout() == kStrided) {
|
||||
if (result.layout() == kStrided) {
|
||||
return spmm(mat1, mat2, beta, alpha, result);
|
||||
spmm(mat1, mat2, beta, alpha, result); return;
|
||||
}
|
||||
}
|
||||
if (mat2.layout() == kSparseCsr) {
|
||||
if (result.layout() == kSparseCsr) {
|
||||
return spgemm(mat1, mat2, beta, alpha, result);
|
||||
spgemm(mat1, mat2, beta, alpha, result); return;
|
||||
}
|
||||
}
|
||||
if (mat2.layout() == kSparseCsc) {
|
||||
if (result.layout() == kSparseCsr) {
|
||||
// TODO: Add native CSC support via cuSPARSE if supported.
|
||||
// CSR @ CSC kernel would be very fast due to format alignment
|
||||
return spgemm(mat1, mat2.to_sparse_csr(), beta, alpha, result);
|
||||
spgemm(mat1, mat2.to_sparse_csr(), beta, alpha, result); return;
|
||||
}
|
||||
}
|
||||
}
|
||||
@ -882,27 +883,28 @@ void addmm_out_sparse_csr(
|
||||
if (mat2.layout() == kStrided) {
|
||||
if (result.layout() == kStrided) {
|
||||
// TODO: Add native CSC support via cuSPARSE if supported.
|
||||
return spmm(mat1.to_sparse_csr(), mat2, beta, alpha, result);
|
||||
spmm(mat1.to_sparse_csr(), mat2, beta, alpha, result); return;
|
||||
}
|
||||
}
|
||||
if (mat2.layout() == kSparseCsr) {
|
||||
if (result.layout() == kSparseCsr)
|
||||
// TODO: Add native CSC support via cuSPARSE if supported.
|
||||
return spgemm(mat1.to_sparse_csr(), mat2, beta, alpha, result);
|
||||
{ spgemm(mat1.to_sparse_csr(), mat2, beta, alpha, result); return;
|
||||
}
|
||||
}
|
||||
if (mat2.layout() == kSparseCsc) {
|
||||
if (result.layout() == kSparseCsr) {
|
||||
// TODO: Add native CSC support via cuSPARSE if supported.
|
||||
return spgemm(
|
||||
mat1.to_sparse_csr(), mat2.to_sparse_csr(), beta, alpha, result);
|
||||
spgemm(
|
||||
mat1.to_sparse_csr(), mat2.to_sparse_csr(), beta, alpha, result); return;
|
||||
}
|
||||
if (result.layout() == kSparseCsc) {
|
||||
return spgemm(
|
||||
spgemm(
|
||||
mat2.transpose(-2, -1),
|
||||
mat1.transpose(-2, -1),
|
||||
beta,
|
||||
alpha,
|
||||
result.transpose(-2, -1));
|
||||
result.transpose(-2, -1)); return;
|
||||
}
|
||||
}
|
||||
}
|
||||
@ -933,7 +935,7 @@ void addmv_out_sparse_csr(
|
||||
const Scalar& alpha,
|
||||
const Tensor& result) {
|
||||
if (mat.layout() == kSparseBsr) {
|
||||
return block_sparse_mv(mat, vec, beta, alpha, result);
|
||||
block_sparse_mv(mat, vec, beta, alpha, result); return;
|
||||
}
|
||||
cusparseOperation_t opA = CUSPARSE_OPERATION_NON_TRANSPOSE;
|
||||
|
||||
@ -1213,9 +1215,9 @@ void triangular_solve_out_sparse_csr(
|
||||
}
|
||||
if (A.layout() == kSparseBsr) {
|
||||
if (B.size(-1) == 1) {
|
||||
return block_sparse_triangular_solve_vec(A, B, X, upper, transpose, unitriangular);
|
||||
block_sparse_triangular_solve_vec(A, B, X, upper, transpose, unitriangular); return;
|
||||
} else {
|
||||
return block_sparse_triangular_solve_mat(A, B, X, upper, transpose, unitriangular);
|
||||
block_sparse_triangular_solve_mat(A, B, X, upper, transpose, unitriangular); return;
|
||||
}
|
||||
}
|
||||
#ifdef USE_ROCM
|
||||
|
@ -117,7 +117,7 @@ class FwdKernel:
|
||||
def get_all(cls) -> list["FwdKernel"]:
|
||||
kernels: list[FwdKernel] = []
|
||||
for aligned, dtype, (sm, sm_max) in itertools.product(
|
||||
[True, False], DTYPES.keys(), zip(SM, SM[1:])
|
||||
[True, False], DTYPES.keys(), itertools.pairwise(SM)
|
||||
):
|
||||
# Remove some kernels we don't use
|
||||
if dtype == "bf16" and sm < 80:
|
||||
@ -228,7 +228,7 @@ class BwdKernel:
|
||||
for aligned, dtype, (sm, sm_max), apply_dropout, max_k in itertools.product(
|
||||
[True, False],
|
||||
DTYPES.keys(),
|
||||
zip(SM, SM[1:]),
|
||||
itertools.pairwise(SM),
|
||||
[True, False],
|
||||
[32, 64, 128, 2**16],
|
||||
):
|
||||
|
191
benchmarks/distributed/bench_nvshmem_tile_reduce.py
Normal file
191
benchmarks/distributed/bench_nvshmem_tile_reduce.py
Normal file
@ -0,0 +1,191 @@
|
||||
#!/usr/bin/env python3
|
||||
"""
|
||||
Benchmark for NVSHMEM tile reduce operations.
|
||||
|
||||
Usage:
|
||||
python benchmarks/distributed/bench_nvshmem_tile_reduce.py
|
||||
|
||||
This benchmark measures the performance of tile reduce operations across different
|
||||
matrix sizes and tile configurations.
|
||||
"""
|
||||
|
||||
import time
|
||||
|
||||
import torch
|
||||
import torch.distributed as dist
|
||||
import torch.distributed._symmetric_memory as symm_mem
|
||||
from torch.testing._internal.common_distributed import MultiProcContinuousTest
|
||||
from torch.testing._internal.common_utils import (
|
||||
requires_cuda_p2p_access,
|
||||
skip_but_pass_in_sandcastle_if,
|
||||
skipIfRocm,
|
||||
)
|
||||
|
||||
|
||||
# Decorator
|
||||
def requires_nvshmem():
|
||||
return skip_but_pass_in_sandcastle_if(
|
||||
not symm_mem.is_nvshmem_available(),
|
||||
"bench_nvshmem_tile_reduce requires NVSHMEM, skipping benchmark",
|
||||
)
|
||||
|
||||
|
||||
# So that benchmarks are written in device-agnostic way
|
||||
device_type = "cuda"
|
||||
device_module = torch.get_device_module(device_type)
|
||||
|
||||
|
||||
@requires_nvshmem()
|
||||
@requires_cuda_p2p_access()
|
||||
class NVSHMEMTileReduceBenchmark(MultiProcContinuousTest):
|
||||
def _init_device(self) -> None:
|
||||
# TODO: relieve this (seems to hang if without)
|
||||
device_module.set_device(self.device)
|
||||
# Set NVSHMEM as SymmMem backend
|
||||
symm_mem.set_backend("NVSHMEM")
|
||||
|
||||
@property
|
||||
def device(self) -> torch.device:
|
||||
return torch.device(device_type, self.rank)
|
||||
|
||||
def _benchmark_tile_reduce_single(
|
||||
self,
|
||||
full_size: int,
|
||||
tile_size: int,
|
||||
warmup_iters: int = 5,
|
||||
bench_iters: int = 10,
|
||||
) -> dict:
|
||||
"""
|
||||
Benchmark a single configuration of tile reduce.
|
||||
|
||||
Args:
|
||||
full_size: Size of the full matrix (full_size x full_size)
|
||||
warmup_iters: Number of warmup iterations
|
||||
bench_iters: Number of benchmark iterations
|
||||
|
||||
Returns:
|
||||
Dictionary with benchmark results
|
||||
"""
|
||||
self._init_device()
|
||||
group_name = dist.group.WORLD.group_name
|
||||
symm_mem.enable_symm_mem_for_group(group_name)
|
||||
|
||||
dtype = torch.float
|
||||
|
||||
# Allocate full matrices
|
||||
full_inp = symm_mem.empty(
|
||||
full_size, full_size, dtype=dtype, device=self.device
|
||||
).fill_(self.rank)
|
||||
full_out = symm_mem.empty(
|
||||
full_size, full_size, dtype=dtype, device=self.device
|
||||
).fill_(0)
|
||||
|
||||
slice_ut = slice(0, tile_size)
|
||||
inp_tile = full_inp[slice_ut, slice_ut]
|
||||
out_tile = full_out[slice_ut, slice_ut]
|
||||
|
||||
root = 0
|
||||
|
||||
# Warmup iterations
|
||||
for _ in range(warmup_iters):
|
||||
torch.ops.symm_mem.tile_reduce(inp_tile, out_tile, root, group_name)
|
||||
torch.cuda.synchronize(self.device)
|
||||
|
||||
# Benchmark iterations
|
||||
times = []
|
||||
|
||||
dist.barrier()
|
||||
torch.cuda.synchronize(self.device)
|
||||
start_time = time.perf_counter()
|
||||
|
||||
for _ in range(bench_iters):
|
||||
torch.ops.symm_mem.tile_reduce(inp_tile, out_tile, root, group_name)
|
||||
|
||||
torch.cuda.synchronize(self.device)
|
||||
end_time = time.perf_counter()
|
||||
times.append((end_time - start_time) / bench_iters)
|
||||
|
||||
# Calculate statistics
|
||||
times = torch.tensor(times, dtype=torch.float64)
|
||||
tile_elements = tile_size * tile_size
|
||||
tile_bytes = (
|
||||
tile_elements * dtype.itemsize
|
||||
if hasattr(dtype, "itemsize")
|
||||
else tile_elements * 4
|
||||
)
|
||||
|
||||
results = {
|
||||
"full_size": full_size,
|
||||
"tile_size": tile_size,
|
||||
"tile_elements": tile_elements,
|
||||
"tile_bytes": tile_bytes,
|
||||
"world_size": self.world_size,
|
||||
"mean_time_ms": times.mean().item() * 1000,
|
||||
"std_time_ms": times.std().item() * 1000,
|
||||
"min_time_ms": times.min().item() * 1000,
|
||||
"max_time_ms": times.max().item() * 1000,
|
||||
"throughput_gb_s": tile_bytes / (times.mean().item() * 1e9),
|
||||
"elements_per_sec": tile_elements / times.mean().item(),
|
||||
}
|
||||
|
||||
return results
|
||||
|
||||
@skipIfRocm
|
||||
def test_benchmark_tile_reduce_various_sizes(self) -> None:
|
||||
"""
|
||||
Benchmark tile reduce across various matrix sizes.
|
||||
"""
|
||||
# Test various matrix sizes
|
||||
tile_sizes = [512, 1024, 2048, 4096, 8192, 16384]
|
||||
full_size = tile_sizes[-1]
|
||||
warmup_iters = 5
|
||||
bench_iters = 20
|
||||
|
||||
results = []
|
||||
|
||||
for tile_size in tile_sizes:
|
||||
try:
|
||||
result = self._benchmark_tile_reduce_single(
|
||||
full_size, tile_size, warmup_iters, bench_iters
|
||||
)
|
||||
results.append(result)
|
||||
|
||||
if self.rank == 0:
|
||||
print(
|
||||
f"Matrix Size: {full_size}x{full_size}, Tile Size: {tile_size}x{tile_size}"
|
||||
)
|
||||
print(
|
||||
f" Mean Time: {result['mean_time_ms']:.3f} ± {result['std_time_ms']:.3f} ms"
|
||||
)
|
||||
print(f" Throughput: {result['throughput_gb_s']:.2f} GB/s")
|
||||
print(f" Bytes: {result['tile_bytes']:.0f}")
|
||||
print()
|
||||
|
||||
except Exception as e:
|
||||
if self.rank == 0:
|
||||
print(f"Failed to benchmark matrix size {full_size}: {e}")
|
||||
|
||||
# Print summary
|
||||
if self.rank == 0 and results:
|
||||
print("=== BENCHMARK SUMMARY ===")
|
||||
print(
|
||||
f"{'Matrix Size':<12} {'Tile Size':<10} {'Time (ms)':<12} {'Throughput (GB/s)':<18} {'Bytes':<15}"
|
||||
)
|
||||
print("-" * 70)
|
||||
|
||||
for result in results:
|
||||
print(
|
||||
f"{result['full_size']}x{result['full_size']:<7} "
|
||||
f"{result['tile_size']}x{result['tile_size']:<5} "
|
||||
f"{result['mean_time_ms']:<12.3f} "
|
||||
f"{result['throughput_gb_s']:<18.2f} "
|
||||
f"{result['tile_bytes']:<15.0f}"
|
||||
)
|
||||
|
||||
|
||||
if __name__ == "__main__":
|
||||
# For standalone usage, you'd need to set up distributed environment
|
||||
# For now, this is meant to be run via the PyTorch test framework
|
||||
from torch.testing._internal.common_utils import run_tests
|
||||
|
||||
run_tests()
|
@ -6,10 +6,6 @@ AlbertForMaskedLM,pass,0
|
||||
|
||||
|
||||
|
||||
AlbertForQuestionAnswering,pass,0
|
||||
|
||||
|
||||
|
||||
AllenaiLongformerBase,pass,4
|
||||
|
||||
|
||||
@ -18,50 +14,22 @@ BartForCausalLM,pass,0
|
||||
|
||||
|
||||
|
||||
BartForConditionalGeneration,pass,0
|
||||
|
||||
|
||||
|
||||
BertForMaskedLM,pass,0
|
||||
|
||||
|
||||
|
||||
BertForQuestionAnswering,pass,0
|
||||
|
||||
|
||||
|
||||
BlenderbotForCausalLM,pass_due_to_skip,0
|
||||
|
||||
|
||||
|
||||
BlenderbotSmallForCausalLM,pass,0
|
||||
|
||||
|
||||
|
||||
BlenderbotSmallForConditionalGeneration,pass,0
|
||||
|
||||
|
||||
|
||||
CamemBert,pass,0
|
||||
|
||||
|
||||
|
||||
DebertaV2ForMaskedLM,pass_due_to_skip,0
|
||||
|
||||
|
||||
|
||||
DebertaV2ForQuestionAnswering,pass,0
|
||||
|
||||
|
||||
|
||||
DistilBertForMaskedLM,pass,0
|
||||
|
||||
|
||||
|
||||
DistilBertForQuestionAnswering,pass,0
|
||||
|
||||
|
||||
|
||||
DistillGPT2,pass,2
|
||||
|
||||
|
||||
@ -70,10 +38,6 @@ ElectraForCausalLM,pass,0
|
||||
|
||||
|
||||
|
||||
ElectraForQuestionAnswering,pass,0
|
||||
|
||||
|
||||
|
||||
GPT2ForSequenceClassification,pass,0
|
||||
|
||||
|
||||
@ -86,10 +50,6 @@ LayoutLMForMaskedLM,pass,0
|
||||
|
||||
|
||||
|
||||
LayoutLMForSequenceClassification,pass,0
|
||||
|
||||
|
||||
|
||||
M2M100ForConditionalGeneration,pass,0
|
||||
|
||||
|
||||
@ -98,10 +58,6 @@ MBartForCausalLM,pass,0
|
||||
|
||||
|
||||
|
||||
MBartForConditionalGeneration,pass,0
|
||||
|
||||
|
||||
|
||||
MT5ForConditionalGeneration,pass,0
|
||||
|
||||
|
||||
@ -110,18 +66,10 @@ MegatronBertForCausalLM,pass,0
|
||||
|
||||
|
||||
|
||||
MegatronBertForQuestionAnswering,pass,0
|
||||
|
||||
|
||||
|
||||
MobileBertForMaskedLM,pass,0
|
||||
|
||||
|
||||
|
||||
MobileBertForQuestionAnswering,pass,0
|
||||
|
||||
|
||||
|
||||
OPTForCausalLM,pass,0
|
||||
|
||||
|
||||
@ -130,26 +78,14 @@ PLBartForCausalLM,pass,0
|
||||
|
||||
|
||||
|
||||
PLBartForConditionalGeneration,pass,0
|
||||
|
||||
|
||||
|
||||
PegasusForCausalLM,pass,0
|
||||
|
||||
|
||||
|
||||
PegasusForConditionalGeneration,pass,0
|
||||
|
||||
|
||||
|
||||
RobertaForCausalLM,pass,0
|
||||
|
||||
|
||||
|
||||
RobertaForQuestionAnswering,pass,0
|
||||
|
||||
|
||||
|
||||
T5ForConditionalGeneration,pass,0
|
||||
|
||||
|
||||
|
|
@ -6,10 +6,6 @@ AlbertForMaskedLM,pass,4
|
||||
|
||||
|
||||
|
||||
AlbertForQuestionAnswering,pass,5
|
||||
|
||||
|
||||
|
||||
AllenaiLongformerBase,pass,9
|
||||
|
||||
|
||||
@ -18,50 +14,22 @@ BartForCausalLM,pass,6
|
||||
|
||||
|
||||
|
||||
BartForConditionalGeneration,pass,8
|
||||
|
||||
|
||||
|
||||
BertForMaskedLM,pass,5
|
||||
|
||||
|
||||
|
||||
BertForQuestionAnswering,pass,5
|
||||
|
||||
|
||||
|
||||
BlenderbotForCausalLM,eager_fail_to_run,0
|
||||
|
||||
|
||||
|
||||
BlenderbotSmallForCausalLM,pass,6
|
||||
|
||||
|
||||
|
||||
BlenderbotSmallForConditionalGeneration,pass,8
|
||||
|
||||
|
||||
|
||||
CamemBert,pass,5
|
||||
|
||||
|
||||
|
||||
DebertaV2ForMaskedLM,pass_due_to_skip,0
|
||||
|
||||
|
||||
|
||||
DebertaV2ForQuestionAnswering,eager_1st_run_OOM,0
|
||||
|
||||
|
||||
|
||||
DistilBertForMaskedLM,pass,5
|
||||
|
||||
|
||||
|
||||
DistilBertForQuestionAnswering,pass,5
|
||||
|
||||
|
||||
|
||||
DistillGPT2,pass,7
|
||||
|
||||
|
||||
@ -70,10 +38,6 @@ ElectraForCausalLM,pass,4
|
||||
|
||||
|
||||
|
||||
ElectraForQuestionAnswering,pass,5
|
||||
|
||||
|
||||
|
||||
GPT2ForSequenceClassification,pass,6
|
||||
|
||||
|
||||
@ -86,10 +50,6 @@ LayoutLMForMaskedLM,pass,5
|
||||
|
||||
|
||||
|
||||
LayoutLMForSequenceClassification,pass,6
|
||||
|
||||
|
||||
|
||||
M2M100ForConditionalGeneration,pass,4
|
||||
|
||||
|
||||
@ -98,10 +58,6 @@ MBartForCausalLM,pass,6
|
||||
|
||||
|
||||
|
||||
MBartForConditionalGeneration,pass,8
|
||||
|
||||
|
||||
|
||||
MT5ForConditionalGeneration,pass,5
|
||||
|
||||
|
||||
@ -110,18 +66,10 @@ MegatronBertForCausalLM,pass,5
|
||||
|
||||
|
||||
|
||||
MegatronBertForQuestionAnswering,pass,5
|
||||
|
||||
|
||||
|
||||
MobileBertForMaskedLM,pass,3
|
||||
|
||||
|
||||
|
||||
MobileBertForQuestionAnswering,pass,3
|
||||
|
||||
|
||||
|
||||
OPTForCausalLM,pass,8
|
||||
|
||||
|
||||
@ -130,26 +78,14 @@ PLBartForCausalLM,pass,6
|
||||
|
||||
|
||||
|
||||
PLBartForConditionalGeneration,pass,8
|
||||
|
||||
|
||||
|
||||
PegasusForCausalLM,pass,6
|
||||
|
||||
|
||||
|
||||
PegasusForConditionalGeneration,pass,7
|
||||
|
||||
|
||||
|
||||
RobertaForCausalLM,pass,5
|
||||
|
||||
|
||||
|
||||
RobertaForQuestionAnswering,pass,5
|
||||
|
||||
|
||||
|
||||
T5ForConditionalGeneration,pass,5
|
||||
|
||||
|
||||
|
|
@ -6,58 +6,26 @@ AlbertForMaskedLM,pass,0
|
||||
|
||||
|
||||
|
||||
AlbertForQuestionAnswering,pass,0
|
||||
|
||||
|
||||
|
||||
BartForCausalLM,pass,0
|
||||
|
||||
|
||||
|
||||
BartForConditionalGeneration,pass,0
|
||||
|
||||
|
||||
|
||||
BertForMaskedLM,pass,0
|
||||
|
||||
|
||||
|
||||
BertForQuestionAnswering,pass,0
|
||||
|
||||
|
||||
|
||||
BlenderbotForCausalLM,pass_due_to_skip,0
|
||||
|
||||
|
||||
|
||||
BlenderbotSmallForCausalLM,pass,0
|
||||
|
||||
|
||||
|
||||
BlenderbotSmallForConditionalGeneration,pass,0
|
||||
|
||||
|
||||
|
||||
CamemBert,pass,0
|
||||
|
||||
|
||||
|
||||
DebertaV2ForMaskedLM,pass_due_to_skip,0
|
||||
|
||||
|
||||
|
||||
DebertaV2ForQuestionAnswering,pass,0
|
||||
|
||||
|
||||
|
||||
DistilBertForMaskedLM,pass,0
|
||||
|
||||
|
||||
|
||||
DistilBertForQuestionAnswering,pass,0
|
||||
|
||||
|
||||
|
||||
DistillGPT2,pass,0
|
||||
|
||||
|
||||
@ -66,10 +34,6 @@ ElectraForCausalLM,pass,0
|
||||
|
||||
|
||||
|
||||
ElectraForQuestionAnswering,pass,0
|
||||
|
||||
|
||||
|
||||
GPT2ForSequenceClassification,pass,0
|
||||
|
||||
|
||||
@ -82,10 +46,6 @@ LayoutLMForMaskedLM,pass,0
|
||||
|
||||
|
||||
|
||||
LayoutLMForSequenceClassification,pass,0
|
||||
|
||||
|
||||
|
||||
M2M100ForConditionalGeneration,pass,0
|
||||
|
||||
|
||||
@ -94,10 +54,6 @@ MBartForCausalLM,pass,0
|
||||
|
||||
|
||||
|
||||
MBartForConditionalGeneration,pass,0
|
||||
|
||||
|
||||
|
||||
MT5ForConditionalGeneration,pass,0
|
||||
|
||||
|
||||
@ -106,18 +62,10 @@ MegatronBertForCausalLM,pass,0
|
||||
|
||||
|
||||
|
||||
MegatronBertForQuestionAnswering,pass,0
|
||||
|
||||
|
||||
|
||||
MobileBertForMaskedLM,pass,0
|
||||
|
||||
|
||||
|
||||
MobileBertForQuestionAnswering,pass,0
|
||||
|
||||
|
||||
|
||||
OPTForCausalLM,pass,0
|
||||
|
||||
|
||||
@ -126,26 +74,14 @@ PLBartForCausalLM,pass,0
|
||||
|
||||
|
||||
|
||||
PLBartForConditionalGeneration,pass,0
|
||||
|
||||
|
||||
|
||||
PegasusForCausalLM,pass,0
|
||||
|
||||
|
||||
|
||||
PegasusForConditionalGeneration,pass,0
|
||||
|
||||
|
||||
|
||||
RobertaForCausalLM,pass,0
|
||||
|
||||
|
||||
|
||||
RobertaForQuestionAnswering,pass,0
|
||||
|
||||
|
||||
|
||||
T5ForConditionalGeneration,pass,0
|
||||
|
||||
|
||||
|
|
@ -6,58 +6,26 @@ AlbertForMaskedLM,pass,0
|
||||
|
||||
|
||||
|
||||
AlbertForQuestionAnswering,pass,0
|
||||
|
||||
|
||||
|
||||
BartForCausalLM,pass,0
|
||||
|
||||
|
||||
|
||||
BartForConditionalGeneration,pass,0
|
||||
|
||||
|
||||
|
||||
BertForMaskedLM,pass,0
|
||||
|
||||
|
||||
|
||||
BertForQuestionAnswering,pass,0
|
||||
|
||||
|
||||
|
||||
BlenderbotForCausalLM,pass_due_to_skip,0
|
||||
|
||||
|
||||
|
||||
BlenderbotSmallForCausalLM,pass,0
|
||||
|
||||
|
||||
|
||||
BlenderbotSmallForConditionalGeneration,pass,0
|
||||
|
||||
|
||||
|
||||
CamemBert,pass,0
|
||||
|
||||
|
||||
|
||||
DebertaV2ForMaskedLM,pass_due_to_skip,0
|
||||
|
||||
|
||||
|
||||
DebertaV2ForQuestionAnswering,pass,0
|
||||
|
||||
|
||||
|
||||
DistilBertForMaskedLM,pass,0
|
||||
|
||||
|
||||
|
||||
DistilBertForQuestionAnswering,pass,0
|
||||
|
||||
|
||||
|
||||
DistillGPT2,pass,0
|
||||
|
||||
|
||||
@ -66,10 +34,6 @@ ElectraForCausalLM,pass,0
|
||||
|
||||
|
||||
|
||||
ElectraForQuestionAnswering,pass,0
|
||||
|
||||
|
||||
|
||||
GPT2ForSequenceClassification,pass,0
|
||||
|
||||
|
||||
@ -82,10 +46,6 @@ LayoutLMForMaskedLM,pass,0
|
||||
|
||||
|
||||
|
||||
LayoutLMForSequenceClassification,pass,0
|
||||
|
||||
|
||||
|
||||
M2M100ForConditionalGeneration,pass,0
|
||||
|
||||
|
||||
@ -94,10 +54,6 @@ MBartForCausalLM,pass,0
|
||||
|
||||
|
||||
|
||||
MBartForConditionalGeneration,pass,0
|
||||
|
||||
|
||||
|
||||
MT5ForConditionalGeneration,pass,0
|
||||
|
||||
|
||||
@ -106,18 +62,10 @@ MegatronBertForCausalLM,pass,0
|
||||
|
||||
|
||||
|
||||
MegatronBertForQuestionAnswering,pass,0
|
||||
|
||||
|
||||
|
||||
MobileBertForMaskedLM,pass,0
|
||||
|
||||
|
||||
|
||||
MobileBertForQuestionAnswering,pass,0
|
||||
|
||||
|
||||
|
||||
OPTForCausalLM,pass,0
|
||||
|
||||
|
||||
@ -126,26 +74,14 @@ PLBartForCausalLM,pass,0
|
||||
|
||||
|
||||
|
||||
PLBartForConditionalGeneration,pass,0
|
||||
|
||||
|
||||
|
||||
PegasusForCausalLM,pass,0
|
||||
|
||||
|
||||
|
||||
PegasusForConditionalGeneration,pass,0
|
||||
|
||||
|
||||
|
||||
RobertaForCausalLM,pass,0
|
||||
|
||||
|
||||
|
||||
RobertaForQuestionAnswering,pass,0
|
||||
|
||||
|
||||
|
||||
T5ForConditionalGeneration,pass,0
|
||||
|
||||
|
||||
|
|
@ -6,10 +6,6 @@ AlbertForMaskedLM,pass,0
|
||||
|
||||
|
||||
|
||||
AlbertForQuestionAnswering,pass,0
|
||||
|
||||
|
||||
|
||||
AllenaiLongformerBase,pass,4
|
||||
|
||||
|
||||
@ -18,50 +14,22 @@ BartForCausalLM,pass,0
|
||||
|
||||
|
||||
|
||||
BartForConditionalGeneration,pass,0
|
||||
|
||||
|
||||
|
||||
BertForMaskedLM,pass,0
|
||||
|
||||
|
||||
|
||||
BertForQuestionAnswering,pass,0
|
||||
|
||||
|
||||
|
||||
BlenderbotForCausalLM,pass_due_to_skip,0
|
||||
|
||||
|
||||
|
||||
BlenderbotSmallForCausalLM,pass,0
|
||||
|
||||
|
||||
|
||||
BlenderbotSmallForConditionalGeneration,pass,0
|
||||
|
||||
|
||||
|
||||
CamemBert,pass,0
|
||||
|
||||
|
||||
|
||||
DebertaV2ForMaskedLM,pass_due_to_skip,0
|
||||
|
||||
|
||||
|
||||
DebertaV2ForQuestionAnswering,pass,0
|
||||
|
||||
|
||||
|
||||
DistilBertForMaskedLM,pass,0
|
||||
|
||||
|
||||
|
||||
DistilBertForQuestionAnswering,pass,0
|
||||
|
||||
|
||||
|
||||
DistillGPT2,pass,2
|
||||
|
||||
|
||||
@ -70,10 +38,6 @@ ElectraForCausalLM,pass,0
|
||||
|
||||
|
||||
|
||||
ElectraForQuestionAnswering,pass,0
|
||||
|
||||
|
||||
|
||||
GPT2ForSequenceClassification,pass,0
|
||||
|
||||
|
||||
@ -86,10 +50,6 @@ LayoutLMForMaskedLM,pass,0
|
||||
|
||||
|
||||
|
||||
LayoutLMForSequenceClassification,pass,0
|
||||
|
||||
|
||||
|
||||
M2M100ForConditionalGeneration,pass,0
|
||||
|
||||
|
||||
@ -98,10 +58,6 @@ MBartForCausalLM,pass,0
|
||||
|
||||
|
||||
|
||||
MBartForConditionalGeneration,pass,0
|
||||
|
||||
|
||||
|
||||
MT5ForConditionalGeneration,pass,0
|
||||
|
||||
|
||||
@ -110,18 +66,10 @@ MegatronBertForCausalLM,pass,0
|
||||
|
||||
|
||||
|
||||
MegatronBertForQuestionAnswering,pass,0
|
||||
|
||||
|
||||
|
||||
MobileBertForMaskedLM,pass,0
|
||||
|
||||
|
||||
|
||||
MobileBertForQuestionAnswering,pass,0
|
||||
|
||||
|
||||
|
||||
OPTForCausalLM,pass,0
|
||||
|
||||
|
||||
@ -130,26 +78,14 @@ PLBartForCausalLM,pass,0
|
||||
|
||||
|
||||
|
||||
PLBartForConditionalGeneration,pass,0
|
||||
|
||||
|
||||
|
||||
PegasusForCausalLM,pass,0
|
||||
|
||||
|
||||
|
||||
PegasusForConditionalGeneration,pass,0
|
||||
|
||||
|
||||
|
||||
RobertaForCausalLM,pass,0
|
||||
|
||||
|
||||
|
||||
RobertaForQuestionAnswering,pass,0
|
||||
|
||||
|
||||
|
||||
T5ForConditionalGeneration,pass,0
|
||||
|
||||
|
||||
|
|
@ -6,10 +6,6 @@ AlbertForMaskedLM,pass,0
|
||||
|
||||
|
||||
|
||||
AlbertForQuestionAnswering,pass,0
|
||||
|
||||
|
||||
|
||||
AllenaiLongformerBase,pass,4
|
||||
|
||||
|
||||
@ -18,50 +14,22 @@ BartForCausalLM,pass,0
|
||||
|
||||
|
||||
|
||||
BartForConditionalGeneration,pass,0
|
||||
|
||||
|
||||
|
||||
BertForMaskedLM,pass,0
|
||||
|
||||
|
||||
|
||||
BertForQuestionAnswering,pass,0
|
||||
|
||||
|
||||
|
||||
BlenderbotForCausalLM,pass_due_to_skip,0
|
||||
|
||||
|
||||
|
||||
BlenderbotSmallForCausalLM,pass,0
|
||||
|
||||
|
||||
|
||||
BlenderbotSmallForConditionalGeneration,pass,0
|
||||
|
||||
|
||||
|
||||
CamemBert,pass,0
|
||||
|
||||
|
||||
|
||||
DebertaV2ForMaskedLM,pass_due_to_skip,0
|
||||
|
||||
|
||||
|
||||
DebertaV2ForQuestionAnswering,pass,0
|
||||
|
||||
|
||||
|
||||
DistilBertForMaskedLM,pass,0
|
||||
|
||||
|
||||
|
||||
DistilBertForQuestionAnswering,pass,0
|
||||
|
||||
|
||||
|
||||
DistillGPT2,pass,2
|
||||
|
||||
|
||||
@ -70,10 +38,6 @@ ElectraForCausalLM,pass,0
|
||||
|
||||
|
||||
|
||||
ElectraForQuestionAnswering,pass,0
|
||||
|
||||
|
||||
|
||||
GPT2ForSequenceClassification,pass,0
|
||||
|
||||
|
||||
@ -86,10 +50,6 @@ LayoutLMForMaskedLM,pass,0
|
||||
|
||||
|
||||
|
||||
LayoutLMForSequenceClassification,pass,0
|
||||
|
||||
|
||||
|
||||
M2M100ForConditionalGeneration,pass,0
|
||||
|
||||
|
||||
@ -98,10 +58,6 @@ MBartForCausalLM,pass,0
|
||||
|
||||
|
||||
|
||||
MBartForConditionalGeneration,pass,0
|
||||
|
||||
|
||||
|
||||
MT5ForConditionalGeneration,pass,0
|
||||
|
||||
|
||||
@ -110,18 +66,10 @@ MegatronBertForCausalLM,pass,0
|
||||
|
||||
|
||||
|
||||
MegatronBertForQuestionAnswering,pass,0
|
||||
|
||||
|
||||
|
||||
MobileBertForMaskedLM,pass,0
|
||||
|
||||
|
||||
|
||||
MobileBertForQuestionAnswering,pass,0
|
||||
|
||||
|
||||
|
||||
OPTForCausalLM,pass,0
|
||||
|
||||
|
||||
@ -130,26 +78,14 @@ PLBartForCausalLM,pass,0
|
||||
|
||||
|
||||
|
||||
PLBartForConditionalGeneration,pass,0
|
||||
|
||||
|
||||
|
||||
PegasusForCausalLM,pass,0
|
||||
|
||||
|
||||
|
||||
PegasusForConditionalGeneration,pass,0
|
||||
|
||||
|
||||
|
||||
RobertaForCausalLM,pass,0
|
||||
|
||||
|
||||
|
||||
RobertaForQuestionAnswering,pass,0
|
||||
|
||||
|
||||
|
||||
T5ForConditionalGeneration,pass,0
|
||||
|
||||
|
||||
|
|
@ -6,10 +6,6 @@ AlbertForMaskedLM,pass,0
|
||||
|
||||
|
||||
|
||||
AlbertForQuestionAnswering,pass,0
|
||||
|
||||
|
||||
|
||||
AllenaiLongformerBase,pass,4
|
||||
|
||||
|
||||
@ -18,50 +14,22 @@ BartForCausalLM,pass,0
|
||||
|
||||
|
||||
|
||||
BartForConditionalGeneration,pass,0
|
||||
|
||||
|
||||
|
||||
BertForMaskedLM,pass,0
|
||||
|
||||
|
||||
|
||||
BertForQuestionAnswering,pass,0
|
||||
|
||||
|
||||
|
||||
BlenderbotForCausalLM,pass_due_to_skip,0
|
||||
|
||||
|
||||
|
||||
BlenderbotSmallForCausalLM,pass,0
|
||||
|
||||
|
||||
|
||||
BlenderbotSmallForConditionalGeneration,pass,0
|
||||
|
||||
|
||||
|
||||
CamemBert,pass,0
|
||||
|
||||
|
||||
|
||||
DebertaV2ForMaskedLM,pass_due_to_skip,0
|
||||
|
||||
|
||||
|
||||
DebertaV2ForQuestionAnswering,pass,0
|
||||
|
||||
|
||||
|
||||
DistilBertForMaskedLM,pass,0
|
||||
|
||||
|
||||
|
||||
DistilBertForQuestionAnswering,pass,0
|
||||
|
||||
|
||||
|
||||
DistillGPT2,pass,2
|
||||
|
||||
|
||||
@ -70,10 +38,6 @@ ElectraForCausalLM,pass,0
|
||||
|
||||
|
||||
|
||||
ElectraForQuestionAnswering,pass,0
|
||||
|
||||
|
||||
|
||||
GPT2ForSequenceClassification,pass,0
|
||||
|
||||
|
||||
@ -86,10 +50,6 @@ LayoutLMForMaskedLM,pass,0
|
||||
|
||||
|
||||
|
||||
LayoutLMForSequenceClassification,pass,0
|
||||
|
||||
|
||||
|
||||
M2M100ForConditionalGeneration,pass,0
|
||||
|
||||
|
||||
@ -98,10 +58,6 @@ MBartForCausalLM,pass,0
|
||||
|
||||
|
||||
|
||||
MBartForConditionalGeneration,pass,0
|
||||
|
||||
|
||||
|
||||
MT5ForConditionalGeneration,pass,0
|
||||
|
||||
|
||||
@ -110,18 +66,10 @@ MegatronBertForCausalLM,pass,0
|
||||
|
||||
|
||||
|
||||
MegatronBertForQuestionAnswering,pass,0
|
||||
|
||||
|
||||
|
||||
MobileBertForMaskedLM,pass,0
|
||||
|
||||
|
||||
|
||||
MobileBertForQuestionAnswering,pass,0
|
||||
|
||||
|
||||
|
||||
OPTForCausalLM,pass,0
|
||||
|
||||
|
||||
@ -130,26 +78,14 @@ PLBartForCausalLM,pass,0
|
||||
|
||||
|
||||
|
||||
PLBartForConditionalGeneration,pass,0
|
||||
|
||||
|
||||
|
||||
PegasusForCausalLM,pass,0
|
||||
|
||||
|
||||
|
||||
PegasusForConditionalGeneration,pass,0
|
||||
|
||||
|
||||
|
||||
RobertaForCausalLM,pass,0
|
||||
|
||||
|
||||
|
||||
RobertaForQuestionAnswering,pass,0
|
||||
|
||||
|
||||
|
||||
T5ForConditionalGeneration,pass,0
|
||||
|
||||
|
||||
|
|
@ -6,10 +6,6 @@ AlbertForMaskedLM,pass,0
|
||||
|
||||
|
||||
|
||||
AlbertForQuestionAnswering,pass,0
|
||||
|
||||
|
||||
|
||||
AllenaiLongformerBase,pass,4
|
||||
|
||||
|
||||
@ -18,50 +14,22 @@ BartForCausalLM,pass,0
|
||||
|
||||
|
||||
|
||||
BartForConditionalGeneration,pass,0
|
||||
|
||||
|
||||
|
||||
BertForMaskedLM,pass,0
|
||||
|
||||
|
||||
|
||||
BertForQuestionAnswering,pass,0
|
||||
|
||||
|
||||
|
||||
BlenderbotForCausalLM,pass_due_to_skip,0
|
||||
|
||||
|
||||
|
||||
BlenderbotSmallForCausalLM,pass,0
|
||||
|
||||
|
||||
|
||||
BlenderbotSmallForConditionalGeneration,pass,0
|
||||
|
||||
|
||||
|
||||
CamemBert,pass,0
|
||||
|
||||
|
||||
|
||||
DebertaV2ForMaskedLM,pass_due_to_skip,0
|
||||
|
||||
|
||||
|
||||
DebertaV2ForQuestionAnswering,pass,0
|
||||
|
||||
|
||||
|
||||
DistilBertForMaskedLM,pass,0
|
||||
|
||||
|
||||
|
||||
DistilBertForQuestionAnswering,pass,0
|
||||
|
||||
|
||||
|
||||
DistillGPT2,pass,2
|
||||
|
||||
|
||||
@ -70,10 +38,6 @@ ElectraForCausalLM,pass,0
|
||||
|
||||
|
||||
|
||||
ElectraForQuestionAnswering,pass,0
|
||||
|
||||
|
||||
|
||||
GPT2ForSequenceClassification,pass,0
|
||||
|
||||
|
||||
@ -86,10 +50,6 @@ LayoutLMForMaskedLM,pass,0
|
||||
|
||||
|
||||
|
||||
LayoutLMForSequenceClassification,pass,0
|
||||
|
||||
|
||||
|
||||
M2M100ForConditionalGeneration,pass,0
|
||||
|
||||
|
||||
@ -98,10 +58,6 @@ MBartForCausalLM,pass,0
|
||||
|
||||
|
||||
|
||||
MBartForConditionalGeneration,pass,0
|
||||
|
||||
|
||||
|
||||
MT5ForConditionalGeneration,pass,0
|
||||
|
||||
|
||||
@ -110,18 +66,10 @@ MegatronBertForCausalLM,pass,0
|
||||
|
||||
|
||||
|
||||
MegatronBertForQuestionAnswering,pass,0
|
||||
|
||||
|
||||
|
||||
MobileBertForMaskedLM,pass,0
|
||||
|
||||
|
||||
|
||||
MobileBertForQuestionAnswering,pass,0
|
||||
|
||||
|
||||
|
||||
OPTForCausalLM,pass,0
|
||||
|
||||
|
||||
@ -130,26 +78,14 @@ PLBartForCausalLM,pass,0
|
||||
|
||||
|
||||
|
||||
PLBartForConditionalGeneration,pass,0
|
||||
|
||||
|
||||
|
||||
PegasusForCausalLM,pass,0
|
||||
|
||||
|
||||
|
||||
PegasusForConditionalGeneration,pass,0
|
||||
|
||||
|
||||
|
||||
RobertaForCausalLM,pass,0
|
||||
|
||||
|
||||
|
||||
RobertaForQuestionAnswering,pass,0
|
||||
|
||||
|
||||
|
||||
T5ForConditionalGeneration,pass,0
|
||||
|
||||
|
||||
|
|
@ -6,10 +6,6 @@ AlbertForMaskedLM,pass,4
|
||||
|
||||
|
||||
|
||||
AlbertForQuestionAnswering,pass,5
|
||||
|
||||
|
||||
|
||||
AllenaiLongformerBase,pass,9
|
||||
|
||||
|
||||
@ -18,50 +14,22 @@ BartForCausalLM,pass,6
|
||||
|
||||
|
||||
|
||||
BartForConditionalGeneration,pass,8
|
||||
|
||||
|
||||
|
||||
BertForMaskedLM,pass,5
|
||||
|
||||
|
||||
|
||||
BertForQuestionAnswering,pass,5
|
||||
|
||||
|
||||
|
||||
BlenderbotForCausalLM,eager_fail_to_run,0
|
||||
|
||||
|
||||
|
||||
BlenderbotSmallForCausalLM,pass,6
|
||||
|
||||
|
||||
|
||||
BlenderbotSmallForConditionalGeneration,pass,8
|
||||
|
||||
|
||||
|
||||
CamemBert,pass,5
|
||||
|
||||
|
||||
|
||||
DebertaV2ForMaskedLM,pass_due_to_skip,0
|
||||
|
||||
|
||||
|
||||
DebertaV2ForQuestionAnswering,eager_1st_run_OOM,0
|
||||
|
||||
|
||||
|
||||
DistilBertForMaskedLM,pass,5
|
||||
|
||||
|
||||
|
||||
DistilBertForQuestionAnswering,pass,5
|
||||
|
||||
|
||||
|
||||
DistillGPT2,pass,7
|
||||
|
||||
|
||||
@ -70,10 +38,6 @@ ElectraForCausalLM,pass,4
|
||||
|
||||
|
||||
|
||||
ElectraForQuestionAnswering,pass,5
|
||||
|
||||
|
||||
|
||||
GPT2ForSequenceClassification,pass,6
|
||||
|
||||
|
||||
@ -86,10 +50,6 @@ LayoutLMForMaskedLM,pass,5
|
||||
|
||||
|
||||
|
||||
LayoutLMForSequenceClassification,pass,6
|
||||
|
||||
|
||||
|
||||
M2M100ForConditionalGeneration,pass,4
|
||||
|
||||
|
||||
@ -98,10 +58,6 @@ MBartForCausalLM,pass,6
|
||||
|
||||
|
||||
|
||||
MBartForConditionalGeneration,pass,8
|
||||
|
||||
|
||||
|
||||
MT5ForConditionalGeneration,pass,5
|
||||
|
||||
|
||||
@ -110,18 +66,10 @@ MegatronBertForCausalLM,pass,5
|
||||
|
||||
|
||||
|
||||
MegatronBertForQuestionAnswering,pass,5
|
||||
|
||||
|
||||
|
||||
MobileBertForMaskedLM,pass,3
|
||||
|
||||
|
||||
|
||||
MobileBertForQuestionAnswering,pass,3
|
||||
|
||||
|
||||
|
||||
OPTForCausalLM,pass,8
|
||||
|
||||
|
||||
@ -130,26 +78,14 @@ PLBartForCausalLM,pass,6
|
||||
|
||||
|
||||
|
||||
PLBartForConditionalGeneration,pass,8
|
||||
|
||||
|
||||
|
||||
PegasusForCausalLM,pass,6
|
||||
|
||||
|
||||
|
||||
PegasusForConditionalGeneration,pass,7
|
||||
|
||||
|
||||
|
||||
RobertaForCausalLM,pass,5
|
||||
|
||||
|
||||
|
||||
RobertaForQuestionAnswering,pass,5
|
||||
|
||||
|
||||
|
||||
T5ForConditionalGeneration,pass,5
|
||||
|
||||
|
||||
|
|
@ -6,10 +6,6 @@ AlbertForMaskedLM,pass,0
|
||||
|
||||
|
||||
|
||||
AlbertForQuestionAnswering,pass,0
|
||||
|
||||
|
||||
|
||||
AllenaiLongformerBase,pass,4
|
||||
|
||||
|
||||
@ -18,50 +14,22 @@ BartForCausalLM,pass,0
|
||||
|
||||
|
||||
|
||||
BartForConditionalGeneration,pass,0
|
||||
|
||||
|
||||
|
||||
BertForMaskedLM,pass,0
|
||||
|
||||
|
||||
|
||||
BertForQuestionAnswering,pass,0
|
||||
|
||||
|
||||
|
||||
BlenderbotForCausalLM,pass_due_to_skip,0
|
||||
|
||||
|
||||
|
||||
BlenderbotSmallForCausalLM,pass,0
|
||||
|
||||
|
||||
|
||||
BlenderbotSmallForConditionalGeneration,pass,0
|
||||
|
||||
|
||||
|
||||
CamemBert,pass,0
|
||||
|
||||
|
||||
|
||||
DebertaV2ForMaskedLM,pass_due_to_skip,0
|
||||
|
||||
|
||||
|
||||
DebertaV2ForQuestionAnswering,pass,0
|
||||
|
||||
|
||||
|
||||
DistilBertForMaskedLM,pass,0
|
||||
|
||||
|
||||
|
||||
DistilBertForQuestionAnswering,pass,0
|
||||
|
||||
|
||||
|
||||
DistillGPT2,pass,2
|
||||
|
||||
|
||||
@ -70,10 +38,6 @@ ElectraForCausalLM,pass,0
|
||||
|
||||
|
||||
|
||||
ElectraForQuestionAnswering,pass,0
|
||||
|
||||
|
||||
|
||||
GPT2ForSequenceClassification,pass,0
|
||||
|
||||
|
||||
@ -86,10 +50,6 @@ LayoutLMForMaskedLM,pass,0
|
||||
|
||||
|
||||
|
||||
LayoutLMForSequenceClassification,pass,0
|
||||
|
||||
|
||||
|
||||
M2M100ForConditionalGeneration,pass,0
|
||||
|
||||
|
||||
@ -98,10 +58,6 @@ MBartForCausalLM,pass,0
|
||||
|
||||
|
||||
|
||||
MBartForConditionalGeneration,pass,0
|
||||
|
||||
|
||||
|
||||
MT5ForConditionalGeneration,pass,0
|
||||
|
||||
|
||||
@ -110,18 +66,10 @@ MegatronBertForCausalLM,pass,0
|
||||
|
||||
|
||||
|
||||
MegatronBertForQuestionAnswering,pass,0
|
||||
|
||||
|
||||
|
||||
MobileBertForMaskedLM,pass,0
|
||||
|
||||
|
||||
|
||||
MobileBertForQuestionAnswering,pass,0
|
||||
|
||||
|
||||
|
||||
OPTForCausalLM,pass,0
|
||||
|
||||
|
||||
@ -130,26 +78,14 @@ PLBartForCausalLM,pass,0
|
||||
|
||||
|
||||
|
||||
PLBartForConditionalGeneration,pass,0
|
||||
|
||||
|
||||
|
||||
PegasusForCausalLM,pass,0
|
||||
|
||||
|
||||
|
||||
PegasusForConditionalGeneration,pass,0
|
||||
|
||||
|
||||
|
||||
RobertaForCausalLM,pass,0
|
||||
|
||||
|
||||
|
||||
RobertaForQuestionAnswering,pass,0
|
||||
|
||||
|
||||
|
||||
T5ForConditionalGeneration,pass,0
|
||||
|
||||
|
||||
|
|
@ -6,10 +6,6 @@ AlbertForMaskedLM,pass,0
|
||||
|
||||
|
||||
|
||||
AlbertForQuestionAnswering,pass,0
|
||||
|
||||
|
||||
|
||||
AllenaiLongformerBase,pass,4
|
||||
|
||||
|
||||
@ -18,50 +14,22 @@ BartForCausalLM,pass,0
|
||||
|
||||
|
||||
|
||||
BartForConditionalGeneration,pass,0
|
||||
|
||||
|
||||
|
||||
BertForMaskedLM,pass,0
|
||||
|
||||
|
||||
|
||||
BertForQuestionAnswering,pass,0
|
||||
|
||||
|
||||
|
||||
BlenderbotForCausalLM,pass_due_to_skip,0
|
||||
|
||||
|
||||
|
||||
BlenderbotSmallForCausalLM,pass,0
|
||||
|
||||
|
||||
|
||||
BlenderbotSmallForConditionalGeneration,pass,0
|
||||
|
||||
|
||||
|
||||
CamemBert,pass,0
|
||||
|
||||
|
||||
|
||||
DebertaV2ForMaskedLM,pass_due_to_skip,0
|
||||
|
||||
|
||||
|
||||
DebertaV2ForQuestionAnswering,pass,0
|
||||
|
||||
|
||||
|
||||
DistilBertForMaskedLM,pass,0
|
||||
|
||||
|
||||
|
||||
DistilBertForQuestionAnswering,pass,0
|
||||
|
||||
|
||||
|
||||
DistillGPT2,pass,2
|
||||
|
||||
|
||||
@ -70,10 +38,6 @@ ElectraForCausalLM,pass,0
|
||||
|
||||
|
||||
|
||||
ElectraForQuestionAnswering,pass,0
|
||||
|
||||
|
||||
|
||||
GPT2ForSequenceClassification,pass,0
|
||||
|
||||
|
||||
@ -86,10 +50,6 @@ LayoutLMForMaskedLM,pass,0
|
||||
|
||||
|
||||
|
||||
LayoutLMForSequenceClassification,pass,0
|
||||
|
||||
|
||||
|
||||
M2M100ForConditionalGeneration,pass,0
|
||||
|
||||
|
||||
@ -98,10 +58,6 @@ MBartForCausalLM,pass,0
|
||||
|
||||
|
||||
|
||||
MBartForConditionalGeneration,pass,0
|
||||
|
||||
|
||||
|
||||
MT5ForConditionalGeneration,pass,0
|
||||
|
||||
|
||||
@ -110,18 +66,10 @@ MegatronBertForCausalLM,pass,0
|
||||
|
||||
|
||||
|
||||
MegatronBertForQuestionAnswering,pass,0
|
||||
|
||||
|
||||
|
||||
MobileBertForMaskedLM,pass,0
|
||||
|
||||
|
||||
|
||||
MobileBertForQuestionAnswering,pass,0
|
||||
|
||||
|
||||
|
||||
OPTForCausalLM,pass,0
|
||||
|
||||
|
||||
@ -130,26 +78,14 @@ PLBartForCausalLM,pass,0
|
||||
|
||||
|
||||
|
||||
PLBartForConditionalGeneration,pass,0
|
||||
|
||||
|
||||
|
||||
PegasusForCausalLM,pass,0
|
||||
|
||||
|
||||
|
||||
PegasusForConditionalGeneration,pass,0
|
||||
|
||||
|
||||
|
||||
RobertaForCausalLM,pass,0
|
||||
|
||||
|
||||
|
||||
RobertaForQuestionAnswering,pass,0
|
||||
|
||||
|
||||
|
||||
T5ForConditionalGeneration,pass,0
|
||||
|
||||
|
||||
|
|
@ -6,10 +6,6 @@ AlbertForMaskedLM,pass,0
|
||||
|
||||
|
||||
|
||||
AlbertForQuestionAnswering,pass,0
|
||||
|
||||
|
||||
|
||||
AllenaiLongformerBase,pass,4
|
||||
|
||||
|
||||
@ -18,50 +14,22 @@ BartForCausalLM,pass,0
|
||||
|
||||
|
||||
|
||||
BartForConditionalGeneration,pass,0
|
||||
|
||||
|
||||
|
||||
BertForMaskedLM,pass,0
|
||||
|
||||
|
||||
|
||||
BertForQuestionAnswering,pass,0
|
||||
|
||||
|
||||
|
||||
BlenderbotForCausalLM,pass_due_to_skip,0
|
||||
|
||||
|
||||
|
||||
BlenderbotSmallForCausalLM,pass,0
|
||||
|
||||
|
||||
|
||||
BlenderbotSmallForConditionalGeneration,pass,0
|
||||
|
||||
|
||||
|
||||
CamemBert,pass,0
|
||||
|
||||
|
||||
|
||||
DebertaV2ForMaskedLM,pass_due_to_skip,0
|
||||
|
||||
|
||||
|
||||
DebertaV2ForQuestionAnswering,pass,0
|
||||
|
||||
|
||||
|
||||
DistilBertForMaskedLM,pass,0
|
||||
|
||||
|
||||
|
||||
DistilBertForQuestionAnswering,pass,0
|
||||
|
||||
|
||||
|
||||
DistillGPT2,pass,2
|
||||
|
||||
|
||||
@ -70,10 +38,6 @@ ElectraForCausalLM,pass,0
|
||||
|
||||
|
||||
|
||||
ElectraForQuestionAnswering,pass,0
|
||||
|
||||
|
||||
|
||||
GPT2ForSequenceClassification,pass,0
|
||||
|
||||
|
||||
@ -86,10 +50,6 @@ LayoutLMForMaskedLM,pass,0
|
||||
|
||||
|
||||
|
||||
LayoutLMForSequenceClassification,pass,0
|
||||
|
||||
|
||||
|
||||
M2M100ForConditionalGeneration,pass,0
|
||||
|
||||
|
||||
@ -98,10 +58,6 @@ MBartForCausalLM,pass,0
|
||||
|
||||
|
||||
|
||||
MBartForConditionalGeneration,pass,0
|
||||
|
||||
|
||||
|
||||
MT5ForConditionalGeneration,pass,0
|
||||
|
||||
|
||||
@ -110,18 +66,10 @@ MegatronBertForCausalLM,pass,0
|
||||
|
||||
|
||||
|
||||
MegatronBertForQuestionAnswering,pass,0
|
||||
|
||||
|
||||
|
||||
MobileBertForMaskedLM,pass,0
|
||||
|
||||
|
||||
|
||||
MobileBertForQuestionAnswering,pass,0
|
||||
|
||||
|
||||
|
||||
OPTForCausalLM,pass,0
|
||||
|
||||
|
||||
@ -130,26 +78,14 @@ PLBartForCausalLM,pass,0
|
||||
|
||||
|
||||
|
||||
PLBartForConditionalGeneration,pass,0
|
||||
|
||||
|
||||
|
||||
PegasusForCausalLM,pass,0
|
||||
|
||||
|
||||
|
||||
PegasusForConditionalGeneration,pass,0
|
||||
|
||||
|
||||
|
||||
RobertaForCausalLM,pass,0
|
||||
|
||||
|
||||
|
||||
RobertaForQuestionAnswering,pass,0
|
||||
|
||||
|
||||
|
||||
T5ForConditionalGeneration,pass,0
|
||||
|
||||
|
||||
|
|
@ -6,10 +6,6 @@ AlbertForMaskedLM,pass,4
|
||||
|
||||
|
||||
|
||||
AlbertForQuestionAnswering,pass,5
|
||||
|
||||
|
||||
|
||||
AllenaiLongformerBase,pass,9
|
||||
|
||||
|
||||
@ -18,50 +14,22 @@ BartForCausalLM,pass,6
|
||||
|
||||
|
||||
|
||||
BartForConditionalGeneration,pass,8
|
||||
|
||||
|
||||
|
||||
BertForMaskedLM,pass,5
|
||||
|
||||
|
||||
|
||||
BertForQuestionAnswering,pass,5
|
||||
|
||||
|
||||
|
||||
BlenderbotForCausalLM,eager_fail_to_run,0
|
||||
|
||||
|
||||
|
||||
BlenderbotSmallForCausalLM,pass,6
|
||||
|
||||
|
||||
|
||||
BlenderbotSmallForConditionalGeneration,pass,8
|
||||
|
||||
|
||||
|
||||
CamemBert,pass,5
|
||||
|
||||
|
||||
|
||||
DebertaV2ForMaskedLM,pass_due_to_skip,0
|
||||
|
||||
|
||||
|
||||
DebertaV2ForQuestionAnswering,eager_1st_run_OOM,0
|
||||
|
||||
|
||||
|
||||
DistilBertForMaskedLM,pass,5
|
||||
|
||||
|
||||
|
||||
DistilBertForQuestionAnswering,pass,5
|
||||
|
||||
|
||||
|
||||
DistillGPT2,pass,7
|
||||
|
||||
|
||||
@ -70,10 +38,6 @@ ElectraForCausalLM,pass,4
|
||||
|
||||
|
||||
|
||||
ElectraForQuestionAnswering,pass,5
|
||||
|
||||
|
||||
|
||||
GPT2ForSequenceClassification,pass,6
|
||||
|
||||
|
||||
@ -86,10 +50,6 @@ LayoutLMForMaskedLM,pass,5
|
||||
|
||||
|
||||
|
||||
LayoutLMForSequenceClassification,pass,6
|
||||
|
||||
|
||||
|
||||
M2M100ForConditionalGeneration,pass,4
|
||||
|
||||
|
||||
@ -98,10 +58,6 @@ MBartForCausalLM,pass,6
|
||||
|
||||
|
||||
|
||||
MBartForConditionalGeneration,pass,8
|
||||
|
||||
|
||||
|
||||
MT5ForConditionalGeneration,pass,5
|
||||
|
||||
|
||||
@ -110,18 +66,10 @@ MegatronBertForCausalLM,pass,5
|
||||
|
||||
|
||||
|
||||
MegatronBertForQuestionAnswering,pass,5
|
||||
|
||||
|
||||
|
||||
MobileBertForMaskedLM,pass,3
|
||||
|
||||
|
||||
|
||||
MobileBertForQuestionAnswering,pass,3
|
||||
|
||||
|
||||
|
||||
OPTForCausalLM,pass,8
|
||||
|
||||
|
||||
@ -130,26 +78,14 @@ PLBartForCausalLM,pass,6
|
||||
|
||||
|
||||
|
||||
PLBartForConditionalGeneration,pass,8
|
||||
|
||||
|
||||
|
||||
PegasusForCausalLM,pass,6
|
||||
|
||||
|
||||
|
||||
PegasusForConditionalGeneration,pass,7
|
||||
|
||||
|
||||
|
||||
RobertaForCausalLM,pass,5
|
||||
|
||||
|
||||
|
||||
RobertaForQuestionAnswering,pass,5
|
||||
|
||||
|
||||
|
||||
T5ForConditionalGeneration,pass,5
|
||||
|
||||
|
||||
|
|
@ -6,10 +6,6 @@ AlbertForMaskedLM,pass,0
|
||||
|
||||
|
||||
|
||||
AlbertForQuestionAnswering,pass,0
|
||||
|
||||
|
||||
|
||||
AllenaiLongformerBase,pass,4
|
||||
|
||||
|
||||
@ -18,50 +14,22 @@ BartForCausalLM,pass,0
|
||||
|
||||
|
||||
|
||||
BartForConditionalGeneration,pass,0
|
||||
|
||||
|
||||
|
||||
BertForMaskedLM,pass,0
|
||||
|
||||
|
||||
|
||||
BertForQuestionAnswering,pass,0
|
||||
|
||||
|
||||
|
||||
BlenderbotForCausalLM,pass_due_to_skip,0
|
||||
|
||||
|
||||
|
||||
BlenderbotSmallForCausalLM,pass,0
|
||||
|
||||
|
||||
|
||||
BlenderbotSmallForConditionalGeneration,pass,0
|
||||
|
||||
|
||||
|
||||
CamemBert,pass,0
|
||||
|
||||
|
||||
|
||||
DebertaV2ForMaskedLM,pass_due_to_skip,0
|
||||
|
||||
|
||||
|
||||
DebertaV2ForQuestionAnswering,pass,0
|
||||
|
||||
|
||||
|
||||
DistilBertForMaskedLM,pass,0
|
||||
|
||||
|
||||
|
||||
DistilBertForQuestionAnswering,pass,0
|
||||
|
||||
|
||||
|
||||
DistillGPT2,pass,2
|
||||
|
||||
|
||||
@ -70,10 +38,6 @@ ElectraForCausalLM,pass,0
|
||||
|
||||
|
||||
|
||||
ElectraForQuestionAnswering,pass,0
|
||||
|
||||
|
||||
|
||||
GPT2ForSequenceClassification,pass,0
|
||||
|
||||
|
||||
@ -86,10 +50,6 @@ LayoutLMForMaskedLM,pass,0
|
||||
|
||||
|
||||
|
||||
LayoutLMForSequenceClassification,pass,0
|
||||
|
||||
|
||||
|
||||
M2M100ForConditionalGeneration,pass,0
|
||||
|
||||
|
||||
@ -98,10 +58,6 @@ MBartForCausalLM,pass,0
|
||||
|
||||
|
||||
|
||||
MBartForConditionalGeneration,pass,0
|
||||
|
||||
|
||||
|
||||
MT5ForConditionalGeneration,pass,0
|
||||
|
||||
|
||||
@ -110,18 +66,10 @@ MegatronBertForCausalLM,pass,0
|
||||
|
||||
|
||||
|
||||
MegatronBertForQuestionAnswering,pass,0
|
||||
|
||||
|
||||
|
||||
MobileBertForMaskedLM,pass,0
|
||||
|
||||
|
||||
|
||||
MobileBertForQuestionAnswering,pass,0
|
||||
|
||||
|
||||
|
||||
OPTForCausalLM,pass,0
|
||||
|
||||
|
||||
@ -130,26 +78,14 @@ PLBartForCausalLM,pass,0
|
||||
|
||||
|
||||
|
||||
PLBartForConditionalGeneration,pass,0
|
||||
|
||||
|
||||
|
||||
PegasusForCausalLM,pass,0
|
||||
|
||||
|
||||
|
||||
PegasusForConditionalGeneration,pass,0
|
||||
|
||||
|
||||
|
||||
RobertaForCausalLM,pass,0
|
||||
|
||||
|
||||
|
||||
RobertaForQuestionAnswering,pass,0
|
||||
|
||||
|
||||
|
||||
T5ForConditionalGeneration,pass,0
|
||||
|
||||
|
||||
|
|
@ -6,10 +6,6 @@ AlbertForMaskedLM,pass,4
|
||||
|
||||
|
||||
|
||||
AlbertForQuestionAnswering,pass,5
|
||||
|
||||
|
||||
|
||||
AllenaiLongformerBase,pass,9
|
||||
|
||||
|
||||
@ -18,50 +14,22 @@ BartForCausalLM,pass,6
|
||||
|
||||
|
||||
|
||||
BartForConditionalGeneration,pass,8
|
||||
|
||||
|
||||
|
||||
BertForMaskedLM,pass,5
|
||||
|
||||
|
||||
|
||||
BertForQuestionAnswering,pass,5
|
||||
|
||||
|
||||
|
||||
BlenderbotForCausalLM,eager_fail_to_run,0
|
||||
|
||||
|
||||
|
||||
BlenderbotSmallForCausalLM,pass,6
|
||||
|
||||
|
||||
|
||||
BlenderbotSmallForConditionalGeneration,pass,8
|
||||
|
||||
|
||||
|
||||
CamemBert,pass,5
|
||||
|
||||
|
||||
|
||||
DebertaV2ForMaskedLM,pass_due_to_skip,0
|
||||
|
||||
|
||||
|
||||
DebertaV2ForQuestionAnswering,eager_1st_run_OOM,0
|
||||
|
||||
|
||||
|
||||
DistilBertForMaskedLM,pass,5
|
||||
|
||||
|
||||
|
||||
DistilBertForQuestionAnswering,pass,5
|
||||
|
||||
|
||||
|
||||
DistillGPT2,pass,7
|
||||
|
||||
|
||||
@ -70,10 +38,6 @@ ElectraForCausalLM,pass,4
|
||||
|
||||
|
||||
|
||||
ElectraForQuestionAnswering,pass,5
|
||||
|
||||
|
||||
|
||||
GPT2ForSequenceClassification,pass,6
|
||||
|
||||
|
||||
@ -86,10 +50,6 @@ LayoutLMForMaskedLM,pass,5
|
||||
|
||||
|
||||
|
||||
LayoutLMForSequenceClassification,pass,6
|
||||
|
||||
|
||||
|
||||
M2M100ForConditionalGeneration,pass,4
|
||||
|
||||
|
||||
@ -98,10 +58,6 @@ MBartForCausalLM,pass,6
|
||||
|
||||
|
||||
|
||||
MBartForConditionalGeneration,pass,8
|
||||
|
||||
|
||||
|
||||
MT5ForConditionalGeneration,pass,5
|
||||
|
||||
|
||||
@ -110,18 +66,10 @@ MegatronBertForCausalLM,pass,5
|
||||
|
||||
|
||||
|
||||
MegatronBertForQuestionAnswering,pass,5
|
||||
|
||||
|
||||
|
||||
MobileBertForMaskedLM,pass,3
|
||||
|
||||
|
||||
|
||||
MobileBertForQuestionAnswering,pass,3
|
||||
|
||||
|
||||
|
||||
OPTForCausalLM,pass,8
|
||||
|
||||
|
||||
@ -130,26 +78,14 @@ PLBartForCausalLM,pass,6
|
||||
|
||||
|
||||
|
||||
PLBartForConditionalGeneration,pass,8
|
||||
|
||||
|
||||
|
||||
PegasusForCausalLM,pass,6
|
||||
|
||||
|
||||
|
||||
PegasusForConditionalGeneration,pass,7
|
||||
|
||||
|
||||
|
||||
RobertaForCausalLM,pass,5
|
||||
|
||||
|
||||
|
||||
RobertaForQuestionAnswering,pass,5
|
||||
|
||||
|
||||
|
||||
T5ForConditionalGeneration,pass,5
|
||||
|
||||
|
||||
|
|
@ -6,10 +6,6 @@ AlbertForMaskedLM,pass,0
|
||||
|
||||
|
||||
|
||||
AlbertForQuestionAnswering,pass,0
|
||||
|
||||
|
||||
|
||||
AllenaiLongformerBase,pass,4
|
||||
|
||||
|
||||
@ -18,50 +14,22 @@ BartForCausalLM,pass,0
|
||||
|
||||
|
||||
|
||||
BartForConditionalGeneration,pass,0
|
||||
|
||||
|
||||
|
||||
BertForMaskedLM,pass,0
|
||||
|
||||
|
||||
|
||||
BertForQuestionAnswering,pass,0
|
||||
|
||||
|
||||
|
||||
BlenderbotForCausalLM,pass_due_to_skip,0
|
||||
|
||||
|
||||
|
||||
BlenderbotSmallForCausalLM,pass,0
|
||||
|
||||
|
||||
|
||||
BlenderbotSmallForConditionalGeneration,pass,0
|
||||
|
||||
|
||||
|
||||
CamemBert,pass,0
|
||||
|
||||
|
||||
|
||||
DebertaV2ForMaskedLM,pass_due_to_skip,0
|
||||
|
||||
|
||||
|
||||
DebertaV2ForQuestionAnswering,pass,0
|
||||
|
||||
|
||||
|
||||
DistilBertForMaskedLM,pass,0
|
||||
|
||||
|
||||
|
||||
DistilBertForQuestionAnswering,pass,0
|
||||
|
||||
|
||||
|
||||
DistillGPT2,pass,2
|
||||
|
||||
|
||||
@ -70,10 +38,6 @@ ElectraForCausalLM,pass,0
|
||||
|
||||
|
||||
|
||||
ElectraForQuestionAnswering,pass,0
|
||||
|
||||
|
||||
|
||||
GPT2ForSequenceClassification,pass,0
|
||||
|
||||
|
||||
@ -86,10 +50,6 @@ LayoutLMForMaskedLM,pass,0
|
||||
|
||||
|
||||
|
||||
LayoutLMForSequenceClassification,pass,0
|
||||
|
||||
|
||||
|
||||
M2M100ForConditionalGeneration,pass,0
|
||||
|
||||
|
||||
@ -98,10 +58,6 @@ MBartForCausalLM,pass,0
|
||||
|
||||
|
||||
|
||||
MBartForConditionalGeneration,pass,0
|
||||
|
||||
|
||||
|
||||
MT5ForConditionalGeneration,pass,0
|
||||
|
||||
|
||||
@ -110,18 +66,10 @@ MegatronBertForCausalLM,pass,0
|
||||
|
||||
|
||||
|
||||
MegatronBertForQuestionAnswering,pass,0
|
||||
|
||||
|
||||
|
||||
MobileBertForMaskedLM,pass,0
|
||||
|
||||
|
||||
|
||||
MobileBertForQuestionAnswering,pass,0
|
||||
|
||||
|
||||
|
||||
OPTForCausalLM,pass,0
|
||||
|
||||
|
||||
@ -130,26 +78,14 @@ PLBartForCausalLM,pass,0
|
||||
|
||||
|
||||
|
||||
PLBartForConditionalGeneration,pass,0
|
||||
|
||||
|
||||
|
||||
PegasusForCausalLM,pass,0
|
||||
|
||||
|
||||
|
||||
PegasusForConditionalGeneration,pass,0
|
||||
|
||||
|
||||
|
||||
RobertaForCausalLM,pass,0
|
||||
|
||||
|
||||
|
||||
RobertaForQuestionAnswering,pass,0
|
||||
|
||||
|
||||
|
||||
T5ForConditionalGeneration,pass,0
|
||||
|
||||
|
||||
|
|
@ -6,10 +6,6 @@ AlbertForMaskedLM,pass,4
|
||||
|
||||
|
||||
|
||||
AlbertForQuestionAnswering,pass,5
|
||||
|
||||
|
||||
|
||||
AllenaiLongformerBase,pass,9
|
||||
|
||||
|
||||
@ -18,50 +14,22 @@ BartForCausalLM,pass,6
|
||||
|
||||
|
||||
|
||||
BartForConditionalGeneration,pass,8
|
||||
|
||||
|
||||
|
||||
BertForMaskedLM,pass,5
|
||||
|
||||
|
||||
|
||||
BertForQuestionAnswering,pass,5
|
||||
|
||||
|
||||
|
||||
BlenderbotForCausalLM,eager_fail_to_run,0
|
||||
|
||||
|
||||
|
||||
BlenderbotSmallForCausalLM,pass,6
|
||||
|
||||
|
||||
|
||||
BlenderbotSmallForConditionalGeneration,pass,8
|
||||
|
||||
|
||||
|
||||
CamemBert,pass,5
|
||||
|
||||
|
||||
|
||||
DebertaV2ForMaskedLM,pass_due_to_skip,0
|
||||
|
||||
|
||||
|
||||
DebertaV2ForQuestionAnswering,eager_1st_run_OOM,0
|
||||
|
||||
|
||||
|
||||
DistilBertForMaskedLM,pass,5
|
||||
|
||||
|
||||
|
||||
DistilBertForQuestionAnswering,pass,5
|
||||
|
||||
|
||||
|
||||
DistillGPT2,pass,7
|
||||
|
||||
|
||||
@ -70,10 +38,6 @@ ElectraForCausalLM,pass,4
|
||||
|
||||
|
||||
|
||||
ElectraForQuestionAnswering,pass,5
|
||||
|
||||
|
||||
|
||||
GPT2ForSequenceClassification,pass,6
|
||||
|
||||
|
||||
@ -86,10 +50,6 @@ LayoutLMForMaskedLM,pass,5
|
||||
|
||||
|
||||
|
||||
LayoutLMForSequenceClassification,pass,6
|
||||
|
||||
|
||||
|
||||
M2M100ForConditionalGeneration,pass,4
|
||||
|
||||
|
||||
@ -98,10 +58,6 @@ MBartForCausalLM,pass,6
|
||||
|
||||
|
||||
|
||||
MBartForConditionalGeneration,pass,8
|
||||
|
||||
|
||||
|
||||
MT5ForConditionalGeneration,pass,5
|
||||
|
||||
|
||||
@ -110,18 +66,10 @@ MegatronBertForCausalLM,pass,5
|
||||
|
||||
|
||||
|
||||
MegatronBertForQuestionAnswering,pass,5
|
||||
|
||||
|
||||
|
||||
MobileBertForMaskedLM,pass,3
|
||||
|
||||
|
||||
|
||||
MobileBertForQuestionAnswering,pass,3
|
||||
|
||||
|
||||
|
||||
OPTForCausalLM,pass,8
|
||||
|
||||
|
||||
@ -130,26 +78,14 @@ PLBartForCausalLM,pass,6
|
||||
|
||||
|
||||
|
||||
PLBartForConditionalGeneration,pass,8
|
||||
|
||||
|
||||
|
||||
PegasusForCausalLM,pass,6
|
||||
|
||||
|
||||
|
||||
PegasusForConditionalGeneration,pass,7
|
||||
|
||||
|
||||
|
||||
RobertaForCausalLM,pass,5
|
||||
|
||||
|
||||
|
||||
RobertaForQuestionAnswering,pass,5
|
||||
|
||||
|
||||
|
||||
T5ForConditionalGeneration,pass,5
|
||||
|
||||
|
||||
|
|
@ -378,7 +378,7 @@ vgg16,pass,0
|
||||
|
||||
|
||||
|
||||
vision_maskrcnn,pass,18
|
||||
vision_maskrcnn,pass,20
|
||||
|
||||
|
||||
|
||||
|
|
@ -286,7 +286,7 @@ vgg16,pass,6
|
||||
|
||||
|
||||
|
||||
vision_maskrcnn,pass,37
|
||||
vision_maskrcnn,pass,39
|
||||
|
||||
|
||||
|
||||
|
|
@ -6,10 +6,6 @@ AlbertForMaskedLM,pass,0
|
||||
|
||||
|
||||
|
||||
AlbertForQuestionAnswering,pass,0
|
||||
|
||||
|
||||
|
||||
AllenaiLongformerBase,pass,4
|
||||
|
||||
|
||||
@ -18,50 +14,22 @@ BartForCausalLM,pass,0
|
||||
|
||||
|
||||
|
||||
BartForConditionalGeneration,pass,0
|
||||
|
||||
|
||||
|
||||
BertForMaskedLM,pass,0
|
||||
|
||||
|
||||
|
||||
BertForQuestionAnswering,pass,0
|
||||
|
||||
|
||||
|
||||
BlenderbotForCausalLM,pass_due_to_skip,0
|
||||
|
||||
|
||||
|
||||
BlenderbotSmallForCausalLM,pass,0
|
||||
|
||||
|
||||
|
||||
BlenderbotSmallForConditionalGeneration,pass,0
|
||||
|
||||
|
||||
|
||||
CamemBert,pass,0
|
||||
|
||||
|
||||
|
||||
DebertaV2ForMaskedLM,pass_due_to_skip,0
|
||||
|
||||
|
||||
|
||||
DebertaV2ForQuestionAnswering,pass,0
|
||||
|
||||
|
||||
|
||||
DistilBertForMaskedLM,pass,0
|
||||
|
||||
|
||||
|
||||
DistilBertForQuestionAnswering,pass,0
|
||||
|
||||
|
||||
|
||||
DistillGPT2,pass,2
|
||||
|
||||
|
||||
@ -70,10 +38,6 @@ ElectraForCausalLM,pass,0
|
||||
|
||||
|
||||
|
||||
ElectraForQuestionAnswering,pass,0
|
||||
|
||||
|
||||
|
||||
GPT2ForSequenceClassification,pass,0
|
||||
|
||||
|
||||
@ -86,10 +50,6 @@ LayoutLMForMaskedLM,pass,0
|
||||
|
||||
|
||||
|
||||
LayoutLMForSequenceClassification,pass,0
|
||||
|
||||
|
||||
|
||||
M2M100ForConditionalGeneration,pass,0
|
||||
|
||||
|
||||
@ -98,10 +58,6 @@ MBartForCausalLM,pass,0
|
||||
|
||||
|
||||
|
||||
MBartForConditionalGeneration,pass,0
|
||||
|
||||
|
||||
|
||||
MT5ForConditionalGeneration,pass,0
|
||||
|
||||
|
||||
@ -110,18 +66,10 @@ MegatronBertForCausalLM,pass,0
|
||||
|
||||
|
||||
|
||||
MegatronBertForQuestionAnswering,pass,0
|
||||
|
||||
|
||||
|
||||
MobileBertForMaskedLM,pass,0
|
||||
|
||||
|
||||
|
||||
MobileBertForQuestionAnswering,pass,0
|
||||
|
||||
|
||||
|
||||
OPTForCausalLM,pass,0
|
||||
|
||||
|
||||
@ -130,26 +78,14 @@ PLBartForCausalLM,pass,0
|
||||
|
||||
|
||||
|
||||
PLBartForConditionalGeneration,pass,0
|
||||
|
||||
|
||||
|
||||
PegasusForCausalLM,pass,0
|
||||
|
||||
|
||||
|
||||
PegasusForConditionalGeneration,pass,0
|
||||
|
||||
|
||||
|
||||
RobertaForCausalLM,pass,0
|
||||
|
||||
|
||||
|
||||
RobertaForQuestionAnswering,pass,0
|
||||
|
||||
|
||||
|
||||
T5ForConditionalGeneration,pass,0
|
||||
|
||||
|
||||
|
|
@ -6,10 +6,6 @@ AlbertForMaskedLM,pass,4
|
||||
|
||||
|
||||
|
||||
AlbertForQuestionAnswering,pass,5
|
||||
|
||||
|
||||
|
||||
AllenaiLongformerBase,pass,9
|
||||
|
||||
|
||||
@ -18,50 +14,22 @@ BartForCausalLM,pass,6
|
||||
|
||||
|
||||
|
||||
BartForConditionalGeneration,pass,8
|
||||
|
||||
|
||||
|
||||
BertForMaskedLM,pass,5
|
||||
|
||||
|
||||
|
||||
BertForQuestionAnswering,pass,5
|
||||
|
||||
|
||||
|
||||
BlenderbotForCausalLM,pass_due_to_skip,0
|
||||
|
||||
|
||||
|
||||
BlenderbotSmallForCausalLM,pass,6
|
||||
|
||||
|
||||
|
||||
BlenderbotSmallForConditionalGeneration,pass,8
|
||||
|
||||
|
||||
|
||||
CamemBert,pass,5
|
||||
|
||||
|
||||
|
||||
DebertaV2ForMaskedLM,pass_due_to_skip,0
|
||||
|
||||
|
||||
|
||||
DebertaV2ForQuestionAnswering,pass,4
|
||||
|
||||
|
||||
|
||||
DistilBertForMaskedLM,pass,5
|
||||
|
||||
|
||||
|
||||
DistilBertForQuestionAnswering,pass,5
|
||||
|
||||
|
||||
|
||||
DistillGPT2,pass,7
|
||||
|
||||
|
||||
@ -70,10 +38,6 @@ ElectraForCausalLM,pass,4
|
||||
|
||||
|
||||
|
||||
ElectraForQuestionAnswering,pass,5
|
||||
|
||||
|
||||
|
||||
GPT2ForSequenceClassification,pass,6
|
||||
|
||||
|
||||
@ -86,10 +50,6 @@ LayoutLMForMaskedLM,pass,5
|
||||
|
||||
|
||||
|
||||
LayoutLMForSequenceClassification,pass,6
|
||||
|
||||
|
||||
|
||||
M2M100ForConditionalGeneration,pass,4
|
||||
|
||||
|
||||
@ -98,10 +58,6 @@ MBartForCausalLM,pass,6
|
||||
|
||||
|
||||
|
||||
MBartForConditionalGeneration,pass,8
|
||||
|
||||
|
||||
|
||||
MT5ForConditionalGeneration,pass,5
|
||||
|
||||
|
||||
@ -110,18 +66,10 @@ MegatronBertForCausalLM,pass,5
|
||||
|
||||
|
||||
|
||||
MegatronBertForQuestionAnswering,pass,5
|
||||
|
||||
|
||||
|
||||
MobileBertForMaskedLM,pass,3
|
||||
|
||||
|
||||
|
||||
MobileBertForQuestionAnswering,pass,3
|
||||
|
||||
|
||||
|
||||
OPTForCausalLM,pass,8
|
||||
|
||||
|
||||
@ -130,26 +78,14 @@ PLBartForCausalLM,pass,6
|
||||
|
||||
|
||||
|
||||
PLBartForConditionalGeneration,pass,8
|
||||
|
||||
|
||||
|
||||
PegasusForCausalLM,pass,6
|
||||
|
||||
|
||||
|
||||
PegasusForConditionalGeneration,pass,7
|
||||
|
||||
|
||||
|
||||
RobertaForCausalLM,pass,5
|
||||
|
||||
|
||||
|
||||
RobertaForQuestionAnswering,pass,5
|
||||
|
||||
|
||||
|
||||
T5ForConditionalGeneration,pass,5
|
||||
|
||||
|
||||
|
|
@ -6,58 +6,26 @@ AlbertForMaskedLM,pass,0
|
||||
|
||||
|
||||
|
||||
AlbertForQuestionAnswering,pass,0
|
||||
|
||||
|
||||
|
||||
BartForCausalLM,pass,0
|
||||
|
||||
|
||||
|
||||
BartForConditionalGeneration,pass,0
|
||||
|
||||
|
||||
|
||||
BertForMaskedLM,pass,0
|
||||
|
||||
|
||||
|
||||
BertForQuestionAnswering,pass,0
|
||||
|
||||
|
||||
|
||||
BlenderbotForCausalLM,pass_due_to_skip,0
|
||||
|
||||
|
||||
|
||||
BlenderbotSmallForCausalLM,pass,0
|
||||
|
||||
|
||||
|
||||
BlenderbotSmallForConditionalGeneration,pass,0
|
||||
|
||||
|
||||
|
||||
CamemBert,pass,0
|
||||
|
||||
|
||||
|
||||
DebertaV2ForMaskedLM,pass_due_to_skip,0
|
||||
|
||||
|
||||
|
||||
DebertaV2ForQuestionAnswering,pass,0
|
||||
|
||||
|
||||
|
||||
DistilBertForMaskedLM,pass,0
|
||||
|
||||
|
||||
|
||||
DistilBertForQuestionAnswering,pass,0
|
||||
|
||||
|
||||
|
||||
DistillGPT2,pass,0
|
||||
|
||||
|
||||
@ -66,10 +34,6 @@ ElectraForCausalLM,pass,0
|
||||
|
||||
|
||||
|
||||
ElectraForQuestionAnswering,pass,0
|
||||
|
||||
|
||||
|
||||
GPT2ForSequenceClassification,pass,0
|
||||
|
||||
|
||||
@ -82,10 +46,6 @@ LayoutLMForMaskedLM,pass,0
|
||||
|
||||
|
||||
|
||||
LayoutLMForSequenceClassification,pass,0
|
||||
|
||||
|
||||
|
||||
M2M100ForConditionalGeneration,pass,0
|
||||
|
||||
|
||||
@ -94,10 +54,6 @@ MBartForCausalLM,pass,0
|
||||
|
||||
|
||||
|
||||
MBartForConditionalGeneration,pass,0
|
||||
|
||||
|
||||
|
||||
MT5ForConditionalGeneration,pass,0
|
||||
|
||||
|
||||
@ -106,18 +62,10 @@ MegatronBertForCausalLM,pass,0
|
||||
|
||||
|
||||
|
||||
MegatronBertForQuestionAnswering,pass,0
|
||||
|
||||
|
||||
|
||||
MobileBertForMaskedLM,pass,0
|
||||
|
||||
|
||||
|
||||
MobileBertForQuestionAnswering,pass,0
|
||||
|
||||
|
||||
|
||||
OPTForCausalLM,pass,0
|
||||
|
||||
|
||||
@ -126,26 +74,14 @@ PLBartForCausalLM,pass,0
|
||||
|
||||
|
||||
|
||||
PLBartForConditionalGeneration,pass,0
|
||||
|
||||
|
||||
|
||||
PegasusForCausalLM,pass,0
|
||||
|
||||
|
||||
|
||||
PegasusForConditionalGeneration,pass,0
|
||||
|
||||
|
||||
|
||||
RobertaForCausalLM,pass,0
|
||||
|
||||
|
||||
|
||||
RobertaForQuestionAnswering,pass,0
|
||||
|
||||
|
||||
|
||||
T5ForConditionalGeneration,pass,0
|
||||
|
||||
|
||||
|
|
@ -6,10 +6,6 @@ AlbertForMaskedLM,pass,0
|
||||
|
||||
|
||||
|
||||
AlbertForQuestionAnswering,pass,0
|
||||
|
||||
|
||||
|
||||
AllenaiLongformerBase,pass,4
|
||||
|
||||
|
||||
@ -18,50 +14,22 @@ BartForCausalLM,pass,0
|
||||
|
||||
|
||||
|
||||
BartForConditionalGeneration,pass,0
|
||||
|
||||
|
||||
|
||||
BertForMaskedLM,pass,0
|
||||
|
||||
|
||||
|
||||
BertForQuestionAnswering,pass,0
|
||||
|
||||
|
||||
|
||||
BlenderbotForCausalLM,pass_due_to_skip,0
|
||||
|
||||
|
||||
|
||||
BlenderbotSmallForCausalLM,pass,0
|
||||
|
||||
|
||||
|
||||
BlenderbotSmallForConditionalGeneration,pass,0
|
||||
|
||||
|
||||
|
||||
CamemBert,pass,0
|
||||
|
||||
|
||||
|
||||
DebertaV2ForMaskedLM,pass_due_to_skip,0
|
||||
|
||||
|
||||
|
||||
DebertaV2ForQuestionAnswering,pass,0
|
||||
|
||||
|
||||
|
||||
DistilBertForMaskedLM,pass,0
|
||||
|
||||
|
||||
|
||||
DistilBertForQuestionAnswering,pass,0
|
||||
|
||||
|
||||
|
||||
DistillGPT2,pass,2
|
||||
|
||||
|
||||
@ -70,10 +38,6 @@ ElectraForCausalLM,pass,0
|
||||
|
||||
|
||||
|
||||
ElectraForQuestionAnswering,pass,0
|
||||
|
||||
|
||||
|
||||
GPT2ForSequenceClassification,pass,0
|
||||
|
||||
|
||||
@ -86,10 +50,6 @@ LayoutLMForMaskedLM,pass,0
|
||||
|
||||
|
||||
|
||||
LayoutLMForSequenceClassification,pass,0
|
||||
|
||||
|
||||
|
||||
M2M100ForConditionalGeneration,pass,0
|
||||
|
||||
|
||||
@ -98,10 +58,6 @@ MBartForCausalLM,pass,0
|
||||
|
||||
|
||||
|
||||
MBartForConditionalGeneration,pass,0
|
||||
|
||||
|
||||
|
||||
MT5ForConditionalGeneration,pass,0
|
||||
|
||||
|
||||
@ -110,18 +66,10 @@ MegatronBertForCausalLM,pass,0
|
||||
|
||||
|
||||
|
||||
MegatronBertForQuestionAnswering,pass,0
|
||||
|
||||
|
||||
|
||||
MobileBertForMaskedLM,pass,0
|
||||
|
||||
|
||||
|
||||
MobileBertForQuestionAnswering,pass,0
|
||||
|
||||
|
||||
|
||||
OPTForCausalLM,pass,0
|
||||
|
||||
|
||||
@ -130,26 +78,14 @@ PLBartForCausalLM,pass,0
|
||||
|
||||
|
||||
|
||||
PLBartForConditionalGeneration,pass,0
|
||||
|
||||
|
||||
|
||||
PegasusForCausalLM,pass,0
|
||||
|
||||
|
||||
|
||||
PegasusForConditionalGeneration,pass,0
|
||||
|
||||
|
||||
|
||||
RobertaForCausalLM,pass,0
|
||||
|
||||
|
||||
|
||||
RobertaForQuestionAnswering,pass,0
|
||||
|
||||
|
||||
|
||||
T5ForConditionalGeneration,pass,0
|
||||
|
||||
|
||||
|
|
@ -6,10 +6,6 @@ AlbertForMaskedLM,pass,4
|
||||
|
||||
|
||||
|
||||
AlbertForQuestionAnswering,pass,5
|
||||
|
||||
|
||||
|
||||
AllenaiLongformerBase,pass,9
|
||||
|
||||
|
||||
@ -18,50 +14,22 @@ BartForCausalLM,pass,6
|
||||
|
||||
|
||||
|
||||
BartForConditionalGeneration,pass,8
|
||||
|
||||
|
||||
|
||||
BertForMaskedLM,pass,5
|
||||
|
||||
|
||||
|
||||
BertForQuestionAnswering,pass,5
|
||||
|
||||
|
||||
|
||||
BlenderbotForCausalLM,pass_due_to_skip,0
|
||||
|
||||
|
||||
|
||||
BlenderbotSmallForCausalLM,pass,6
|
||||
|
||||
|
||||
|
||||
BlenderbotSmallForConditionalGeneration,pass,8
|
||||
|
||||
|
||||
|
||||
CamemBert,pass,5
|
||||
|
||||
|
||||
|
||||
DebertaV2ForMaskedLM,pass_due_to_skip,0
|
||||
|
||||
|
||||
|
||||
DebertaV2ForQuestionAnswering,pass,4
|
||||
|
||||
|
||||
|
||||
DistilBertForMaskedLM,pass,5
|
||||
|
||||
|
||||
|
||||
DistilBertForQuestionAnswering,pass,5
|
||||
|
||||
|
||||
|
||||
DistillGPT2,pass,7
|
||||
|
||||
|
||||
@ -70,10 +38,6 @@ ElectraForCausalLM,pass,4
|
||||
|
||||
|
||||
|
||||
ElectraForQuestionAnswering,pass,5
|
||||
|
||||
|
||||
|
||||
GPT2ForSequenceClassification,pass,6
|
||||
|
||||
|
||||
@ -86,10 +50,6 @@ LayoutLMForMaskedLM,pass,5
|
||||
|
||||
|
||||
|
||||
LayoutLMForSequenceClassification,pass,6
|
||||
|
||||
|
||||
|
||||
M2M100ForConditionalGeneration,pass,4
|
||||
|
||||
|
||||
@ -98,10 +58,6 @@ MBartForCausalLM,pass,6
|
||||
|
||||
|
||||
|
||||
MBartForConditionalGeneration,pass,8
|
||||
|
||||
|
||||
|
||||
MT5ForConditionalGeneration,pass,5
|
||||
|
||||
|
||||
@ -110,18 +66,10 @@ MegatronBertForCausalLM,pass,5
|
||||
|
||||
|
||||
|
||||
MegatronBertForQuestionAnswering,pass,5
|
||||
|
||||
|
||||
|
||||
MobileBertForMaskedLM,pass,3
|
||||
|
||||
|
||||
|
||||
MobileBertForQuestionAnswering,pass,3
|
||||
|
||||
|
||||
|
||||
OPTForCausalLM,pass,8
|
||||
|
||||
|
||||
@ -130,26 +78,14 @@ PLBartForCausalLM,pass,6
|
||||
|
||||
|
||||
|
||||
PLBartForConditionalGeneration,pass,8
|
||||
|
||||
|
||||
|
||||
PegasusForCausalLM,pass,6
|
||||
|
||||
|
||||
|
||||
PegasusForConditionalGeneration,pass,7
|
||||
|
||||
|
||||
|
||||
RobertaForCausalLM,pass,5
|
||||
|
||||
|
||||
|
||||
RobertaForQuestionAnswering,pass,5
|
||||
|
||||
|
||||
|
||||
T5ForConditionalGeneration,pass,5
|
||||
|
||||
|
||||
|
|
@ -6,10 +6,6 @@ AlbertForMaskedLM,pass,0
|
||||
|
||||
|
||||
|
||||
AlbertForQuestionAnswering,pass,0
|
||||
|
||||
|
||||
|
||||
AllenaiLongformerBase,pass,4
|
||||
|
||||
|
||||
@ -18,50 +14,22 @@ BartForCausalLM,pass,0
|
||||
|
||||
|
||||
|
||||
BartForConditionalGeneration,pass,0
|
||||
|
||||
|
||||
|
||||
BertForMaskedLM,pass,0
|
||||
|
||||
|
||||
|
||||
BertForQuestionAnswering,pass,0
|
||||
|
||||
|
||||
|
||||
BlenderbotForCausalLM,pass_due_to_skip,0
|
||||
|
||||
|
||||
|
||||
BlenderbotSmallForCausalLM,pass,0
|
||||
|
||||
|
||||
|
||||
BlenderbotSmallForConditionalGeneration,pass,0
|
||||
|
||||
|
||||
|
||||
CamemBert,pass,0
|
||||
|
||||
|
||||
|
||||
DebertaV2ForMaskedLM,pass_due_to_skip,0
|
||||
|
||||
|
||||
|
||||
DebertaV2ForQuestionAnswering,pass,0
|
||||
|
||||
|
||||
|
||||
DistilBertForMaskedLM,pass,0
|
||||
|
||||
|
||||
|
||||
DistilBertForQuestionAnswering,pass,0
|
||||
|
||||
|
||||
|
||||
DistillGPT2,pass,2
|
||||
|
||||
|
||||
@ -70,10 +38,6 @@ ElectraForCausalLM,pass,0
|
||||
|
||||
|
||||
|
||||
ElectraForQuestionAnswering,pass,0
|
||||
|
||||
|
||||
|
||||
GPT2ForSequenceClassification,pass,0
|
||||
|
||||
|
||||
@ -86,10 +50,6 @@ LayoutLMForMaskedLM,pass,0
|
||||
|
||||
|
||||
|
||||
LayoutLMForSequenceClassification,pass,0
|
||||
|
||||
|
||||
|
||||
M2M100ForConditionalGeneration,pass,0
|
||||
|
||||
|
||||
@ -98,10 +58,6 @@ MBartForCausalLM,pass,0
|
||||
|
||||
|
||||
|
||||
MBartForConditionalGeneration,pass,0
|
||||
|
||||
|
||||
|
||||
MT5ForConditionalGeneration,pass,0
|
||||
|
||||
|
||||
@ -110,18 +66,10 @@ MegatronBertForCausalLM,pass,0
|
||||
|
||||
|
||||
|
||||
MegatronBertForQuestionAnswering,pass,0
|
||||
|
||||
|
||||
|
||||
MobileBertForMaskedLM,pass,0
|
||||
|
||||
|
||||
|
||||
MobileBertForQuestionAnswering,pass,0
|
||||
|
||||
|
||||
|
||||
OPTForCausalLM,pass,0
|
||||
|
||||
|
||||
@ -130,26 +78,14 @@ PLBartForCausalLM,pass,0
|
||||
|
||||
|
||||
|
||||
PLBartForConditionalGeneration,pass,0
|
||||
|
||||
|
||||
|
||||
PegasusForCausalLM,pass,0
|
||||
|
||||
|
||||
|
||||
PegasusForConditionalGeneration,pass,0
|
||||
|
||||
|
||||
|
||||
RobertaForCausalLM,pass,0
|
||||
|
||||
|
||||
|
||||
RobertaForQuestionAnswering,pass,0
|
||||
|
||||
|
||||
|
||||
T5ForConditionalGeneration,pass,0
|
||||
|
||||
|
||||
|
|
@ -6,10 +6,6 @@ AlbertForMaskedLM,pass,4
|
||||
|
||||
|
||||
|
||||
AlbertForQuestionAnswering,pass,5
|
||||
|
||||
|
||||
|
||||
AllenaiLongformerBase,pass,9
|
||||
|
||||
|
||||
@ -18,50 +14,22 @@ BartForCausalLM,pass,6
|
||||
|
||||
|
||||
|
||||
BartForConditionalGeneration,pass,8
|
||||
|
||||
|
||||
|
||||
BertForMaskedLM,pass,5
|
||||
|
||||
|
||||
|
||||
BertForQuestionAnswering,pass,5
|
||||
|
||||
|
||||
|
||||
BlenderbotForCausalLM,pass_due_to_skip,0
|
||||
|
||||
|
||||
|
||||
BlenderbotSmallForCausalLM,pass,6
|
||||
|
||||
|
||||
|
||||
BlenderbotSmallForConditionalGeneration,pass,8
|
||||
|
||||
|
||||
|
||||
CamemBert,pass,5
|
||||
|
||||
|
||||
|
||||
DebertaV2ForMaskedLM,pass_due_to_skip,0
|
||||
|
||||
|
||||
|
||||
DebertaV2ForQuestionAnswering,pass,4
|
||||
|
||||
|
||||
|
||||
DistilBertForMaskedLM,pass,5
|
||||
|
||||
|
||||
|
||||
DistilBertForQuestionAnswering,pass,5
|
||||
|
||||
|
||||
|
||||
DistillGPT2,pass,7
|
||||
|
||||
|
||||
@ -70,10 +38,6 @@ ElectraForCausalLM,pass,4
|
||||
|
||||
|
||||
|
||||
ElectraForQuestionAnswering,pass,5
|
||||
|
||||
|
||||
|
||||
GPT2ForSequenceClassification,pass,6
|
||||
|
||||
|
||||
@ -86,10 +50,6 @@ LayoutLMForMaskedLM,pass,5
|
||||
|
||||
|
||||
|
||||
LayoutLMForSequenceClassification,pass,6
|
||||
|
||||
|
||||
|
||||
M2M100ForConditionalGeneration,pass,4
|
||||
|
||||
|
||||
@ -98,10 +58,6 @@ MBartForCausalLM,pass,6
|
||||
|
||||
|
||||
|
||||
MBartForConditionalGeneration,pass,8
|
||||
|
||||
|
||||
|
||||
MT5ForConditionalGeneration,pass,5
|
||||
|
||||
|
||||
@ -110,18 +66,10 @@ MegatronBertForCausalLM,pass,5
|
||||
|
||||
|
||||
|
||||
MegatronBertForQuestionAnswering,pass,5
|
||||
|
||||
|
||||
|
||||
MobileBertForMaskedLM,pass,3
|
||||
|
||||
|
||||
|
||||
MobileBertForQuestionAnswering,pass,3
|
||||
|
||||
|
||||
|
||||
OPTForCausalLM,pass,8
|
||||
|
||||
|
||||
@ -130,26 +78,14 @@ PLBartForCausalLM,pass,6
|
||||
|
||||
|
||||
|
||||
PLBartForConditionalGeneration,pass,8
|
||||
|
||||
|
||||
|
||||
PegasusForCausalLM,pass,6
|
||||
|
||||
|
||||
|
||||
PegasusForConditionalGeneration,pass,7
|
||||
|
||||
|
||||
|
||||
RobertaForCausalLM,pass,5
|
||||
|
||||
|
||||
|
||||
RobertaForQuestionAnswering,pass,5
|
||||
|
||||
|
||||
|
||||
T5ForConditionalGeneration,pass,5
|
||||
|
||||
|
||||
|
|
@ -6,10 +6,6 @@ AlbertForMaskedLM,pass,0
|
||||
|
||||
|
||||
|
||||
AlbertForQuestionAnswering,pass,0
|
||||
|
||||
|
||||
|
||||
AllenaiLongformerBase,pass,4
|
||||
|
||||
|
||||
@ -18,50 +14,22 @@ BartForCausalLM,pass,0
|
||||
|
||||
|
||||
|
||||
BartForConditionalGeneration,pass,0
|
||||
|
||||
|
||||
|
||||
BertForMaskedLM,pass,0
|
||||
|
||||
|
||||
|
||||
BertForQuestionAnswering,pass,0
|
||||
|
||||
|
||||
|
||||
BlenderbotForCausalLM,pass_due_to_skip,0
|
||||
|
||||
|
||||
|
||||
BlenderbotSmallForCausalLM,pass,0
|
||||
|
||||
|
||||
|
||||
BlenderbotSmallForConditionalGeneration,pass,0
|
||||
|
||||
|
||||
|
||||
CamemBert,pass,0
|
||||
|
||||
|
||||
|
||||
DebertaV2ForMaskedLM,pass_due_to_skip,0
|
||||
|
||||
|
||||
|
||||
DebertaV2ForQuestionAnswering,pass,0
|
||||
|
||||
|
||||
|
||||
DistilBertForMaskedLM,pass,0
|
||||
|
||||
|
||||
|
||||
DistilBertForQuestionAnswering,pass,0
|
||||
|
||||
|
||||
|
||||
DistillGPT2,pass,2
|
||||
|
||||
|
||||
@ -70,10 +38,6 @@ ElectraForCausalLM,pass,0
|
||||
|
||||
|
||||
|
||||
ElectraForQuestionAnswering,pass,0
|
||||
|
||||
|
||||
|
||||
GPT2ForSequenceClassification,pass,0
|
||||
|
||||
|
||||
@ -86,10 +50,6 @@ LayoutLMForMaskedLM,pass,0
|
||||
|
||||
|
||||
|
||||
LayoutLMForSequenceClassification,pass,0
|
||||
|
||||
|
||||
|
||||
M2M100ForConditionalGeneration,pass,0
|
||||
|
||||
|
||||
@ -98,10 +58,6 @@ MBartForCausalLM,pass,0
|
||||
|
||||
|
||||
|
||||
MBartForConditionalGeneration,pass,0
|
||||
|
||||
|
||||
|
||||
MT5ForConditionalGeneration,pass,0
|
||||
|
||||
|
||||
@ -110,18 +66,10 @@ MegatronBertForCausalLM,pass,0
|
||||
|
||||
|
||||
|
||||
MegatronBertForQuestionAnswering,pass,0
|
||||
|
||||
|
||||
|
||||
MobileBertForMaskedLM,pass,0
|
||||
|
||||
|
||||
|
||||
MobileBertForQuestionAnswering,pass,0
|
||||
|
||||
|
||||
|
||||
OPTForCausalLM,pass,0
|
||||
|
||||
|
||||
@ -130,26 +78,14 @@ PLBartForCausalLM,pass,0
|
||||
|
||||
|
||||
|
||||
PLBartForConditionalGeneration,pass,0
|
||||
|
||||
|
||||
|
||||
PegasusForCausalLM,pass,0
|
||||
|
||||
|
||||
|
||||
PegasusForConditionalGeneration,pass,0
|
||||
|
||||
|
||||
|
||||
RobertaForCausalLM,pass,0
|
||||
|
||||
|
||||
|
||||
RobertaForQuestionAnswering,pass,0
|
||||
|
||||
|
||||
|
||||
T5ForConditionalGeneration,pass,0
|
||||
|
||||
|
||||
|
|
@ -6,10 +6,6 @@ AlbertForMaskedLM,pass,4
|
||||
|
||||
|
||||
|
||||
AlbertForQuestionAnswering,pass,5
|
||||
|
||||
|
||||
|
||||
AllenaiLongformerBase,pass,9
|
||||
|
||||
|
||||
@ -18,50 +14,22 @@ BartForCausalLM,pass,6
|
||||
|
||||
|
||||
|
||||
BartForConditionalGeneration,pass,8
|
||||
|
||||
|
||||
|
||||
BertForMaskedLM,pass,5
|
||||
|
||||
|
||||
|
||||
BertForQuestionAnswering,pass,5
|
||||
|
||||
|
||||
|
||||
BlenderbotForCausalLM,pass_due_to_skip,0
|
||||
|
||||
|
||||
|
||||
BlenderbotSmallForCausalLM,pass,6
|
||||
|
||||
|
||||
|
||||
BlenderbotSmallForConditionalGeneration,pass,8
|
||||
|
||||
|
||||
|
||||
CamemBert,pass,5
|
||||
|
||||
|
||||
|
||||
DebertaV2ForMaskedLM,pass_due_to_skip,0
|
||||
|
||||
|
||||
|
||||
DebertaV2ForQuestionAnswering,pass,4
|
||||
|
||||
|
||||
|
||||
DistilBertForMaskedLM,pass,5
|
||||
|
||||
|
||||
|
||||
DistilBertForQuestionAnswering,pass,5
|
||||
|
||||
|
||||
|
||||
DistillGPT2,pass,7
|
||||
|
||||
|
||||
@ -70,10 +38,6 @@ ElectraForCausalLM,pass,4
|
||||
|
||||
|
||||
|
||||
ElectraForQuestionAnswering,pass,5
|
||||
|
||||
|
||||
|
||||
GPT2ForSequenceClassification,pass,6
|
||||
|
||||
|
||||
@ -86,10 +50,6 @@ LayoutLMForMaskedLM,pass,5
|
||||
|
||||
|
||||
|
||||
LayoutLMForSequenceClassification,pass,6
|
||||
|
||||
|
||||
|
||||
M2M100ForConditionalGeneration,pass,4
|
||||
|
||||
|
||||
@ -98,10 +58,6 @@ MBartForCausalLM,pass,6
|
||||
|
||||
|
||||
|
||||
MBartForConditionalGeneration,pass,8
|
||||
|
||||
|
||||
|
||||
MT5ForConditionalGeneration,pass,5
|
||||
|
||||
|
||||
@ -110,18 +66,10 @@ MegatronBertForCausalLM,pass,5
|
||||
|
||||
|
||||
|
||||
MegatronBertForQuestionAnswering,pass,5
|
||||
|
||||
|
||||
|
||||
MobileBertForMaskedLM,pass,3
|
||||
|
||||
|
||||
|
||||
MobileBertForQuestionAnswering,pass,3
|
||||
|
||||
|
||||
|
||||
OPTForCausalLM,pass,8
|
||||
|
||||
|
||||
@ -130,26 +78,14 @@ PLBartForCausalLM,pass,6
|
||||
|
||||
|
||||
|
||||
PLBartForConditionalGeneration,pass,8
|
||||
|
||||
|
||||
|
||||
PegasusForCausalLM,pass,6
|
||||
|
||||
|
||||
|
||||
PegasusForConditionalGeneration,pass,7
|
||||
|
||||
|
||||
|
||||
RobertaForCausalLM,pass,5
|
||||
|
||||
|
||||
|
||||
RobertaForQuestionAnswering,pass,5
|
||||
|
||||
|
||||
|
||||
T5ForConditionalGeneration,pass,5
|
||||
|
||||
|
||||
|
|
@ -6,10 +6,6 @@ AlbertForMaskedLM,pass,0
|
||||
|
||||
|
||||
|
||||
AlbertForQuestionAnswering,pass,0
|
||||
|
||||
|
||||
|
||||
AllenaiLongformerBase,pass,4
|
||||
|
||||
|
||||
@ -18,50 +14,22 @@ BartForCausalLM,pass,0
|
||||
|
||||
|
||||
|
||||
BartForConditionalGeneration,pass,0
|
||||
|
||||
|
||||
|
||||
BertForMaskedLM,pass,0
|
||||
|
||||
|
||||
|
||||
BertForQuestionAnswering,pass,0
|
||||
|
||||
|
||||
|
||||
BlenderbotForCausalLM,pass_due_to_skip,0
|
||||
|
||||
|
||||
|
||||
BlenderbotSmallForCausalLM,pass,0
|
||||
|
||||
|
||||
|
||||
BlenderbotSmallForConditionalGeneration,pass,0
|
||||
|
||||
|
||||
|
||||
CamemBert,pass,0
|
||||
|
||||
|
||||
|
||||
DebertaV2ForMaskedLM,pass_due_to_skip,0
|
||||
|
||||
|
||||
|
||||
DebertaV2ForQuestionAnswering,pass,0
|
||||
|
||||
|
||||
|
||||
DistilBertForMaskedLM,pass,0
|
||||
|
||||
|
||||
|
||||
DistilBertForQuestionAnswering,pass,0
|
||||
|
||||
|
||||
|
||||
DistillGPT2,pass,2
|
||||
|
||||
|
||||
@ -70,10 +38,6 @@ ElectraForCausalLM,pass,0
|
||||
|
||||
|
||||
|
||||
ElectraForQuestionAnswering,pass,0
|
||||
|
||||
|
||||
|
||||
GPT2ForSequenceClassification,pass,0
|
||||
|
||||
|
||||
@ -86,10 +50,6 @@ LayoutLMForMaskedLM,pass,0
|
||||
|
||||
|
||||
|
||||
LayoutLMForSequenceClassification,pass,0
|
||||
|
||||
|
||||
|
||||
M2M100ForConditionalGeneration,pass,0
|
||||
|
||||
|
||||
@ -98,10 +58,6 @@ MBartForCausalLM,pass,0
|
||||
|
||||
|
||||
|
||||
MBartForConditionalGeneration,pass,0
|
||||
|
||||
|
||||
|
||||
MT5ForConditionalGeneration,pass,0
|
||||
|
||||
|
||||
@ -110,18 +66,10 @@ MegatronBertForCausalLM,pass,0
|
||||
|
||||
|
||||
|
||||
MegatronBertForQuestionAnswering,pass,0
|
||||
|
||||
|
||||
|
||||
MobileBertForMaskedLM,pass,0
|
||||
|
||||
|
||||
|
||||
MobileBertForQuestionAnswering,pass,0
|
||||
|
||||
|
||||
|
||||
OPTForCausalLM,pass,0
|
||||
|
||||
|
||||
@ -130,26 +78,14 @@ PLBartForCausalLM,pass,0
|
||||
|
||||
|
||||
|
||||
PLBartForConditionalGeneration,pass,0
|
||||
|
||||
|
||||
|
||||
PegasusForCausalLM,pass,0
|
||||
|
||||
|
||||
|
||||
PegasusForConditionalGeneration,pass,0
|
||||
|
||||
|
||||
|
||||
RobertaForCausalLM,pass,0
|
||||
|
||||
|
||||
|
||||
RobertaForQuestionAnswering,pass,0
|
||||
|
||||
|
||||
|
||||
T5ForConditionalGeneration,pass,0
|
||||
|
||||
|
||||
|
|
@ -6,10 +6,6 @@ AlbertForMaskedLM,pass,4
|
||||
|
||||
|
||||
|
||||
AlbertForQuestionAnswering,pass,5
|
||||
|
||||
|
||||
|
||||
AllenaiLongformerBase,pass,9
|
||||
|
||||
|
||||
@ -18,50 +14,22 @@ BartForCausalLM,pass,6
|
||||
|
||||
|
||||
|
||||
BartForConditionalGeneration,pass,8
|
||||
|
||||
|
||||
|
||||
BertForMaskedLM,pass,5
|
||||
|
||||
|
||||
|
||||
BertForQuestionAnswering,pass,5
|
||||
|
||||
|
||||
|
||||
BlenderbotForCausalLM,eager_fail_to_run,0
|
||||
|
||||
|
||||
|
||||
BlenderbotSmallForCausalLM,pass,6
|
||||
|
||||
|
||||
|
||||
BlenderbotSmallForConditionalGeneration,pass,8
|
||||
|
||||
|
||||
|
||||
CamemBert,pass,5
|
||||
|
||||
|
||||
|
||||
DebertaV2ForMaskedLM,pass_due_to_skip,0
|
||||
|
||||
|
||||
|
||||
DebertaV2ForQuestionAnswering,eager_1st_run_OOM,0
|
||||
|
||||
|
||||
|
||||
DistilBertForMaskedLM,pass,5
|
||||
|
||||
|
||||
|
||||
DistilBertForQuestionAnswering,pass,5
|
||||
|
||||
|
||||
|
||||
DistillGPT2,pass,7
|
||||
|
||||
|
||||
@ -70,10 +38,6 @@ ElectraForCausalLM,pass,4
|
||||
|
||||
|
||||
|
||||
ElectraForQuestionAnswering,pass,5
|
||||
|
||||
|
||||
|
||||
GPT2ForSequenceClassification,pass,6
|
||||
|
||||
|
||||
@ -86,10 +50,6 @@ LayoutLMForMaskedLM,pass,5
|
||||
|
||||
|
||||
|
||||
LayoutLMForSequenceClassification,pass,6
|
||||
|
||||
|
||||
|
||||
M2M100ForConditionalGeneration,pass,4
|
||||
|
||||
|
||||
@ -98,10 +58,6 @@ MBartForCausalLM,pass,6
|
||||
|
||||
|
||||
|
||||
MBartForConditionalGeneration,pass,8
|
||||
|
||||
|
||||
|
||||
MT5ForConditionalGeneration,pass,5
|
||||
|
||||
|
||||
@ -110,18 +66,10 @@ MegatronBertForCausalLM,pass,5
|
||||
|
||||
|
||||
|
||||
MegatronBertForQuestionAnswering,pass,5
|
||||
|
||||
|
||||
|
||||
MobileBertForMaskedLM,pass,3
|
||||
|
||||
|
||||
|
||||
MobileBertForQuestionAnswering,pass,3
|
||||
|
||||
|
||||
|
||||
OPTForCausalLM,pass,8
|
||||
|
||||
|
||||
@ -130,26 +78,14 @@ PLBartForCausalLM,pass,6
|
||||
|
||||
|
||||
|
||||
PLBartForConditionalGeneration,pass,8
|
||||
|
||||
|
||||
|
||||
PegasusForCausalLM,pass,6
|
||||
|
||||
|
||||
|
||||
PegasusForConditionalGeneration,pass,7
|
||||
|
||||
|
||||
|
||||
RobertaForCausalLM,pass,5
|
||||
|
||||
|
||||
|
||||
RobertaForQuestionAnswering,pass,5
|
||||
|
||||
|
||||
|
||||
T5ForConditionalGeneration,pass,5
|
||||
|
||||
|
||||
|
|
@ -123,8 +123,6 @@ CI_SKIP_OPTIMIZER = {
|
||||
# HF
|
||||
"pnasnet5large", # Stack issue in fx
|
||||
"MobileBertForMaskedLM", # Stack issue in fx
|
||||
"MobileBertForQuestionAnswering", # Stack issue in fx
|
||||
"PegasusForConditionalGeneration", # OOM
|
||||
}
|
||||
|
||||
try:
|
||||
@ -192,17 +190,11 @@ BENCHMARK_USE_SGD = {
|
||||
# HF
|
||||
"AlbertForMaskedLM",
|
||||
"BartForCausalLM",
|
||||
"BartForConditionalGeneration",
|
||||
"BlenderbotSmallForCausalLM",
|
||||
"BlenderbotSmallForConditionalGeneration",
|
||||
"DebertaV2ForQuestionAnswering", # eager OOM
|
||||
"ElectraForCausalLM",
|
||||
"M2M100ForConditionalGeneration",
|
||||
"MBartForCausalLM",
|
||||
"MBartForConditionalGeneration",
|
||||
"OPTForCausalLM",
|
||||
"PLBartForCausalLM",
|
||||
"PLBartForConditionalGeneration",
|
||||
"PegasusForCausalLM",
|
||||
"TrOCRForCausalLM",
|
||||
"XGLMForCausalLM",
|
||||
@ -3789,7 +3781,6 @@ def run(runner, args, original_dir=None):
|
||||
torch.use_deterministic_algorithms(True, warn_only=True)
|
||||
os.environ["CUBLAS_WORKSPACE_CONFIG"] = ":4096:8"
|
||||
if args.only is not None and args.only in {
|
||||
"DebertaForQuestionAnswering",
|
||||
"nvidia_deeprecommender",
|
||||
"crossvit_9_240",
|
||||
}:
|
||||
|
@ -59,7 +59,6 @@ imports = [
|
||||
"BigBirdConfig",
|
||||
"BlenderbotForConditionalGeneration",
|
||||
"BlenderbotModel",
|
||||
"BlenderbotSmallForConditionalGeneration",
|
||||
"BlenderbotSmallModel",
|
||||
"CLIPModel",
|
||||
"CLIPVisionModel",
|
||||
@ -73,7 +72,6 @@ imports = [
|
||||
"MarianForCausalLM",
|
||||
"MarianModel",
|
||||
"MarianMTModel",
|
||||
"PegasusForConditionalGeneration",
|
||||
"PegasusModel",
|
||||
"ReformerConfig",
|
||||
"ViTForImageClassification",
|
||||
@ -167,7 +165,7 @@ def get_sequence_length(model_cls, model_name):
|
||||
"Bert",
|
||||
"Roberta",
|
||||
)
|
||||
) or model_name in ("DistillGPT2", "GoogleFnet", "YituTechConvBert", "CamemBert"):
|
||||
) or model_name in ("DistillGPT2", "GoogleFnet", "YituTechConvBert"):
|
||||
seq_length = 512
|
||||
elif model_name in ("TrOCRForCausalLM"):
|
||||
seq_length = 256
|
||||
@ -222,9 +220,7 @@ def generate_inputs_for_model(
|
||||
BlenderbotModel,
|
||||
BlenderbotSmallModel,
|
||||
BlenderbotForConditionalGeneration,
|
||||
BlenderbotSmallForConditionalGeneration,
|
||||
PegasusModel,
|
||||
PegasusForConditionalGeneration,
|
||||
MarianModel,
|
||||
MarianMTModel,
|
||||
]:
|
||||
@ -333,10 +329,6 @@ EXTRA_MODELS = {
|
||||
AutoConfig.from_pretrained("YituTech/conv-bert-base"),
|
||||
AutoModelForMaskedLM,
|
||||
),
|
||||
"CamemBert": (
|
||||
AutoConfig.from_pretrained("camembert-base"),
|
||||
AutoModelForMaskedLM,
|
||||
),
|
||||
}
|
||||
|
||||
|
||||
@ -375,8 +367,6 @@ class HuggingfaceRunner(BenchmarkRunner):
|
||||
|
||||
def use_larger_multiplier_for_smaller_tensor(self, name):
|
||||
return name in [
|
||||
"ElectraForQuestionAnswering",
|
||||
"MegatronBertForQuestionAnswering",
|
||||
"GPT2ForSequenceClassification",
|
||||
]
|
||||
|
||||
|
@ -31,24 +31,15 @@ batch_size:
|
||||
# TODO - Fails even after fake tensors
|
||||
divisors:
|
||||
AlbertForMaskedLM: 2
|
||||
AlbertForQuestionAnswering: 2
|
||||
AllenaiLongformerBase: 2
|
||||
BartForCausalLM: 2
|
||||
BartForConditionalGeneration: 2
|
||||
BertForMaskedLM: 2
|
||||
BertForQuestionAnswering: 2
|
||||
BlenderbotForCausalLM: 8
|
||||
# BlenderbotForConditionalGeneration : 16
|
||||
BlenderbotSmallForCausalLM: 4
|
||||
BlenderbotSmallForConditionalGeneration: 2
|
||||
CamemBert: 2
|
||||
DebertaV2ForMaskedLM: 4
|
||||
DebertaV2ForQuestionAnswering: 8
|
||||
DistilBertForMaskedLM: 2
|
||||
DistilBertForQuestionAnswering: 2
|
||||
DistillGPT2: 2
|
||||
ElectraForCausalLM: 2
|
||||
ElectraForQuestionAnswering: 2
|
||||
GPT2ForSequenceClassification: 2
|
||||
# GPTJForCausalLM : 2
|
||||
# GPTJForQuestionAnswering : 2
|
||||
@ -56,22 +47,15 @@ batch_size:
|
||||
# GPTNeoForSequenceClassification : 2
|
||||
GoogleFnet: 2
|
||||
LayoutLMForMaskedLM: 2
|
||||
LayoutLMForSequenceClassification: 2
|
||||
M2M100ForConditionalGeneration: 4
|
||||
MBartForCausalLM: 2
|
||||
MBartForConditionalGeneration: 2
|
||||
MT5ForConditionalGeneration: 2
|
||||
MegatronBertForCausalLM: 4
|
||||
MegatronBertForQuestionAnswering: 2
|
||||
MobileBertForMaskedLM: 2
|
||||
MobileBertForQuestionAnswering: 2
|
||||
OPTForCausalLM: 2
|
||||
PLBartForCausalLM: 2
|
||||
PLBartForConditionalGeneration: 2
|
||||
PegasusForCausalLM: 4
|
||||
PegasusForConditionalGeneration: 2
|
||||
RobertaForCausalLM: 2
|
||||
RobertaForQuestionAnswering: 2
|
||||
T5ForConditionalGeneration: 2
|
||||
T5Small: 2
|
||||
TrOCRForCausalLM: 2
|
||||
@ -90,20 +74,13 @@ batch_size:
|
||||
tolerance:
|
||||
higher_training:
|
||||
- MT5ForConditionalGeneration
|
||||
# AlbertForQuestionAnswering fails in CI GCP A100 but error does not seem
|
||||
# harmful.
|
||||
- AlbertForQuestionAnswering
|
||||
|
||||
higher_max_autotune_training:
|
||||
# DebertaForQuestionAnswering needs higher tolerance in Max-Autotune mode
|
||||
- DebertaForQuestionAnswering
|
||||
higher_max_autotune_training: []
|
||||
|
||||
higher_inference:
|
||||
- GPT2ForSequenceClassification
|
||||
- RobertaForQuestionAnswering
|
||||
|
||||
higher_inference_cpu:
|
||||
- LayoutLMForSequenceClassification
|
||||
- GPT2ForSequenceClassification
|
||||
|
||||
cosine: []
|
||||
|
@ -1,22 +1,13 @@
|
||||
AlbertForMaskedLM,8
|
||||
AlbertForQuestionAnswering,8
|
||||
AllenaiLongformerBase,8
|
||||
BartForCausalLM,8
|
||||
BartForConditionalGeneration,4
|
||||
BertForMaskedLM,32
|
||||
BertForQuestionAnswering,32
|
||||
BlenderbotForCausalLM,32
|
||||
BlenderbotForConditionalGeneration,16
|
||||
BlenderbotSmallForCausalLM,256
|
||||
BlenderbotSmallForConditionalGeneration,128
|
||||
CamemBert,32
|
||||
DebertaV2ForMaskedLM,8
|
||||
DebertaV2ForQuestionAnswering,8
|
||||
DistilBertForMaskedLM,256
|
||||
DistilBertForQuestionAnswering,512
|
||||
DistillGPT2,32
|
||||
ElectraForCausalLM,64
|
||||
ElectraForQuestionAnswering,128
|
||||
GPT2ForSequenceClassification,8
|
||||
GPTJForCausalLM,1
|
||||
GPTJForQuestionAnswering,1
|
||||
@ -24,22 +15,15 @@ GPTNeoForCausalLM,32
|
||||
GPTNeoForSequenceClassification,32
|
||||
GoogleFnet,32
|
||||
LayoutLMForMaskedLM,32
|
||||
LayoutLMForSequenceClassification,32
|
||||
M2M100ForConditionalGeneration,64
|
||||
MBartForCausalLM,8
|
||||
MBartForConditionalGeneration,4
|
||||
MT5ForConditionalGeneration,32
|
||||
MegatronBertForCausalLM,16
|
||||
MegatronBertForQuestionAnswering,16
|
||||
MobileBertForMaskedLM,256
|
||||
MobileBertForQuestionAnswering,256
|
||||
OPTForCausalLM,4
|
||||
PLBartForCausalLM,16
|
||||
PLBartForConditionalGeneration,8
|
||||
PegasusForCausalLM,128
|
||||
PegasusForConditionalGeneration,64
|
||||
RobertaForCausalLM,32
|
||||
RobertaForQuestionAnswering,32
|
||||
T5ForConditionalGeneration,8
|
||||
T5Small,8
|
||||
TrOCRForCausalLM,64
|
||||
|
@ -1,41 +1,25 @@
|
||||
AlbertForMaskedLM,4
|
||||
AlbertForQuestionAnswering,4
|
||||
AllenaiLongformerBase,4
|
||||
BartForCausalLM,4
|
||||
BartForConditionalGeneration,2
|
||||
BertForMaskedLM,16
|
||||
BertForQuestionAnswering,16
|
||||
BigBird,32
|
||||
BlenderbotForCausalLM,32
|
||||
BlenderbotSmallForCausalLM,64
|
||||
BlenderbotSmallForConditionalGeneration,64
|
||||
CamemBert,16
|
||||
DebertaV2ForMaskedLM,16
|
||||
DebertaV2ForQuestionAnswering,2
|
||||
DistilBertForMaskedLM,128
|
||||
DistilBertForQuestionAnswering,256
|
||||
DistillGPT2,16
|
||||
ElectraForCausalLM,8
|
||||
ElectraForQuestionAnswering,8
|
||||
GoogleFnet,16
|
||||
GPT2ForSequenceClassification,4
|
||||
LayoutLMForMaskedLM,16
|
||||
LayoutLMForSequenceClassification,16
|
||||
M2M100ForConditionalGeneration,16
|
||||
MBartForCausalLM,4
|
||||
MBartForConditionalGeneration,2
|
||||
MegatronBertForCausalLM,4
|
||||
MegatronBertForQuestionAnswering,8
|
||||
MobileBertForMaskedLM,64
|
||||
MobileBertForQuestionAnswering,64
|
||||
MT5ForConditionalGeneration,16
|
||||
OPTForCausalLM,2
|
||||
PegasusForCausalLM,32
|
||||
PegasusForConditionalGeneration,32
|
||||
PLBartForCausalLM,8
|
||||
PLBartForConditionalGeneration,4
|
||||
RobertaForCausalLM,16
|
||||
RobertaForQuestionAnswering,16
|
||||
T5ForConditionalGeneration,4
|
||||
T5Small,1
|
||||
TrOCRForCausalLM,32
|
||||
|
@ -1038,7 +1038,8 @@ def define_buck_targets(
|
||||
name = "generated-version-header",
|
||||
header_namespace = "torch",
|
||||
exported_headers = {
|
||||
"version.h": ":generate-version-header[version.h]",
|
||||
"headeronly/version.h": ":generate-version-header[version.h]",
|
||||
"version.h": "torch/csrc/api/include/torch/version.h"
|
||||
},
|
||||
labels = labels,
|
||||
)
|
||||
@ -1047,19 +1048,27 @@ def define_buck_targets(
|
||||
fb_native.genrule(
|
||||
name = "generate-version-header",
|
||||
srcs = [
|
||||
"torch/csrc/api/include/torch/version.h.in",
|
||||
"torch/headeronly/version.h.in",
|
||||
"version.txt",
|
||||
],
|
||||
cmd = "$(exe {}tools:gen-version-header) ".format(ROOT_PATH) + " ".join([
|
||||
cmd = "mkdir -p $OUT/torch/headeronly && $(exe {}tools:gen-version-header) ".format(ROOT_PATH) + " ".join([
|
||||
"--template-path",
|
||||
"torch/csrc/api/include/torch/version.h.in",
|
||||
"torch/headeronly/version.h.in",
|
||||
"--version-path",
|
||||
"version.txt",
|
||||
"--output-path",
|
||||
"$OUT/version.h",
|
||||
"$OUT/torch/headeronly/version.h",
|
||||
]),
|
||||
cmd_exe = "md $OUT\\torch\\headeronly 2>nul & $(exe {}tools:gen-version-header) ".format(ROOT_PATH) + " ".join([
|
||||
"--template-path",
|
||||
"torch/headeronly/version.h.in",
|
||||
"--version-path",
|
||||
"version.txt",
|
||||
"--output-path",
|
||||
"$OUT\\torch\\headeronly\\version.h",
|
||||
]),
|
||||
outs = {
|
||||
"version.h": ["version.h"],
|
||||
"version.h": ["torch/headeronly/version.h"],
|
||||
},
|
||||
default_outs = ["."],
|
||||
)
|
||||
|
12
build.bzl
12
build.bzl
@ -142,18 +142,6 @@ def define_targets(rules):
|
||||
visibility = ["//visibility:public"],
|
||||
)
|
||||
|
||||
rules.genrule(
|
||||
name = "version_h",
|
||||
srcs = [
|
||||
":torch/csrc/api/include/torch/version.h.in",
|
||||
":version.txt",
|
||||
],
|
||||
outs = ["torch/csrc/api/include/torch/version.h"],
|
||||
cmd = "$(execpath //tools/setup_helpers:gen_version_header) " +
|
||||
"--template-path $(location :torch/csrc/api/include/torch/version.h.in) " +
|
||||
"--version-path $(location :version.txt) --output-path $@ ",
|
||||
tools = ["//tools/setup_helpers:gen_version_header"],
|
||||
)
|
||||
|
||||
#
|
||||
# ATen generated code
|
||||
|
@ -127,7 +127,7 @@ struct Event final {
|
||||
}
|
||||
|
||||
void synchronize() const {
|
||||
return impl_.synchronize();
|
||||
impl_.synchronize();
|
||||
}
|
||||
|
||||
private:
|
||||
|
Some files were not shown because too many files have changed in this diff Show More
Reference in New Issue
Block a user