mirror of
https://github.com/pytorch/pytorch.git
synced 2025-10-21 13:44:15 +08:00
Compare commits
83 Commits
viable/str
...
cpp-docs-d
Author | SHA1 | Date | |
---|---|---|---|
bad5269194 | |||
7bc13c802b | |||
d872529792 | |||
0d39ecb2ce | |||
90c0825e2d | |||
fd4bde430a | |||
b5e93ffdcf | |||
f8d0d65ddc | |||
f46ddb1e65 | |||
20082d7136 | |||
7158aa22e8 | |||
2035f6b2e6 | |||
2b58adc3bd | |||
322091d8d8 | |||
2bb4e6876c | |||
56ef7743fc | |||
64108bdbed | |||
c855f8632e | |||
12d2ef557f | |||
65aa62d50d | |||
6a09f9306c | |||
19bf67be32 | |||
1927783aa3 | |||
184817c7a8 | |||
da903b6a8b | |||
f76fdcaaf8 | |||
608792153f | |||
086dec3235 | |||
ad7b2bebc6 | |||
d444384003 | |||
3040a5d294 | |||
97463d4cf3 | |||
c813617c53 | |||
e659661ffa | |||
41808b2ba9 | |||
c0510dc447 | |||
9ec10dc26a | |||
43fc859625 | |||
f713abab16 | |||
bd3b98a8a5 | |||
e98c4e835b | |||
7b15534434 | |||
c32118dc3e | |||
e3ae80fc03 | |||
483f4e0db9 | |||
d1a62c8036 | |||
6861a27062 | |||
955f21dc2c | |||
9f5e1beaf3 | |||
2e027e8742 | |||
1e42fde45e | |||
f505caa71b | |||
65f10becdf | |||
df640df68a | |||
4c3c0ef2f1 | |||
bc33b10202 | |||
2855a045b3 | |||
9ecd092bd9 | |||
078d475d3b | |||
f37a6523ef | |||
b13cd141b3 | |||
5e47b4dd60 | |||
ee5389d520 | |||
ab01a0d7d3 | |||
801e282f39 | |||
87c9fbda22 | |||
3cc8af2d67 | |||
1fb072ac2a | |||
cac5e13e13 | |||
68350660ee | |||
ef7e2ca77e | |||
cdaaf3e4a3 | |||
0ea59c3c55 | |||
8f705d019a | |||
4bcc05777e | |||
2a6cdba6e5 | |||
53f6cc7529 | |||
ac901bf79a | |||
c965d6dbb2 | |||
ac08556f67 | |||
5fe7f29b9e | |||
ded099ecbf | |||
63fcc3e6c4 |
@ -344,7 +344,7 @@ docker build \
|
||||
--build-arg "NINJA_VERSION=${NINJA_VERSION:-}" \
|
||||
--build-arg "KATEX=${KATEX:-}" \
|
||||
--build-arg "ROCM_VERSION=${ROCM_VERSION:-}" \
|
||||
--build-arg "PYTORCH_ROCM_ARCH=${PYTORCH_ROCM_ARCH:-gfx90a;gfx942}" \
|
||||
--build-arg "PYTORCH_ROCM_ARCH=${PYTORCH_ROCM_ARCH:-gfx90a;gfx942;gfx1100}" \
|
||||
--build-arg "IMAGE_NAME=${IMAGE_NAME}" \
|
||||
--build-arg "UCX_COMMIT=${UCX_COMMIT}" \
|
||||
--build-arg "UCC_COMMIT=${UCC_COMMIT}" \
|
||||
|
@ -46,9 +46,9 @@ case ${DOCKER_TAG_PREFIX} in
|
||||
BASE_TARGET=rocm
|
||||
GPU_IMAGE=rocm/dev-ubuntu-22.04:${GPU_ARCH_VERSION}-complete
|
||||
PYTORCH_ROCM_ARCH="gfx900;gfx906;gfx908;gfx90a;gfx942;gfx1030;gfx1100;gfx1101;gfx1102;gfx1200;gfx1201"
|
||||
# add gfx950 conditionally starting in ROCm 7.0
|
||||
# add gfx950, gfx115x conditionally starting in ROCm 7.0
|
||||
if [[ "$GPU_ARCH_VERSION" == *"7.0"* ]]; then
|
||||
PYTORCH_ROCM_ARCH="${PYTORCH_ROCM_ARCH};gfx950"
|
||||
PYTORCH_ROCM_ARCH="${PYTORCH_ROCM_ARCH};gfx950;gfx1150;gfx1151"
|
||||
fi
|
||||
DOCKER_GPU_BUILD_ARG="--build-arg PYTORCH_ROCM_ARCH=${PYTORCH_ROCM_ARCH} --build-arg ROCM_VERSION=${GPU_ARCH_VERSION}"
|
||||
;;
|
||||
|
@ -115,6 +115,9 @@ RUN env GRPC_PYTHON_BUILD_SYSTEM_OPENSSL=True pip3 install grpcio
|
||||
# cmake-3.28.0 from pip for onnxruntime
|
||||
RUN python3 -mpip install cmake==3.28.0
|
||||
|
||||
ADD ./common/patch_libstdc.sh patch_libstdc.sh
|
||||
RUN bash ./patch_libstdc.sh && rm patch_libstdc.sh
|
||||
|
||||
# build onnxruntime 1.21.0 from sources.
|
||||
# it is not possible to build it from sources using pip,
|
||||
# so just build it from upstream repository.
|
||||
|
@ -84,9 +84,9 @@ case ${image} in
|
||||
DEVTOOLSET_VERSION="11"
|
||||
GPU_IMAGE=rocm/dev-almalinux-8:${GPU_ARCH_VERSION}-complete
|
||||
PYTORCH_ROCM_ARCH="gfx900;gfx906;gfx908;gfx90a;gfx942;gfx1030;gfx1100;gfx1101;gfx1102;gfx1200;gfx1201"
|
||||
# add gfx950 conditionally starting in ROCm 7.0
|
||||
# add gfx950, gfx115x conditionally starting in ROCm 7.0
|
||||
if [[ "$GPU_ARCH_VERSION" == *"7.0"* ]]; then
|
||||
PYTORCH_ROCM_ARCH="${PYTORCH_ROCM_ARCH};gfx950"
|
||||
PYTORCH_ROCM_ARCH="${PYTORCH_ROCM_ARCH};gfx950;gfx1150;gfx1151"
|
||||
fi
|
||||
DOCKER_GPU_BUILD_ARG="--build-arg ROCM_VERSION=${GPU_ARCH_VERSION} --build-arg PYTORCH_ROCM_ARCH=${PYTORCH_ROCM_ARCH} --build-arg DEVTOOLSET_VERSION=${DEVTOOLSET_VERSION}"
|
||||
;;
|
||||
|
@ -1,15 +1,11 @@
|
||||
sphinx==5.3.0
|
||||
sphinx==7.2.6
|
||||
#Description: This is used to generate PyTorch docs
|
||||
#Pinned versions: 5.3.0
|
||||
#Pinned versions: 7.2.6
|
||||
|
||||
standard-imghdr==3.13.0; python_version >= "3.13"
|
||||
#Description: This is needed by Sphinx, so it needs to be added here.
|
||||
# The reasons are as follows:
|
||||
# 1) This module has been removed from the Python standard library since Python 3.13(https://peps.python.org/pep-0594/#imghdr);
|
||||
# 2) The current version of Sphinx (5.3.0) is not compatible with Python 3.13.
|
||||
# Once Sphinx is upgraded to a version compatible with Python 3.13 or later, we can remove this dependency.
|
||||
pytorch_sphinx_theme2==0.1.0
|
||||
#Description: This is needed to generate PyTorch docs
|
||||
#Pinned versions: 0.1.0
|
||||
|
||||
-e git+https://github.com/pytorch/pytorch_sphinx_theme.git@71e55749be14ceb56e7f8211a9fb649866b87ad4#egg=pytorch_sphinx_theme2
|
||||
# TODO: sphinxcontrib.katex 0.9.0 adds a local KaTeX server to speed up pre-rendering
|
||||
# but it doesn't seem to work and hangs around idly. The initial thought is that it is probably
|
||||
# something related to Docker setup. We can investigate this later.
|
||||
@ -36,17 +32,17 @@ tensorboard==2.18.0 ; python_version >= "3.13"
|
||||
#Description: This is used to generate PyTorch docs
|
||||
#Pinned versions: 2.13.0
|
||||
|
||||
breathe==4.34.0
|
||||
breathe==4.36.0
|
||||
#Description: This is used to generate PyTorch C++ docs
|
||||
#Pinned versions: 4.34.0
|
||||
#Pinned versions: 4.36.0
|
||||
|
||||
exhale==0.2.3
|
||||
exhale==0.3.7
|
||||
#Description: This is used to generate PyTorch C++ docs
|
||||
#Pinned versions: 0.2.3
|
||||
#Pinned versions: 0.3.7
|
||||
|
||||
docutils==0.16
|
||||
docutils==0.20
|
||||
#Description: This is used to generate PyTorch C++ docs
|
||||
#Pinned versions: 0.16
|
||||
#Pinned versions: 0.20
|
||||
|
||||
bs4==0.0.1
|
||||
#Description: This is used to generate PyTorch C++ docs
|
||||
@ -56,13 +52,13 @@ IPython==8.12.0
|
||||
#Description: This is used to generate PyTorch functorch docs
|
||||
#Pinned versions: 8.12.0
|
||||
|
||||
myst-nb==0.17.2
|
||||
myst-nb==1.3.0
|
||||
#Description: This is used to generate PyTorch functorch and torch.compile docs.
|
||||
#Pinned versions: 0.17.2
|
||||
#Pinned versions: 1.3.0
|
||||
|
||||
# The following are required to build torch.distributed.elastic.rendezvous.etcd* docs
|
||||
python-etcd==0.4.5
|
||||
sphinx-copybutton==0.5.0
|
||||
sphinx-design==0.4.0
|
||||
sphinx-design==0.6.1
|
||||
sphinxcontrib-mermaid==1.0.0
|
||||
myst-parser==0.18.1
|
||||
myst-parser==4.0.1
|
||||
|
@ -67,7 +67,7 @@ fi
|
||||
# wheels with cxx11-abi
|
||||
|
||||
echo "Checking that the gcc ABI is what we expect"
|
||||
if [[ "$(uname)" != 'Darwin' && "$(uname -m)" != "s390x" ]]; then
|
||||
if [[ "$(uname)" != 'Darwin' ]]; then
|
||||
# We also check that there are cxx11 symbols in libtorch
|
||||
#
|
||||
echo "Checking that symbols in libtorch.so have the right gcc abi"
|
||||
|
@ -102,8 +102,18 @@ if [ "$is_main_doc" = true ]; then
|
||||
echo coverage output not found
|
||||
exit 1
|
||||
elif [ $undocumented -gt 0 ]; then
|
||||
echo undocumented objects found:
|
||||
echo "======================================"
|
||||
echo "ERROR: $undocumented undocumented objects found!"
|
||||
echo "======================================"
|
||||
echo ""
|
||||
echo "Full coverage report:"
|
||||
cat build/coverage/python.txt
|
||||
echo ""
|
||||
echo "======================================"
|
||||
echo "Undocumented modules/objects (lines after TOTAL):"
|
||||
tail -n +$((lines - undocumented + 1)) build/coverage/python.txt
|
||||
echo "======================================"
|
||||
echo ""
|
||||
echo "Make sure you've updated relevant .rsts in docs/source!"
|
||||
echo "You can reproduce locally by running 'cd docs && make coverage && cat build/coverage/python.txt'"
|
||||
exit 1
|
||||
|
@ -886,7 +886,7 @@ test_inductor_torchbench_smoketest_perf() {
|
||||
done
|
||||
|
||||
# Perform some "warm-start" runs for a few huggingface models.
|
||||
for test in AlbertForQuestionAnswering AllenaiLongformerBase DistilBertForMaskedLM DistillGPT2 GoogleFnet YituTechConvBert; do
|
||||
for test in AllenaiLongformerBase DistilBertForMaskedLM DistillGPT2 GoogleFnet YituTechConvBert; do
|
||||
python benchmarks/dynamo/huggingface.py --accuracy --training --amp --inductor --device cuda --warm-start-latency \
|
||||
--only $test --output "$TEST_REPORTS_DIR/inductor_warm_start_smoketest_$test.csv"
|
||||
python benchmarks/dynamo/check_accuracy.py \
|
||||
|
BIN
.github/scripts/drci_mocks.json.gz
vendored
BIN
.github/scripts/drci_mocks.json.gz
vendored
Binary file not shown.
1
.github/scripts/github_utils.py
vendored
1
.github/scripts/github_utils.py
vendored
@ -18,6 +18,7 @@ class GitHubComment:
|
||||
body_text: str
|
||||
created_at: str
|
||||
author_login: str
|
||||
author_url: Optional[str]
|
||||
author_association: str
|
||||
editor_login: Optional[str]
|
||||
database_id: int
|
||||
|
BIN
.github/scripts/gql_mocks.json.gz
vendored
BIN
.github/scripts/gql_mocks.json.gz
vendored
Binary file not shown.
2
.github/scripts/test_check_labels.py
vendored
2
.github/scripts/test_check_labels.py
vendored
@ -38,6 +38,7 @@ def mock_get_comments() -> list[GitHubComment]:
|
||||
body_text="mock_body_text",
|
||||
created_at="",
|
||||
author_login="",
|
||||
author_url=None,
|
||||
author_association="",
|
||||
editor_login=None,
|
||||
database_id=1,
|
||||
@ -48,6 +49,7 @@ def mock_get_comments() -> list[GitHubComment]:
|
||||
body_text=" #" + LABEL_ERR_MSG_TITLE.replace("`", ""),
|
||||
created_at="",
|
||||
author_login=BOT_AUTHORS[1],
|
||||
author_url=None,
|
||||
author_association="",
|
||||
editor_login=None,
|
||||
database_id=2,
|
||||
|
18
.github/scripts/test_trymerge.py
vendored
18
.github/scripts/test_trymerge.py
vendored
@ -32,6 +32,7 @@ from trymerge import (
|
||||
main as trymerge_main,
|
||||
MandatoryChecksMissingError,
|
||||
MergeRule,
|
||||
PostCommentError,
|
||||
RE_GHSTACK_DESC,
|
||||
read_merge_rules,
|
||||
remove_job_name_suffix,
|
||||
@ -588,6 +589,23 @@ class TestTryMerge(TestCase):
|
||||
self.assertEqual(mock_merge_base, pr.get_merge_base())
|
||||
mocked_gh_fetch_merge_base.assert_called_once()
|
||||
|
||||
def test_app_can_revert(self, *args: Any) -> None:
|
||||
pr = GitHubPR("pytorch", "pytorch", 164660)
|
||||
repo = DummyGitRepo()
|
||||
app_comment_id, impostor_comment_id = 3375785595, 3377647892
|
||||
# Check that app can revert
|
||||
self.assertIsNotNone(validate_revert(repo, pr, comment_id=app_comment_id))
|
||||
# But impostor can not
|
||||
self.assertRaises(
|
||||
PostCommentError,
|
||||
lambda: validate_revert(repo, pr, comment_id=impostor_comment_id),
|
||||
)
|
||||
# Despite its name being the name of the bot
|
||||
self.assertEqual(
|
||||
pr.get_comment_by_id(impostor_comment_id).author_login,
|
||||
"pytorch-auto-revert",
|
||||
)
|
||||
|
||||
|
||||
@mock.patch("trymerge.gh_graphql", side_effect=mocked_gh_graphql)
|
||||
@mock.patch("trymerge.gh_fetch_merge_base", return_value="")
|
||||
|
7
.github/scripts/trymerge.py
vendored
7
.github/scripts/trymerge.py
vendored
@ -234,6 +234,7 @@ query ($owner: String!, $name: String!, $number: Int!) {
|
||||
createdAt
|
||||
author {
|
||||
login
|
||||
url
|
||||
}
|
||||
authorAssociation
|
||||
editor {
|
||||
@ -1093,6 +1094,7 @@ class GitHubPR:
|
||||
body_text=node["bodyText"],
|
||||
created_at=node["createdAt"] if "createdAt" in node else "",
|
||||
author_login=node["author"]["login"],
|
||||
author_url=node["author"].get("url", None),
|
||||
author_association=node["authorAssociation"],
|
||||
editor_login=editor["login"] if editor else None,
|
||||
database_id=node["databaseId"],
|
||||
@ -2029,6 +2031,11 @@ def validate_revert(
|
||||
# For some reason, one can not be a member of private repo, only CONTRIBUTOR
|
||||
if pr.is_base_repo_private():
|
||||
allowed_reverters.append("CONTRIBUTOR")
|
||||
# Special case the pytorch-auto-revert app, which does not have an association
|
||||
# But should be able to issue revert command
|
||||
if comment.author_url == "https://github.com/apps/pytorch-auto-revert":
|
||||
allowed_reverters.append("NONE")
|
||||
|
||||
if author_association not in allowed_reverters:
|
||||
raise PostCommentError(
|
||||
f"Will not revert as @{author_login} is not one of "
|
||||
|
@ -63,6 +63,7 @@ jobs:
|
||||
# Same as the build job
|
||||
python-version: 3.12.7
|
||||
test-matrix: ${{ needs.macos-perf-py3-arm64-build.outputs.test-matrix }}
|
||||
timeout-minutes: 300
|
||||
disable-monitor: false
|
||||
monitor-log-interval: 15
|
||||
monitor-data-collect-interval: 4
|
||||
|
26
.github/workflows/rocm.yml
vendored
26
.github/workflows/rocm.yml
vendored
@ -59,3 +59,29 @@ jobs:
|
||||
docker-image: ${{ needs.linux-jammy-rocm-py3_10-build.outputs.docker-image }}
|
||||
test-matrix: ${{ needs.linux-jammy-rocm-py3_10-build.outputs.test-matrix }}
|
||||
secrets: inherit
|
||||
|
||||
linux-jammy-rocm-py3_10-gfx1100-test:
|
||||
if: ${{ github.event_name == 'push' && github.ref == 'refs/heads/main' }}
|
||||
permissions:
|
||||
id-token: write
|
||||
contents: read
|
||||
name: linux-jammy-rocm-py3_10-gfx1100
|
||||
uses: ./.github/workflows/_rocm-test.yml
|
||||
needs:
|
||||
- linux-jammy-rocm-py3_10-build
|
||||
- target-determination
|
||||
with:
|
||||
build-environment: linux-jammy-rocm-py3.10
|
||||
docker-image: ${{ needs.linux-jammy-rocm-py3_10-build.outputs.docker-image }}
|
||||
test-matrix: |
|
||||
{ include: [
|
||||
{ config: "default", shard: 1, num_shards: 2, runner: "linux.rocm.gpu.gfx1100" },
|
||||
{ config: "default", shard: 2, num_shards: 2, runner: "linux.rocm.gpu.gfx1100" },
|
||||
]}
|
||||
tests-to-include: >
|
||||
test_nn test_torch test_cuda test_ops test_unary_ufuncs test_binary_ufuncs
|
||||
test_autograd inductor/test_torchinductor inductor/test_kernel_benchmark
|
||||
inductor/test_pad_mm inductor/test_benchmark_fusion inductor/test_aot_inductor
|
||||
inductor/test_torchinductor inductor/test_decompose_mem_bound_mm
|
||||
inductor/test_flex_attention inductor/test_max_autotune
|
||||
secrets: inherit
|
||||
|
2
.gitignore
vendored
2
.gitignore
vendored
@ -88,7 +88,7 @@ torch_compile_debug/
|
||||
# Listed manually because some files in this directory are not generated
|
||||
torch/testing/_internal/generated/annotated_fn_args.py
|
||||
torch/testing/_internal/data/*.pt
|
||||
torch/csrc/api/include/torch/version.h
|
||||
torch/headeronly/version.h
|
||||
torch/csrc/cudnn/cuDNN.cpp
|
||||
torch/csrc/generated
|
||||
torch/csrc/generic/TensorMethods.cpp
|
||||
|
@ -28,7 +28,7 @@ exclude_patterns = [
|
||||
'torch/lib/**',
|
||||
'venv/**',
|
||||
'**/*.pyi',
|
||||
"tools/experimental/dynamic_shapes/torchfuzz/**",
|
||||
"tools/experimental/torchfuzz/**",
|
||||
'tools/test/test_selective_build.py',
|
||||
]
|
||||
command = [
|
||||
@ -198,7 +198,7 @@ exclude_patterns = [
|
||||
'tools/test/gen_operators_yaml_test.py',
|
||||
'tools/test/gen_oplist_test.py',
|
||||
'tools/test/test_selective_build.py',
|
||||
'tools/experimental/dynamic_shapes/torchfuzz/**',
|
||||
'tools/experimental/torchfuzz/**',
|
||||
]
|
||||
command = [
|
||||
'python3',
|
||||
|
@ -13,6 +13,9 @@ load(":build_variables.bzl", "jit_core_sources", "lazy_tensor_ts_sources", "libt
|
||||
load(":ufunc_defs.bzl", "aten_ufunc_generated_cpu_kernel_sources", "aten_ufunc_generated_cpu_sources", "aten_ufunc_generated_cuda_sources")
|
||||
load("//:tools/bazel.bzl", "rules")
|
||||
|
||||
# Export files for use by torch/headeronly (where version.h generation now lives)
|
||||
exports_files(["version.txt"])
|
||||
|
||||
define_targets(rules = rules)
|
||||
|
||||
COMMON_COPTS = [
|
||||
@ -690,7 +693,9 @@ cc_library(
|
||||
"torch/csrc/*/generated/*.h",
|
||||
"torch/csrc/jit/serialization/mobile_bytecode_generated.h",
|
||||
] + torch_cuda_headers,
|
||||
) + GENERATED_AUTOGRAD_CPP + [":version_h"],
|
||||
) + GENERATED_AUTOGRAD_CPP + [
|
||||
"//torch/headeronly:version_h",
|
||||
],
|
||||
includes = [
|
||||
"third_party/kineto/libkineto/include",
|
||||
"torch/csrc",
|
||||
|
@ -229,14 +229,14 @@ struct TORCH_API SparseTensorImpl : public TensorImpl {
|
||||
}
|
||||
|
||||
void resize_(int64_t sparse_dim, int64_t dense_dim, ArrayRef<int64_t> size) {
|
||||
return _resize_(sparse_dim, dense_dim, size);
|
||||
_resize_(sparse_dim, dense_dim, size);
|
||||
}
|
||||
|
||||
void resize_(
|
||||
int64_t sparse_dim,
|
||||
int64_t dense_dim,
|
||||
ArrayRef<c10::SymInt> size) {
|
||||
return _resize_(sparse_dim, dense_dim, size);
|
||||
_resize_(sparse_dim, dense_dim, size);
|
||||
}
|
||||
|
||||
// NOTE: this function will resize the sparse tensor and also set `indices`
|
||||
|
@ -59,7 +59,7 @@ static inline void set_item(const Tensor& self, ArrayRef<TensorIndex> indices, c
|
||||
}
|
||||
}
|
||||
|
||||
return set_item(self, indices, value);
|
||||
set_item(self, indices, value);
|
||||
}
|
||||
|
||||
} // namespace indexing
|
||||
|
@ -765,7 +765,8 @@ void TensorIteratorBase::for_each(loop2d_t loop, int64_t grain_size) {
|
||||
if (numel == 0) {
|
||||
return;
|
||||
} else if (numel < grain_size || at::get_num_threads() == 1) {
|
||||
return serial_for_each(loop, {0, numel});
|
||||
serial_for_each(loop, {0, numel});
|
||||
return;
|
||||
} else {
|
||||
at::parallel_for(0, numel, grain_size, [&](int64_t begin, int64_t end) {
|
||||
serial_for_each(loop, {begin, end});
|
||||
|
@ -49,7 +49,7 @@ static void check_unique_names(DimnameList names) {
|
||||
}
|
||||
|
||||
void check_names_valid_for(const TensorBase& tensor, DimnameList names) {
|
||||
return impl::check_names_valid_for(tensor.unsafeGetTensorImpl(), names);
|
||||
impl::check_names_valid_for(tensor.unsafeGetTensorImpl(), names);
|
||||
}
|
||||
|
||||
void check_names_valid_for(size_t tensor_dim, DimnameList names) {
|
||||
|
@ -138,7 +138,7 @@ void Tensor::_backward(TensorList inputs,
|
||||
const std::optional<Tensor>& gradient,
|
||||
std::optional<bool> keep_graph,
|
||||
bool create_graph) const {
|
||||
return impl::GetVariableHooks()->_backward(*this, inputs, gradient, keep_graph, create_graph);
|
||||
impl::GetVariableHooks()->_backward(*this, inputs, gradient, keep_graph, create_graph);
|
||||
}
|
||||
|
||||
const TensorBase& TensorBase::requires_grad_(bool _requires_grad) const {
|
||||
|
@ -496,7 +496,7 @@ class TORCH_API OperatorHandle {
|
||||
}
|
||||
|
||||
void checkInvariants() const {
|
||||
return operatorDef_->op.checkInvariants();
|
||||
operatorDef_->op.checkInvariants();
|
||||
}
|
||||
|
||||
c10::ArrayRef<at::Tag> getTags() const {
|
||||
@ -932,7 +932,7 @@ inline void Dispatcher::redispatchBoxed(
|
||||
}
|
||||
#endif
|
||||
const auto& kernel = entry.lookup(dispatchKeySet);
|
||||
return kernel.callBoxed(op, dispatchKeySet, stack);
|
||||
kernel.callBoxed(op, dispatchKeySet, stack);
|
||||
}
|
||||
|
||||
} // namespace c10
|
||||
|
@ -326,6 +326,23 @@ bool CUDAHooks::supportsBFloat16ConvolutionWithCuDNNv8() const {
|
||||
#endif
|
||||
}
|
||||
|
||||
bool CUDAHooks::supportsBFloat16RNNWithCuDNN() const {
|
||||
#if AT_CUDNN_ENABLED() && (CUDNN_VERSION >= 91300)
|
||||
if (!hasCUDA()) {
|
||||
return false;
|
||||
}
|
||||
cudaDeviceProp* prop = at::cuda::getCurrentDeviceProperties();
|
||||
// Require compute capability 8.0 or newer (Ampere+)
|
||||
if (prop->major >= 8) {
|
||||
return true;
|
||||
} else {
|
||||
return false;
|
||||
}
|
||||
#else
|
||||
return false;
|
||||
#endif
|
||||
}
|
||||
|
||||
long CUDAHooks::versionCuDNN() const {
|
||||
#if AT_CUDNN_ENABLED()
|
||||
return CUDNN_VERSION;
|
||||
|
@ -45,6 +45,7 @@ struct CUDAHooks : public at::CUDAHooksInterface {
|
||||
bool supportsDilatedConvolutionWithCuDNN() const override;
|
||||
bool supportsDepthwiseConvolutionWithCuDNN() const override;
|
||||
bool supportsBFloat16ConvolutionWithCuDNNv8() const override;
|
||||
bool supportsBFloat16RNNWithCuDNN() const override;
|
||||
bool hasCUDART() const override;
|
||||
long versionCUDART() const override;
|
||||
long versionCuDNN() const override;
|
||||
|
@ -166,6 +166,10 @@ struct TORCH_API CUDAHooksInterface : AcceleratorHooksInterface {
|
||||
return false;
|
||||
}
|
||||
|
||||
virtual bool supportsBFloat16RNNWithCuDNN() const {
|
||||
return false;
|
||||
}
|
||||
|
||||
virtual long versionCuDNN() const {
|
||||
TORCH_CHECK(false, "Cannot query cuDNN version without ATen_cuda library. ", CUDA_HELP);
|
||||
}
|
||||
|
@ -465,11 +465,11 @@ static void dynamicLayerBack(const c10::OperatorHandle& op, torch::jit::Stack* s
|
||||
|
||||
// used for functions that have aliasing operations but should be treated like they're out of place (i.e. lift_fresh)
|
||||
static void dynamicLayerBackGradSpecialCase(const c10::OperatorHandle& op, torch::jit::Stack* stack) {
|
||||
return dynamicLayerBack(op, stack, true);
|
||||
dynamicLayerBack(op, stack, true);
|
||||
}
|
||||
|
||||
static void dynamicLayerBackFallback(const c10::OperatorHandle& op, torch::jit::Stack* stack) {
|
||||
return dynamicLayerBack(op, stack, false);
|
||||
dynamicLayerBack(op, stack, false);
|
||||
}
|
||||
|
||||
TORCH_LIBRARY_IMPL(_, FuncTorchDynamicLayerFrontMode, m) {
|
||||
|
@ -375,7 +375,7 @@ static void bf16_gemv_trans(
|
||||
const at::BFloat16 beta,
|
||||
at::BFloat16* y,
|
||||
const int incy) {
|
||||
return bf16_gemv_trans_stub(kCPU, m, n, alpha, a, lda, x, incx, beta, y, incy);
|
||||
bf16_gemv_trans_stub(kCPU, m, n, alpha, a, lda, x, incx, beta, y, incy);
|
||||
}
|
||||
|
||||
template <>
|
||||
|
@ -70,7 +70,7 @@ inline void searchsorted_maybe_trim_input_tensors(
|
||||
const Tensor& raw_boundaries) {
|
||||
Tensor trimmed_sorter;
|
||||
Tensor raw_sorter;
|
||||
return searchsorted_maybe_trim_input_tensors(
|
||||
searchsorted_maybe_trim_input_tensors(
|
||||
trimmed_input,
|
||||
trimmed_boundaries,
|
||||
trimmed_sorter,
|
||||
|
@ -93,6 +93,12 @@ inline bool cond_cudnn_grid_sampler(
|
||||
const TensorBase& input,
|
||||
const TensorBase& grid
|
||||
) {
|
||||
auto st = input.scalar_type();
|
||||
if (!(st == kDouble || st == kFloat || st == kHalf))
|
||||
return false;
|
||||
st = grid.scalar_type();
|
||||
if (!(st == kDouble || st == kFloat || st == kHalf))
|
||||
return false;
|
||||
return (
|
||||
at::native::cudnn_is_acceptable(input) &&
|
||||
at::native::cudnn_is_acceptable(grid) &&
|
||||
|
@ -108,6 +108,13 @@ bool use_mkldnn(const Tensor& input, TensorList params, TensorList hx) {
|
||||
return false;
|
||||
}
|
||||
|
||||
bool use_cudnn(const Tensor& t) {
|
||||
bool acceptable = at::cudnn_is_acceptable(t);
|
||||
auto st = t.scalar_type();
|
||||
bool bfloat16_cond = st == kBFloat16 && at::detail::getCUDAHooks().supportsBFloat16RNNWithCuDNN();
|
||||
return acceptable && (bfloat16_cond || st == kDouble || st == kFloat || st == kHalf);
|
||||
}
|
||||
|
||||
template<typename T>
|
||||
using pair_of = std::pair<T, T>;
|
||||
|
||||
@ -1200,7 +1207,7 @@ std::tuple<Tensor, Tensor, Tensor, Tensor, Tensor> _thnn_fused_lstm_cell_backwar
|
||||
bool train, \
|
||||
bool bidirectional, \
|
||||
bool batch_first) { \
|
||||
if (at::cudnn_is_acceptable(_input)) { \
|
||||
if (use_cudnn(_input)) { \
|
||||
Tensor output, hy; \
|
||||
NAME##_cudnn_stub( \
|
||||
_input.device().type(), \
|
||||
@ -1262,7 +1269,7 @@ std::tuple<Tensor, Tensor, Tensor, Tensor, Tensor> _thnn_fused_lstm_cell_backwar
|
||||
double dropout_p, \
|
||||
bool train, \
|
||||
bool bidirectional) { \
|
||||
if (at::cudnn_is_acceptable(data)) { \
|
||||
if (use_cudnn(data)) { \
|
||||
Tensor output, hy; \
|
||||
NAME##_packed_cudnn_stub( \
|
||||
data.device().type(), \
|
||||
@ -1430,7 +1437,7 @@ std::tuple<Tensor, Tensor, Tensor> lstm(
|
||||
TensorList _params, bool has_biases,
|
||||
int64_t num_layers, double dropout_p, bool train, bool bidirectional, bool batch_first) {
|
||||
TORCH_CHECK(hx.size() == 2, "lstm expects two hidden states");
|
||||
if (at::cudnn_is_acceptable(_input)) {
|
||||
if (use_cudnn(_input)) {
|
||||
Tensor output, hy, cy;
|
||||
lstm_cudnn_stub(_input.device().type(), output, hy, cy, _input, hx, _params, has_biases,
|
||||
num_layers, dropout_p, train, bidirectional, batch_first);
|
||||
@ -1491,7 +1498,7 @@ std::tuple<Tensor, Tensor, Tensor> lstm(
|
||||
TensorList _params, bool has_biases,
|
||||
int64_t num_layers, double dropout_p, bool train, bool bidirectional) {
|
||||
TORCH_CHECK(hx.size() == 2, "lstm expects two hidden states");
|
||||
if (at::cudnn_is_acceptable(data)) {
|
||||
if (use_cudnn(data)) {
|
||||
Tensor output, hy, cy;
|
||||
lstm_packed_cudnn_stub(data.device().type(), output, hy, cy, data, batch_sizes, hx,
|
||||
_params, has_biases, num_layers, dropout_p, train, bidirectional);
|
||||
|
@ -23,14 +23,6 @@
|
||||
#include <ATen/Functions.h>
|
||||
#include <ATen/NativeFunctions.h>
|
||||
#else
|
||||
#include <ATen/ops/_cast_Byte_native.h>
|
||||
#include <ATen/ops/_cast_Char_native.h>
|
||||
#include <ATen/ops/_cast_Double_native.h>
|
||||
#include <ATen/ops/_cast_Float_native.h>
|
||||
#include <ATen/ops/_cast_Half_native.h>
|
||||
#include <ATen/ops/_cast_Int_native.h>
|
||||
#include <ATen/ops/_cast_Long_native.h>
|
||||
#include <ATen/ops/_cast_Short_native.h>
|
||||
#include <ATen/ops/_dim_arange_native.h>
|
||||
#include <ATen/ops/_efficientzerotensor_native.h>
|
||||
#include <ATen/ops/_empty_affine_quantized.h>
|
||||
|
@ -91,9 +91,6 @@ bool cudnn_is_acceptable(const TensorBase& self) {
|
||||
return false;
|
||||
if (!self.is_cuda())
|
||||
return false;
|
||||
auto st = self.scalar_type();
|
||||
if (!(st == kDouble || st == kFloat || st == kHalf))
|
||||
return false;
|
||||
if (!detail::getCUDAHooks().compiledWithCuDNN())
|
||||
return false;
|
||||
// cuDNN functions like grid_sampler return CUDNN_STATUS_BAD_PARAM on empty
|
||||
|
@ -25,11 +25,11 @@
|
||||
namespace at::native {
|
||||
|
||||
void _backward(const Tensor& self, TensorList inputs, const std::optional<Tensor>& gradient_opt, std::optional<bool> keep_graph, bool create_graph) {
|
||||
return self._backward(inputs, gradient_opt, keep_graph, create_graph);
|
||||
self._backward(inputs, gradient_opt, keep_graph, create_graph);
|
||||
}
|
||||
|
||||
void set_data(Tensor& self, const Tensor& new_data) {
|
||||
return self.set_data(new_data);
|
||||
self.set_data(new_data);
|
||||
}
|
||||
|
||||
Tensor data(const Tensor& self) {
|
||||
@ -54,7 +54,7 @@ Tensor& requires_grad_(Tensor& self, bool _requires_grad) {
|
||||
}
|
||||
|
||||
void retain_grad(Tensor& self) {
|
||||
return self.retain_grad();
|
||||
self.retain_grad();
|
||||
}
|
||||
|
||||
bool retains_grad(const Tensor& self) {
|
||||
|
@ -300,7 +300,8 @@ void div_floor_kernel(TensorIteratorBase& iter) {
|
||||
// In the special case of unsigned integer division, floor division is
|
||||
// equivalent to truncation division (since the signs of the divisor and
|
||||
// dividend are always the same)
|
||||
return div_trunc_kernel(iter);
|
||||
div_trunc_kernel(iter);
|
||||
return;
|
||||
} else if (isIntegralType(dtype, /*includeBool*/ false)) {
|
||||
// There's no SIMD integer division, so don't try to vectorize it.
|
||||
AT_DISPATCH_INTEGRAL_TYPES(dtype, "div_floor_cpu", [&]() {
|
||||
|
@ -749,21 +749,29 @@ void flip_kernel(TensorIterator& iter, const bool quantized) {
|
||||
// });
|
||||
|
||||
if (iter_dtype == kByte) {
|
||||
return cpu_hflip_vec<uint8_t>(iter);
|
||||
cpu_hflip_vec<uint8_t>(iter);
|
||||
return;
|
||||
} else if (iter_dtype == kChar) {
|
||||
return cpu_hflip_vec<int8_t>(iter);
|
||||
cpu_hflip_vec<int8_t>(iter);
|
||||
return;
|
||||
} else if (iter_dtype == kInt) {
|
||||
return cpu_hflip_vec<int32_t>(iter);
|
||||
cpu_hflip_vec<int32_t>(iter);
|
||||
return;
|
||||
} else if (iter_dtype == kLong) {
|
||||
return cpu_hflip_vec<int64_t>(iter);
|
||||
cpu_hflip_vec<int64_t>(iter);
|
||||
return;
|
||||
} else if (iter_dtype == kShort) {
|
||||
return cpu_hflip_vec<int16_t>(iter);
|
||||
cpu_hflip_vec<int16_t>(iter);
|
||||
return;
|
||||
} else if (iter_dtype == kBool) {
|
||||
return cpu_hflip_vec<bool>(iter);
|
||||
cpu_hflip_vec<bool>(iter);
|
||||
return;
|
||||
} else if (iter_dtype == kFloat) {
|
||||
return cpu_hflip_vec<float>(iter);
|
||||
cpu_hflip_vec<float>(iter);
|
||||
return;
|
||||
} else if (iter_dtype == kDouble) {
|
||||
return cpu_hflip_vec<double>(iter);
|
||||
cpu_hflip_vec<double>(iter);
|
||||
return;
|
||||
}
|
||||
}
|
||||
// other dtypes (float16, bfloat16, complex) are handled by cpu_kernel_vec (see below)
|
||||
@ -778,10 +786,12 @@ void flip_kernel(TensorIterator& iter, const bool quantized) {
|
||||
c == input_strides_2[1] &&
|
||||
c == iter.element_size(0) * iter.shape()[0] // checks if dim=1 is contiguous as well
|
||||
) {
|
||||
return cpu_hflip_channels_last_vec(iter);
|
||||
cpu_hflip_channels_last_vec(iter);
|
||||
return;
|
||||
}
|
||||
// Special case: vertical flip using memcpy (faster than generic cpu_kernel_vec)
|
||||
return cpu_vflip_memcpy(iter);
|
||||
cpu_vflip_memcpy(iter);
|
||||
return;
|
||||
}
|
||||
|
||||
AT_DISPATCH_ALL_TYPES_AND_COMPLEX_AND3(kBool, kHalf, kBFloat16, iter.dtype(), "flip_cpu",
|
||||
|
@ -96,11 +96,14 @@ static void pow_tensor_scalar_kernel(
|
||||
dtype == kBFloat16 || isComplexType(dtype)) {
|
||||
// Dispatch to fast specialization for sqrt, rsqrt and reciprocal
|
||||
if (exp_scalar.equal(.5)) {
|
||||
return sqrt_kernel(iter);
|
||||
sqrt_kernel(iter);
|
||||
return;
|
||||
} else if (exp_scalar.equal(-0.5)) {
|
||||
return rsqrt_kernel(iter);
|
||||
rsqrt_kernel(iter);
|
||||
return;
|
||||
} else if (exp_scalar.equal(-1.0)) {
|
||||
return reciprocal_kernel(iter);
|
||||
reciprocal_kernel(iter);
|
||||
return;
|
||||
}
|
||||
}
|
||||
|
||||
|
@ -256,10 +256,10 @@ static void norm_kernel_tensor_iterator_impl(
|
||||
} else {
|
||||
if (iter.input_dtype() == kHalf && iter.dtype(0) == kFloat) {
|
||||
// type promotion that does cast and reduction in a single kernel
|
||||
return norm_kernel_cpu_impl<at::Half, float>(iter, val);
|
||||
norm_kernel_cpu_impl<at::Half, float>(iter, val); return;
|
||||
} else if (iter.input_dtype() == kBFloat16 && iter.dtype(0) == kFloat) {
|
||||
// type promotion that does cast and reduction in a single kernel
|
||||
return norm_kernel_cpu_impl<at::BFloat16, float>(iter, val);
|
||||
norm_kernel_cpu_impl<at::BFloat16, float>(iter, val); return;
|
||||
}
|
||||
|
||||
AT_DISPATCH_FLOATING_AND_COMPLEX_TYPES_AND3(kHalf, kBFloat16, kComplexHalf, iter.input_dtype(), "norm_cpu", [&] {
|
||||
|
@ -428,10 +428,11 @@ void fp16_gemv_trans(
|
||||
TORCH_INTERNAL_ASSERT_DEBUG_ONLY(incx == 1 && alpha == 1.0);
|
||||
#if !defined(__aarch64__) || defined(__ARM_FEATURE_FP16_SCALAR_ARITHMETIC)
|
||||
if (at::globalContext().allowFP16ReductionCPU()) {
|
||||
return fp16_gemv_trans_fp16_arith_by_dot_products(m, n, a, lda, x, beta, y, incy);
|
||||
fp16_gemv_trans_fp16_arith_by_dot_products(m, n, a, lda, x, beta, y, incy);
|
||||
return;
|
||||
}
|
||||
#endif
|
||||
return fp16_gemv_trans_fp32_arith_by_dot_products(m, n, a, lda, x, beta, y, incy);
|
||||
fp16_gemv_trans_fp32_arith_by_dot_products(m, n, a, lda, x, beta, y, incy);
|
||||
}
|
||||
|
||||
float bf16_dot_with_fp32_arith(const at::BFloat16* vec1, const at::BFloat16* vec2, int64_t len) {
|
||||
@ -465,7 +466,7 @@ void bf16_gemv_trans(
|
||||
at::BFloat16* y,
|
||||
const int incy) {
|
||||
TORCH_INTERNAL_ASSERT_DEBUG_ONLY(incx == 1 && alpha == 1.0 && beta == 0.0);
|
||||
return bf16_gemv_trans_fp32_arith_by_dot_products(m, n, a, lda, x, y, incy);
|
||||
bf16_gemv_trans_fp32_arith_by_dot_products(m, n, a, lda, x, y, incy);
|
||||
}
|
||||
|
||||
float fp16_dot(
|
||||
|
@ -121,7 +121,7 @@ void cufft_set_plan_cache_max_size_impl(DeviceIndex device_index, int64_t max_si
|
||||
"cufft_set_plan_cache_max_size: expected 0 <= device_index < ",
|
||||
at::detail::getCUDAHooks().deviceCount(), "], but got device_index=",
|
||||
device_index);
|
||||
return cufft_get_plan_cache(device_index).resize(max_size);
|
||||
cufft_get_plan_cache(device_index).resize(max_size);
|
||||
}
|
||||
|
||||
int64_t cufft_get_plan_cache_size_impl(DeviceIndex device_index) {
|
||||
@ -137,7 +137,7 @@ void cufft_clear_plan_cache_impl(DeviceIndex device_index) {
|
||||
"cufft_clear_plan_cache: expected 0 <= device_index < ",
|
||||
at::detail::getCUDAHooks().deviceCount(), "], but got device_index=",
|
||||
device_index);
|
||||
return cufft_get_plan_cache(device_index).clear();
|
||||
cufft_get_plan_cache(device_index).clear();
|
||||
}
|
||||
|
||||
} // namespace at::native::detail
|
||||
|
@ -230,7 +230,7 @@ constexpr int BLOCK_THREADS = 256;
|
||||
constexpr int RADIX_BITS = 8;
|
||||
constexpr int RADIX_DIGITS = 1 << RADIX_BITS; // 2 ^ RADIX_BITS
|
||||
constexpr int RADIX_MASK = (RADIX_DIGITS - 1);
|
||||
static_assert(RADIX_DIGITS <= BLOCK_THREADS, "radixFindKthValues kernel requires RADIX_DIGITS <= BLOCK_THREADS");
|
||||
static_assert(RADIX_DIGITS <= BLOCK_THREADS, "RADIX_DIGITS must be <= BLOCK_THREADS");
|
||||
constexpr int MIN_ITEMS_PER_THREAD = 4;
|
||||
constexpr int MAX_ITEMS_PER_THREAD = 64;
|
||||
|
||||
@ -242,11 +242,10 @@ __global__ void fill(T* x, T value, IndexType size) {
|
||||
}
|
||||
}
|
||||
|
||||
// find the kth smallest value,
|
||||
// for largest topk, k_to_find = slice_size - k + 1
|
||||
// compute local histogram for each block
|
||||
template <typename T, typename IndexType, typename Bitwise, int Dim>
|
||||
C10_LAUNCH_BOUNDS_1(BLOCK_THREADS)
|
||||
__global__ void radixFindKthValues(
|
||||
__global__ void computeBlockDigitCounts(
|
||||
at::cuda::detail::TensorInfo<const T, IndexType> input,
|
||||
uint32_t slice_size,
|
||||
uint32_t* ks_to_find, // size: num_slices, unused arg but for mysterious reasons perf is better when it's present
|
||||
@ -321,12 +320,51 @@ __global__ void radixFindKthValues(
|
||||
}
|
||||
}
|
||||
|
||||
// compute global histogram and cumsum for each row
//
// For one slice (row), sums the per-block digit counters stored in `counts`
// over all `blocks_per_slice` blocks of that slice, then writes the inclusive
// prefix sum over the RADIX_DIGITS digits into `digit_cum_sum`.
//
// Indexing used below:
//   counts[(slice_idx * blocks_per_slice + blk) * RADIX_DIGITS + digit_idx]
//   digit_cum_sum[blockIdx.x * blockDim.x + threadIdx.x]
//
// The BlockScan is instantiated for RADIX_DIGITS threads and blockIdx.x is
// used directly as the slice index, so the kernel expects one block per slice
// with RADIX_DIGITS threads per block; with that shape the output index equals
// slice_idx * RADIX_DIGITS + digit_idx.
__global__ void computeDigitCumSum(
  short* counts,
  uint32_t* digit_cum_sum,
  uint32_t blocks_per_slice) {
  // flat output index: one entry per (slice, digit) pair
  int tidx = threadIdx.x + blockIdx.x * blockDim.x;
  int digit_idx = threadIdx.x;
  uint32_t slice_idx = blockIdx.x;

  typedef cub::BlockScan<uint32_t, RADIX_DIGITS> BlockScan;
  __shared__ typename BlockScan::TempStorage scan_storage;
  // accumulates counters from multiple blocks
  uint32_t digit_count = 0;
  if (threadIdx.x < RADIX_DIGITS) {
    // Walk the per-block counters in tiles of HISTO_ACCUM_TILE so the inner
    // loop has a compile-time trip count and can be unrolled.
    constexpr int HISTO_ACCUM_TILE = 4;
    uint32_t rounds = blocks_per_slice / HISTO_ACCUM_TILE;
    for (int iter = 0; iter < rounds; iter++) {
      int base = HISTO_ACCUM_TILE * iter;
      #pragma unroll
      for (int j = 0; j < HISTO_ACCUM_TILE; j++) {
        int blk = base + j;
        digit_count += counts[(slice_idx * blocks_per_slice + blk) * RADIX_DIGITS + digit_idx];
      }
    }
    // remainder: blocks left over when blocks_per_slice is not a multiple of the tile
    for (int blk = HISTO_ACCUM_TILE * rounds; blk < blocks_per_slice; blk++) {
      digit_count += counts[(slice_idx * blocks_per_slice + blk) * RADIX_DIGITS + digit_idx];
    }

  }
  // compute the block-wide inclusive prefix sum
  uint32_t digit_count_cumsum;
  BlockScan(scan_storage).InclusiveSum(digit_count, digit_count_cumsum);
  __syncthreads();
  if (threadIdx.x < RADIX_DIGITS) {
    digit_cum_sum[tidx] = digit_count_cumsum;
  }
}
|
||||
|
||||
// Assumption: k can not be larger than UINT32_MAX
|
||||
template <typename Bitwise, typename T>
|
||||
C10_LAUNCH_BOUNDS_1(RADIX_DIGITS) // one thread per digit
|
||||
__global__ void computeBlockwiseWithinKCounts(
|
||||
Bitwise* desires_in, // size: num_slices
|
||||
short* counts, // size: num_slices * blocks_per_slice * radix_digits
|
||||
uint32_t* digit_cum_sum,
|
||||
uint32_t* ks_to_find_in, // size: num_slices
|
||||
uint32_t blocks_per_slice,
|
||||
int current_bit,
|
||||
@ -338,7 +376,7 @@ __global__ void computeBlockwiseWithinKCounts(
|
||||
Bitwise* desires_out,
|
||||
uint32_t num_blocks
|
||||
) {
|
||||
// This kernel should be launched with the same number of blocks as the `radixFindKthValues` kernel.
|
||||
// This kernel should be launched with the same number of blocks as the `computeBlockDigitCounts` kernel.
|
||||
int tidx = threadIdx.x;
|
||||
uint32_t block_idx = getLinearBlockId<uint32_t>();
|
||||
uint32_t slice_idx = block_idx / blocks_per_slice;
|
||||
@ -351,36 +389,15 @@ __global__ void computeBlockwiseWithinKCounts(
|
||||
if (block_idx >= num_blocks) {
|
||||
return;
|
||||
}
|
||||
typedef cub::BlockScan<uint32_t, BLOCK_THREADS> BlockScan;
|
||||
union __align__(16) TempStorage {
|
||||
uint32_t digit_count_cumsum[RADIX_DIGITS]; // only used if this it the last block for this slice
|
||||
typename BlockScan::TempStorage scan_storage;
|
||||
};
|
||||
__shared__ TempStorage temp_storage;
|
||||
|
||||
// accumulates counters from multiple blocks
|
||||
uint32_t digit_count = 0;
|
||||
if (tidx < RADIX_DIGITS) {
|
||||
for (int blk = 0; blk < blocks_per_slice; ++blk) {
|
||||
digit_count += counts[(slice_idx * blocks_per_slice + blk) * RADIX_DIGITS + tidx];
|
||||
}
|
||||
}
|
||||
|
||||
// compute the block-wide inclusive prefix sum
|
||||
uint32_t digit_count_cumsum;
|
||||
BlockScan(temp_storage.scan_storage).InclusiveSum(digit_count, digit_count_cumsum);
|
||||
__syncthreads();
|
||||
// every thread also need the perfix_sum of it's left value for comparison, so save a copy in shared mem
|
||||
if (tidx < RADIX_DIGITS) {
|
||||
temp_storage.digit_count_cumsum[tidx] = digit_count_cumsum;
|
||||
}
|
||||
__syncthreads();
|
||||
|
||||
__shared__ Bitwise desired;
|
||||
uint32_t k_to_find = ks_to_find_in[slice_idx];
|
||||
|
||||
if (tidx < RADIX_DIGITS) {
|
||||
uint32_t digit_count_cumsum_left = (tidx == 0) ? 0 : temp_storage.digit_count_cumsum[tidx - 1];
|
||||
uint32_t position = slice_idx * RADIX_DIGITS + tidx;
|
||||
uint32_t digit_count_cumsum = digit_cum_sum[position];
|
||||
uint32_t digit_count_cumsum_left = (tidx == 0) ? 0 : digit_cum_sum[position - 1];
|
||||
|
||||
// if not the last pass: update desired and ks_to_find
|
||||
// if last pass: write out the kth value
|
||||
@ -466,7 +483,7 @@ template <typename Bitwise>
|
||||
__global__ void computeBlockwiseKthCounts(
|
||||
Bitwise* desires, // size: num_slices
|
||||
short* counts, // size: num_slices * blocks_per_slice * radix_digits
|
||||
uint32_t num_blocks, // the number of blocks used by `radixFindKthValues` kernel
|
||||
uint32_t num_blocks, // the number of blocks used by `computeBlockDigitCounts` kernel
|
||||
uint32_t blocks_per_slice,
|
||||
// outputs:
|
||||
uint32_t* kthCounts // size: num_slices * blocks_per_slice == num_blocks
|
||||
@ -649,9 +666,7 @@ void launch(
|
||||
T* kthValues = reinterpret_cast<T*>(kthValues_buffer.get());
|
||||
|
||||
TORCH_CHECK(blocks_per_slice <= std::numeric_limits<uint32_t>::max(), "blocks_per_slice larger than uint32 maximum is not supported");
|
||||
auto semaphores_buffer = allocator.allocate(numInputSlices * sizeof(uint32_t));
|
||||
uint32_t* semaphores = reinterpret_cast<uint32_t*>(semaphores_buffer.get());
|
||||
AT_CUDA_CHECK(cudaMemsetAsync(semaphores, 0, numInputSlices * sizeof(uint32_t), stream));
|
||||
|
||||
|
||||
auto ks_to_find_buffer = allocator.allocate(2 * numInputSlices * sizeof(uint32_t));
|
||||
uint32_t* ks_to_find = reinterpret_cast<uint32_t*>(ks_to_find_buffer.get());
|
||||
@ -668,6 +683,10 @@ void launch(
|
||||
static_assert(MAX_ITEMS_PER_THREAD * BLOCK_THREADS < std::numeric_limits<short>::max(),
|
||||
"blockwise counter too large");
|
||||
|
||||
auto digit_cum_sum_buffer = allocator.allocate(numInputSlices * RADIX_DIGITS * sizeof(uint32_t));
|
||||
uint32_t* digit_cum_sum = reinterpret_cast<uint32_t*>(digit_cum_sum_buffer.get());
|
||||
AT_CUDA_CHECK(cudaMemsetAsync(digit_cum_sum, 0, numInputSlices * RADIX_DIGITS * sizeof(uint32_t), stream));
|
||||
|
||||
#if CUB_SUPPORTS_SCAN_BY_KEY()
|
||||
auto withinKCounts_buffer = allocator.allocate(num_blocks * sizeof(uint32_t));
|
||||
uint32_t* withinKCounts = reinterpret_cast<uint32_t*>(withinKCounts_buffer.get());
|
||||
@ -691,7 +710,7 @@ void launch(
|
||||
|
||||
// iterate radix bits for multiple passes
|
||||
for (int current_bit = sizeof(T) * 8 - RADIX_BITS; current_bit >= 0; current_bit -= RADIX_BITS) {
|
||||
radixFindKthValues<T, IndexType, Bitwise, Dim><<<grid, block, 0, stream>>>(
|
||||
computeBlockDigitCounts<T, IndexType, Bitwise, Dim><<<grid, block, 0, stream>>>(
|
||||
input,
|
||||
inputSliceSize,
|
||||
ks_to_find_in, // unused arg
|
||||
@ -704,10 +723,14 @@ void launch(
|
||||
desired_in,
|
||||
counts);
|
||||
C10_CUDA_KERNEL_LAUNCH_CHECK();
|
||||
|
||||
computeDigitCumSum<<<numInputSlices, RADIX_DIGITS, 0, stream>>>(counts, digit_cum_sum, blocks_per_slice);
|
||||
C10_CUDA_KERNEL_LAUNCH_CHECK();
|
||||
|
||||
// we unconditionally call this kernel to update desired/ks_to_find/kthValues
|
||||
// if cub supports scan_by_key we additionally do k counts
|
||||
computeBlockwiseWithinKCounts<Bitwise, T><<<grid, RADIX_DIGITS, 0, stream>>>(
|
||||
desired_in, counts, ks_to_find_in, blocks_per_slice, current_bit, largest, withinKCounts, kthValues, ks_to_find_out, desired_out, num_blocks);
|
||||
desired_in, counts, digit_cum_sum, ks_to_find_in, blocks_per_slice, current_bit, largest, withinKCounts, kthValues, ks_to_find_out, desired_out, num_blocks);
|
||||
C10_CUDA_KERNEL_LAUNCH_CHECK();
|
||||
// swap desired/ks_to_find in and out for next iter
|
||||
auto tmp_desired = desired_in;
|
||||
|
@ -1107,10 +1107,14 @@ void ldl_factor_kernel(
|
||||
auto preferred_backend = at::globalContext().linalgPreferredBackend();
|
||||
switch (preferred_backend) {
|
||||
case at::LinalgBackend::Cusolver:
|
||||
return ldl_factor_cusolver(
|
||||
{ ldl_factor_cusolver(
|
||||
LD, pivots, info, upper, hermitian);
|
||||
return;
|
||||
}
|
||||
case at::LinalgBackend::Magma:
|
||||
return ldl_factor_magma(LD, pivots, info, upper, hermitian);
|
||||
{ ldl_factor_magma(LD, pivots, info, upper, hermitian);
|
||||
return;
|
||||
}
|
||||
default:
|
||||
// By default use cusolver if available and magma otherwise.
|
||||
// If cusolver and magma 2.5.4+ are both available and hermitian=true,
|
||||
@ -1122,8 +1126,10 @@ void ldl_factor_kernel(
|
||||
LD, pivots, info, upper, hermitian);
|
||||
}
|
||||
#endif
|
||||
return ldl_factor_cusolver(
|
||||
LD, pivots, info, upper, hermitian);
|
||||
{ ldl_factor_cusolver(
|
||||
LD, pivots, info, upper, hermitian);
|
||||
return;
|
||||
}
|
||||
#else
|
||||
return ldl_factor_magma(LD, pivots, info, upper, hermitian);
|
||||
#endif
|
||||
@ -1839,11 +1845,14 @@ void geqrf_kernel(const Tensor& input, const Tensor& tau) {
|
||||
// For the benchmarks see
|
||||
// https://github.com/pytorch/pytorch/pull/56253#discussion_r622851107
|
||||
if (input.size(-2) <= 256 && batchCount(input) >= std::max<int64_t>(2, input.size(-2) / 16)) {
|
||||
return geqrf_batched_cublas(input, tau);
|
||||
geqrf_batched_cublas(input, tau);
|
||||
return;
|
||||
} else {
|
||||
return geqrf_cusolver(input, tau);
|
||||
geqrf_cusolver(input, tau);
|
||||
return;
|
||||
}
|
||||
return geqrf_batched_cublas(input, tau);
|
||||
geqrf_batched_cublas(input, tau);
|
||||
return;
|
||||
};
|
||||
|
||||
auto preferred_backend = at::globalContext().linalgPreferredBackend();
|
||||
@ -1856,10 +1865,14 @@ void geqrf_kernel(const Tensor& input, const Tensor& tau) {
|
||||
// - ?geqrf_gpu allows fast computation of Q via ?orgqr_gpu, but doesn't give R properly.
|
||||
// - ?geqrf2_gpu gives correct R, but doesn't allow computation of Q via ?orgqr_gpu
|
||||
case at::LinalgBackend::Magma:
|
||||
return geqrf_magma(input, tau);
|
||||
{ geqrf_magma(input, tau);
|
||||
return;
|
||||
}
|
||||
case at::LinalgBackend::Cusolver:
|
||||
default:
|
||||
return geqrf_cusolver_backend(input, tau);
|
||||
{ geqrf_cusolver_backend(input, tau);
|
||||
return;
|
||||
}
|
||||
}
|
||||
#else
|
||||
return geqrf_magma(input, tau);
|
||||
@ -2703,13 +2716,17 @@ void gels_looped(const Tensor& a, Tensor& b, Tensor& infos) {
|
||||
auto preferred_backend = at::globalContext().linalgPreferredBackend();
|
||||
switch (preferred_backend) {
|
||||
case at::LinalgBackend::Magma:
|
||||
return gels_magma(a, b, infos);
|
||||
{ gels_magma(a, b, infos);
|
||||
return;
|
||||
}
|
||||
case at::LinalgBackend::Cusolver:
|
||||
default:
|
||||
// linalg_lstsq_gels is a generic function that is implemented using
|
||||
// geqrf_stub, ormqr_stub, and triangular_solve_stub
|
||||
// It dispatches to cuSOLVER for CUDA inputs if USE_LINALG_SOLVER is defined
|
||||
return linalg_lstsq_gels(a, b, infos);
|
||||
{ linalg_lstsq_gels(a, b, infos);
|
||||
return;
|
||||
}
|
||||
}
|
||||
#else
|
||||
return gels_magma(a, b, infos);
|
||||
|
@ -1222,7 +1222,7 @@ cudnnRNNAlgo_t get_algo(
|
||||
}
|
||||
|
||||
cudnnDataType_t promote_rnn_math_type(cudnnDataType_t dtype) {
|
||||
if (dtype == CUDNN_DATA_HALF) {
|
||||
if (dtype == CUDNN_DATA_HALF || dtype == CUDNN_DATA_BFLOAT16) {
|
||||
return CUDNN_DATA_FLOAT;
|
||||
}
|
||||
return dtype;
|
||||
|
@ -373,59 +373,67 @@ void addmm_out_sparse_csr(
|
||||
if (mat2.layout() == kSparseCsr) {
|
||||
if (result.layout() == kStrided) {
|
||||
// TODO: Add native CSC support via cuSPARSE if supported.
|
||||
return addmm_dense_result(
|
||||
addmm_dense_result(
|
||||
mat2.transpose(0, 1).to_sparse_csr(),
|
||||
mat1.transpose(0, 1),
|
||||
beta,
|
||||
alpha,
|
||||
result.transpose(0, 1));
|
||||
return;
|
||||
}
|
||||
}
|
||||
if (mat2.layout() == kSparseCsc) {
|
||||
if (result.layout() == kStrided) {
|
||||
return addmm_dense_result(
|
||||
addmm_dense_result(
|
||||
mat2.transpose(-2, -1),
|
||||
mat1.transpose(-2, -1),
|
||||
beta,
|
||||
alpha,
|
||||
result.transpose(-2, -1));
|
||||
return;
|
||||
}
|
||||
}
|
||||
if (mat2.layout() == kSparseBsc) {
|
||||
if (result.layout() == kStrided) {
|
||||
return addmm_dense_result(
|
||||
addmm_dense_result(
|
||||
mat2.transpose(-2, -1),
|
||||
mat1.transpose(-2, -1),
|
||||
beta,
|
||||
alpha,
|
||||
result.transpose(-2, -1));
|
||||
return;
|
||||
}
|
||||
}
|
||||
}
|
||||
if (mat1.layout() == kSparseCsr) {
|
||||
if (mat2.layout() == kStrided) {
|
||||
if (result.layout() == kStrided) {
|
||||
return addmm_dense_result(mat1, mat2, beta, alpha, result);
|
||||
addmm_dense_result(mat1, mat2, beta, alpha, result);
|
||||
return;
|
||||
}
|
||||
}
|
||||
if (mat2.layout() == kSparseCsr) {
|
||||
if (result.layout() == kStrided) {
|
||||
return addmm_sparse_input_dense_result(mat1, mat2, beta, alpha, result);
|
||||
addmm_sparse_input_dense_result(mat1, mat2, beta, alpha, result);
|
||||
return;
|
||||
}
|
||||
if (result.layout() == kSparseCsr) {
|
||||
return addmm_sparse_result(mat1, mat2, beta, alpha, result);
|
||||
addmm_sparse_result(mat1, mat2, beta, alpha, result);
|
||||
return;
|
||||
}
|
||||
}
|
||||
if (mat2.layout() == kSparseCsc) {
|
||||
if (result.layout() == kStrided) {
|
||||
// TODO: CSR @ CSC kernel would be very fast due to format alignment
|
||||
return addmm_sparse_input_dense_result(
|
||||
mat1, mat2.to_sparse_csr(), beta, alpha, result);
|
||||
addmm_sparse_input_dense_result(
|
||||
mat1, mat2.to_sparse_csr(), beta, alpha, result);
|
||||
return;
|
||||
}
|
||||
if (result.layout() == kSparseCsr) {
|
||||
// TODO: CSR @ CSC kernel would be very fast due to format alignment
|
||||
return addmm_sparse_result(
|
||||
mat1, mat2.to_sparse_csr(), beta, alpha, result);
|
||||
addmm_sparse_result(
|
||||
mat1, mat2.to_sparse_csr(), beta, alpha, result);
|
||||
return;
|
||||
}
|
||||
}
|
||||
}
|
||||
@ -433,56 +441,62 @@ void addmm_out_sparse_csr(
|
||||
if (mat2.layout() == kStrided) {
|
||||
if (result.layout() == kStrided) {
|
||||
// TODO: avoid csc->csr conversion with native csc support
|
||||
return addmm_dense_result(
|
||||
mat1.to_sparse_csr(), mat2, beta, alpha, result);
|
||||
addmm_dense_result(
|
||||
mat1.to_sparse_csr(), mat2, beta, alpha, result);
|
||||
return;
|
||||
}
|
||||
}
|
||||
if (mat2.layout() == kSparseCsr) {
|
||||
if (result.layout() == kSparseCsr) {
|
||||
// TODO: avoid csc->csr conversion with native csc support
|
||||
return addmm_sparse_result(
|
||||
mat1.to_sparse_csr(), mat2, beta, alpha, result);
|
||||
addmm_sparse_result(
|
||||
mat1.to_sparse_csr(), mat2, beta, alpha, result);
|
||||
return;
|
||||
}
|
||||
}
|
||||
if (mat2.layout() == kSparseCsc) {
|
||||
if (result.layout() == kStrided) {
|
||||
return addmm_sparse_input_dense_result(
|
||||
mat2.transpose(-2, -1),
|
||||
mat1.transpose(-2, -1),
|
||||
beta,
|
||||
alpha,
|
||||
result.transpose(-2, -1));
|
||||
addmm_sparse_input_dense_result(
|
||||
mat2.transpose(-2, -1),
|
||||
mat1.transpose(-2, -1),
|
||||
beta,
|
||||
alpha,
|
||||
result.transpose(-2, -1));
|
||||
return;
|
||||
}
|
||||
if (result.layout() == kSparseCsr) {
|
||||
// TODO avoid csc->csr
|
||||
return addmm_sparse_result(
|
||||
mat1.to_sparse_csr(), mat2.to_sparse_csr(), beta, alpha, result);
|
||||
addmm_sparse_result(
|
||||
mat1.to_sparse_csr(), mat2.to_sparse_csr(), beta, alpha, result);
|
||||
return;
|
||||
}
|
||||
if (result.layout() == kSparseCsc) {
|
||||
return addmm_sparse_result(
|
||||
mat2.transpose(-2, -1),
|
||||
mat1.transpose(-2, -1),
|
||||
beta,
|
||||
alpha,
|
||||
result.transpose(-2, -1));
|
||||
addmm_sparse_result(
|
||||
mat2.transpose(-2, -1),
|
||||
mat1.transpose(-2, -1),
|
||||
beta,
|
||||
alpha,
|
||||
result.transpose(-2, -1));
|
||||
return;
|
||||
}
|
||||
}
|
||||
}
|
||||
if (mat1.layout() == kSparseBsr) {
|
||||
if (mat2.layout() == kStrided) {
|
||||
if (result.layout() == kStrided) {
|
||||
return addmm_dense_result(mat1, mat2, beta, alpha, result);
|
||||
addmm_dense_result(mat1, mat2, beta, alpha, result);
|
||||
return;
|
||||
}
|
||||
}
|
||||
}
|
||||
TORCH_CHECK(
|
||||
false,
|
||||
"addmm: computation on CPU is not implemented for ",
|
||||
result.layout(),
|
||||
" + ",
|
||||
mat1.layout(),
|
||||
" @ ",
|
||||
mat2.layout());
|
||||
false,
|
||||
"addmm: computation on CPU is not implemented for ",
|
||||
result.layout(),
|
||||
" + ",
|
||||
mat1.layout(),
|
||||
" @ ",
|
||||
mat2.layout());
|
||||
}
|
||||
|
||||
/*
|
||||
@ -496,16 +510,16 @@ void addmm_out_sparse_csr(
|
||||
[out] result of the operation.
|
||||
*/
|
||||
void addmv_out_sparse_csr(
|
||||
const Tensor& mat,
|
||||
const Tensor& vec,
|
||||
const Scalar& beta,
|
||||
const Scalar& alpha,
|
||||
const Tensor& result) {
|
||||
const Tensor& mat,
|
||||
const Tensor& vec,
|
||||
const Scalar& beta,
|
||||
const Scalar& alpha,
|
||||
const Tensor& result) {
|
||||
#if !AT_USE_MKL_SPARSE()
|
||||
TORCH_CHECK(
|
||||
false,
|
||||
"Calling addmv on a sparse CPU tensor requires Linux platform. ",
|
||||
"Please use PyTorch built with MKL on Linux.");
|
||||
false,
|
||||
"Calling addmv on a sparse CPU tensor requires Linux platform. ",
|
||||
"Please use PyTorch built with MKL on Linux.");
|
||||
#else
|
||||
c10::MaybeOwned<Tensor> result_ = prepare_dense_vector_for_mkl(result);
|
||||
c10::MaybeOwned<Tensor> vec_ = prepare_dense_vector_for_mkl(vec);
|
||||
|
@ -5,38 +5,6 @@
|
||||
# representing ScalarType's. They are now superseded by usage of
|
||||
# `aten::to()`. The ops remain here for backward compatibility purposes.
|
||||
|
||||
# DEPRECATED. DO NOT USE
|
||||
- func: _cast_Byte(Tensor self, bool non_blocking=False) -> Tensor
|
||||
variants: function
|
||||
|
||||
# DEPRECATED. DO NOT USE
|
||||
- func: _cast_Char(Tensor self, bool non_blocking=False) -> Tensor
|
||||
variants: function
|
||||
|
||||
# DEPRECATED. DO NOT USE
|
||||
- func: _cast_Double(Tensor self, bool non_blocking=False) -> Tensor
|
||||
variants: function
|
||||
|
||||
# DEPRECATED. DO NOT USE
|
||||
- func: _cast_Float(Tensor self, bool non_blocking=False) -> Tensor
|
||||
variants: function
|
||||
|
||||
# DEPRECATED. DO NOT USE
|
||||
- func: _cast_Int(Tensor self, bool non_blocking=False) -> Tensor
|
||||
variants: function
|
||||
|
||||
# DEPRECATED. DO NOT USE
|
||||
- func: _cast_Long(Tensor self, bool non_blocking=False) -> Tensor
|
||||
variants: function
|
||||
|
||||
# DEPRECATED. DO NOT USE
|
||||
- func: _cast_Short(Tensor self, bool non_blocking=False) -> Tensor
|
||||
variants: function
|
||||
|
||||
# DEPRECATED. DO NOT USE
|
||||
- func: _cast_Half(Tensor self, bool non_blocking=False) -> Tensor
|
||||
variants: function
|
||||
|
||||
# Computes the gradient of current tensor w.r.t. graph leaves.
|
||||
- func: _backward(Tensor self, Tensor[] inputs, Tensor? gradient=None, bool? retain_graph=None, bool create_graph=False) -> ()
|
||||
manual_cpp_binding: True
|
||||
|
@ -810,7 +810,8 @@ void addmm_out_sparse_csr(
|
||||
if (mat1.layout() == kSparseBsr) {
|
||||
if (mat2.layout() == kStrided) {
|
||||
if (result.layout() == kStrided)
|
||||
return block_sparse_mm(input, mat1, mat2, beta, alpha, result);
|
||||
{ block_sparse_mm(input, mat1, mat2, beta, alpha, result); return;
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
@ -819,13 +820,13 @@ void addmm_out_sparse_csr(
|
||||
if (result.layout() == kStrided) {
|
||||
auto result_t = result.transpose(-2, -1);
|
||||
auto input_t = (result.is_same(input) ? result_t : input.transpose(-2, -1));
|
||||
return block_sparse_mm(
|
||||
block_sparse_mm(
|
||||
input_t,
|
||||
mat2.transpose(-2, -1),
|
||||
mat1.transpose(-2, -1),
|
||||
beta,
|
||||
alpha,
|
||||
result_t);
|
||||
result_t); return;
|
||||
}
|
||||
}
|
||||
}
|
||||
@ -840,41 +841,41 @@ void addmm_out_sparse_csr(
|
||||
if (mat2.layout() == kSparseCsr) {
|
||||
if (result.layout() == kStrided) {
|
||||
// TODO: Add native CSC support via cuSPARSE if supported.
|
||||
return spmm(
|
||||
spmm(
|
||||
mat2.transpose(0, 1).to_sparse_csr(),
|
||||
mat1.transpose(0, 1),
|
||||
beta,
|
||||
alpha,
|
||||
result.transpose(0, 1));
|
||||
result.transpose(0, 1)); return;
|
||||
}
|
||||
}
|
||||
if (mat2.layout() == kSparseCsc) {
|
||||
if (result.layout() == kStrided) {
|
||||
return spmm(
|
||||
spmm(
|
||||
mat2.transpose(-2, -1),
|
||||
mat1.transpose(-2, -1),
|
||||
beta,
|
||||
alpha,
|
||||
result.transpose(-2, -1));
|
||||
result.transpose(-2, -1)); return;
|
||||
}
|
||||
}
|
||||
}
|
||||
if (mat1.layout() == kSparseCsr) {
|
||||
if (mat2.layout() == kStrided) {
|
||||
if (result.layout() == kStrided) {
|
||||
return spmm(mat1, mat2, beta, alpha, result);
|
||||
spmm(mat1, mat2, beta, alpha, result); return;
|
||||
}
|
||||
}
|
||||
if (mat2.layout() == kSparseCsr) {
|
||||
if (result.layout() == kSparseCsr) {
|
||||
return spgemm(mat1, mat2, beta, alpha, result);
|
||||
spgemm(mat1, mat2, beta, alpha, result); return;
|
||||
}
|
||||
}
|
||||
if (mat2.layout() == kSparseCsc) {
|
||||
if (result.layout() == kSparseCsr) {
|
||||
// TODO: Add native CSC support via cuSPARSE if supported.
|
||||
// CSR @ CSC kernel would be very fast due to format alignment
|
||||
return spgemm(mat1, mat2.to_sparse_csr(), beta, alpha, result);
|
||||
spgemm(mat1, mat2.to_sparse_csr(), beta, alpha, result); return;
|
||||
}
|
||||
}
|
||||
}
|
||||
@ -882,27 +883,28 @@ void addmm_out_sparse_csr(
|
||||
if (mat2.layout() == kStrided) {
|
||||
if (result.layout() == kStrided) {
|
||||
// TODO: Add native CSC support via cuSPARSE if supported.
|
||||
return spmm(mat1.to_sparse_csr(), mat2, beta, alpha, result);
|
||||
spmm(mat1.to_sparse_csr(), mat2, beta, alpha, result); return;
|
||||
}
|
||||
}
|
||||
if (mat2.layout() == kSparseCsr) {
|
||||
if (result.layout() == kSparseCsr)
|
||||
// TODO: Add native CSC support via cuSPARSE if supported.
|
||||
return spgemm(mat1.to_sparse_csr(), mat2, beta, alpha, result);
|
||||
{ spgemm(mat1.to_sparse_csr(), mat2, beta, alpha, result); return;
|
||||
}
|
||||
}
|
||||
if (mat2.layout() == kSparseCsc) {
|
||||
if (result.layout() == kSparseCsr) {
|
||||
// TODO: Add native CSC support via cuSPARSE if supported.
|
||||
return spgemm(
|
||||
mat1.to_sparse_csr(), mat2.to_sparse_csr(), beta, alpha, result);
|
||||
spgemm(
|
||||
mat1.to_sparse_csr(), mat2.to_sparse_csr(), beta, alpha, result); return;
|
||||
}
|
||||
if (result.layout() == kSparseCsc) {
|
||||
return spgemm(
|
||||
spgemm(
|
||||
mat2.transpose(-2, -1),
|
||||
mat1.transpose(-2, -1),
|
||||
beta,
|
||||
alpha,
|
||||
result.transpose(-2, -1));
|
||||
result.transpose(-2, -1)); return;
|
||||
}
|
||||
}
|
||||
}
|
||||
@ -933,7 +935,7 @@ void addmv_out_sparse_csr(
|
||||
const Scalar& alpha,
|
||||
const Tensor& result) {
|
||||
if (mat.layout() == kSparseBsr) {
|
||||
return block_sparse_mv(mat, vec, beta, alpha, result);
|
||||
block_sparse_mv(mat, vec, beta, alpha, result); return;
|
||||
}
|
||||
cusparseOperation_t opA = CUSPARSE_OPERATION_NON_TRANSPOSE;
|
||||
|
||||
@ -1213,9 +1215,9 @@ void triangular_solve_out_sparse_csr(
|
||||
}
|
||||
if (A.layout() == kSparseBsr) {
|
||||
if (B.size(-1) == 1) {
|
||||
return block_sparse_triangular_solve_vec(A, B, X, upper, transpose, unitriangular);
|
||||
block_sparse_triangular_solve_vec(A, B, X, upper, transpose, unitriangular); return;
|
||||
} else {
|
||||
return block_sparse_triangular_solve_mat(A, B, X, upper, transpose, unitriangular);
|
||||
block_sparse_triangular_solve_mat(A, B, X, upper, transpose, unitriangular); return;
|
||||
}
|
||||
}
|
||||
#ifdef USE_ROCM
|
||||
|
@ -117,7 +117,7 @@ class FwdKernel:
|
||||
def get_all(cls) -> list["FwdKernel"]:
|
||||
kernels: list[FwdKernel] = []
|
||||
for aligned, dtype, (sm, sm_max) in itertools.product(
|
||||
[True, False], DTYPES.keys(), zip(SM, SM[1:])
|
||||
[True, False], DTYPES.keys(), itertools.pairwise(SM)
|
||||
):
|
||||
# Remove some kernels we don't use
|
||||
if dtype == "bf16" and sm < 80:
|
||||
@ -228,7 +228,7 @@ class BwdKernel:
|
||||
for aligned, dtype, (sm, sm_max), apply_dropout, max_k in itertools.product(
|
||||
[True, False],
|
||||
DTYPES.keys(),
|
||||
zip(SM, SM[1:]),
|
||||
itertools.pairwise(SM),
|
||||
[True, False],
|
||||
[32, 64, 128, 2**16],
|
||||
):
|
||||
|
191
benchmarks/distributed/bench_nvshmem_tile_reduce.py
Normal file
191
benchmarks/distributed/bench_nvshmem_tile_reduce.py
Normal file
@ -0,0 +1,191 @@
|
||||
#!/usr/bin/env python3
|
||||
"""
|
||||
Benchmark for NVSHMEM tile reduce operations.
|
||||
|
||||
Usage:
|
||||
python benchmarks/distributed/bench_nvshmem_tile_reduce.py
|
||||
|
||||
This benchmark measures the performance of tile reduce operations across different
|
||||
matrix sizes and tile configurations.
|
||||
"""
|
||||
|
||||
import time
|
||||
|
||||
import torch
|
||||
import torch.distributed as dist
|
||||
import torch.distributed._symmetric_memory as symm_mem
|
||||
from torch.testing._internal.common_distributed import MultiProcContinuousTest
|
||||
from torch.testing._internal.common_utils import (
|
||||
requires_cuda_p2p_access,
|
||||
skip_but_pass_in_sandcastle_if,
|
||||
skipIfRocm,
|
||||
)
|
||||
|
||||
|
||||
# Decorator
|
||||
def requires_nvshmem():
|
||||
return skip_but_pass_in_sandcastle_if(
|
||||
not symm_mem.is_nvshmem_available(),
|
||||
"bench_nvshmem_tile_reduce requires NVSHMEM, skipping benchmark",
|
||||
)
|
||||
|
||||
|
||||
# Keep the benchmark device-agnostic: the device type is named once here and
# the matching torch device module is looked up from it.
device_type = "cuda"
device_module = torch.get_device_module(device_type)
|
||||
|
||||
|
||||
@requires_nvshmem()
@requires_cuda_p2p_access()
class NVSHMEMTileReduceBenchmark(MultiProcContinuousTest):
    """Benchmark ``torch.ops.symm_mem.tile_reduce`` with the NVSHMEM backend.

    Each rank allocates a pair of square symmetric-memory matrices and times
    the reduction of their top-left ``tile_size x tile_size`` tile to rank 0.
    """

    def _init_device(self) -> None:
        """Select this rank's device and make NVSHMEM the SymmMem backend."""
        # TODO: relieve this (seems to hang if without)
        device_module.set_device(self.device)
        # Set NVSHMEM as SymmMem backend
        symm_mem.set_backend("NVSHMEM")

    @property
    def device(self) -> torch.device:
        """Device owned by this rank (device index == rank)."""
        return torch.device(device_type, self.rank)

    def _benchmark_tile_reduce_single(
        self,
        full_size: int,
        tile_size: int,
        warmup_iters: int = 5,
        bench_iters: int = 10,
    ) -> dict:
        """
        Benchmark a single configuration of tile reduce.

        Args:
            full_size: Size of the full matrix (full_size x full_size)
            tile_size: Side length of the top-left tile that is reduced
                (tile_size x tile_size)
            warmup_iters: Number of warmup iterations
            bench_iters: Number of benchmark iterations; must be positive

        Returns:
            Dictionary with benchmark results. The timed loop is measured as
            a whole, so there is a single sample (the mean per-iteration
            time): ``min_time_ms`` and ``max_time_ms`` equal ``mean_time_ms``
            and ``std_time_ms`` is 0.0.

        Raises:
            ValueError: If ``bench_iters`` is not positive.
        """
        # Reject this up front (before any allocation or collective) instead
        # of hitting a ZeroDivisionError after all the work has been done.
        if bench_iters <= 0:
            raise ValueError(f"bench_iters must be positive, got {bench_iters}")

        self._init_device()
        group_name = dist.group.WORLD.group_name
        symm_mem.enable_symm_mem_for_group(group_name)

        dtype = torch.float

        # Allocate full matrices
        full_inp = symm_mem.empty(
            full_size, full_size, dtype=dtype, device=self.device
        ).fill_(self.rank)
        full_out = symm_mem.empty(
            full_size, full_size, dtype=dtype, device=self.device
        ).fill_(0)

        slice_ut = slice(0, tile_size)
        inp_tile = full_inp[slice_ut, slice_ut]
        out_tile = full_out[slice_ut, slice_ut]

        root = 0

        # Warmup iterations
        for _ in range(warmup_iters):
            torch.ops.symm_mem.tile_reduce(inp_tile, out_tile, root, group_name)
            device_module.synchronize(self.device)

        # Benchmark iterations
        times = []

        dist.barrier()
        device_module.synchronize(self.device)
        start_time = time.perf_counter()

        for _ in range(bench_iters):
            torch.ops.symm_mem.tile_reduce(inp_tile, out_tile, root, group_name)

        device_module.synchronize(self.device)
        end_time = time.perf_counter()
        times.append((end_time - start_time) / bench_iters)

        # Calculate statistics
        times = torch.tensor(times, dtype=torch.float64)
        mean_time = times.mean().item()
        # The sample standard deviation of a single sample is NaN; report 0.0
        # so the printed "mean ± std" stays readable.
        std_time = times.std().item() if times.numel() > 1 else 0.0

        # Take sizes from the tile that was actually reduced, so the numbers
        # stay right even if slicing clamped it to the matrix size.
        tile_elements = inp_tile.numel()
        tile_bytes = tile_elements * inp_tile.element_size()

        results = {
            "full_size": full_size,
            "tile_size": tile_size,
            "tile_elements": tile_elements,
            "tile_bytes": tile_bytes,
            "world_size": self.world_size,
            "mean_time_ms": mean_time * 1000,
            "std_time_ms": std_time * 1000,
            "min_time_ms": times.min().item() * 1000,
            "max_time_ms": times.max().item() * 1000,
            "throughput_gb_s": tile_bytes / (mean_time * 1e9),
            "elements_per_sec": tile_elements / mean_time,
        }

        return results

    @skipIfRocm
    def test_benchmark_tile_reduce_various_sizes(self) -> None:
        """
        Benchmark tile reduce across various tile sizes.

        The full matrix size is fixed to the largest tile size; only the
        reduced tile varies. Rank 0 prints per-configuration results and a
        summary table.
        """
        # Test various tile sizes inside one fixed-size matrix
        tile_sizes = [512, 1024, 2048, 4096, 8192, 16384]
        full_size = tile_sizes[-1]
        warmup_iters = 5
        bench_iters = 20

        results = []

        for tile_size in tile_sizes:
            try:
                result = self._benchmark_tile_reduce_single(
                    full_size, tile_size, warmup_iters, bench_iters
                )
            except Exception as e:
                # Best effort: report the failing configuration and move on.
                if self.rank == 0:
                    print(
                        f"Failed to benchmark tile size {tile_size}x{tile_size} "
                        f"(matrix size {full_size}x{full_size}): {e}"
                    )
            else:
                results.append(result)

                if self.rank == 0:
                    print(
                        f"Matrix Size: {full_size}x{full_size}, Tile Size: {tile_size}x{tile_size}"
                    )
                    print(
                        f"  Mean Time: {result['mean_time_ms']:.3f} ± {result['std_time_ms']:.3f} ms"
                    )
                    print(f"  Throughput: {result['throughput_gb_s']:.2f} GB/s")
                    print(f"  Bytes: {result['tile_bytes']:.0f}")
                    print()

        # Print summary
        if self.rank == 0 and results:
            print("=== BENCHMARK SUMMARY ===")
            print(
                f"{'Matrix Size':<12} {'Tile Size':<12} {'Time (ms)':<12} "
                f"{'Throughput (GB/s)':<18} {'Bytes':<15}"
            )
            # 12 + 12 + 12 + 18 + 15 columns plus 4 separating spaces
            print("-" * 73)

            for result in results:
                # Build the "NxN" labels first so each is padded as one
                # column and lines up with the header.
                matrix_label = f"{result['full_size']}x{result['full_size']}"
                tile_label = f"{result['tile_size']}x{result['tile_size']}"
                print(
                    f"{matrix_label:<12} "
                    f"{tile_label:<12} "
                    f"{result['mean_time_ms']:<12.3f} "
                    f"{result['throughput_gb_s']:<18.2f} "
                    f"{result['tile_bytes']:<15.0f}"
                )
|
||||
|
||||
|
||||
if __name__ == "__main__":
    # Running this file standalone requires the distributed environment to be
    # set up by the caller. For now it is meant to be run via the PyTorch test
    # framework, so the entry point simply hands control to run_tests().
    from torch.testing._internal.common_utils import run_tests

    run_tests()
|
@ -6,10 +6,6 @@ AlbertForMaskedLM,pass,0
|
||||
|
||||
|
||||
|
||||
AlbertForQuestionAnswering,pass,0
|
||||
|
||||
|
||||
|
||||
AllenaiLongformerBase,pass,4
|
||||
|
||||
|
||||
@ -18,50 +14,22 @@ BartForCausalLM,pass,0
|
||||
|
||||
|
||||
|
||||
BartForConditionalGeneration,pass,0
|
||||
|
||||
|
||||
|
||||
BertForMaskedLM,pass,0
|
||||
|
||||
|
||||
|
||||
BertForQuestionAnswering,pass,0
|
||||
|
||||
|
||||
|
||||
BlenderbotForCausalLM,pass_due_to_skip,0
|
||||
|
||||
|
||||
|
||||
BlenderbotSmallForCausalLM,pass,0
|
||||
|
||||
|
||||
|
||||
BlenderbotSmallForConditionalGeneration,pass,0
|
||||
|
||||
|
||||
|
||||
CamemBert,pass,0
|
||||
|
||||
|
||||
|
||||
DebertaV2ForMaskedLM,pass_due_to_skip,0
|
||||
|
||||
|
||||
|
||||
DebertaV2ForQuestionAnswering,pass,0
|
||||
|
||||
|
||||
|
||||
DistilBertForMaskedLM,pass,0
|
||||
|
||||
|
||||
|
||||
DistilBertForQuestionAnswering,pass,0
|
||||
|
||||
|
||||
|
||||
DistillGPT2,pass,2
|
||||
|
||||
|
||||
@ -70,10 +38,6 @@ ElectraForCausalLM,pass,0
|
||||
|
||||
|
||||
|
||||
ElectraForQuestionAnswering,pass,0
|
||||
|
||||
|
||||
|
||||
GPT2ForSequenceClassification,pass,0
|
||||
|
||||
|
||||
@ -86,10 +50,6 @@ LayoutLMForMaskedLM,pass,0
|
||||
|
||||
|
||||
|
||||
LayoutLMForSequenceClassification,pass,0
|
||||
|
||||
|
||||
|
||||
M2M100ForConditionalGeneration,pass,0
|
||||
|
||||
|
||||
@ -98,10 +58,6 @@ MBartForCausalLM,pass,0
|
||||
|
||||
|
||||
|
||||
MBartForConditionalGeneration,pass,0
|
||||
|
||||
|
||||
|
||||
MT5ForConditionalGeneration,pass,0
|
||||
|
||||
|
||||
@ -110,18 +66,10 @@ MegatronBertForCausalLM,pass,0
|
||||
|
||||
|
||||
|
||||
MegatronBertForQuestionAnswering,pass,0
|
||||
|
||||
|
||||
|
||||
MobileBertForMaskedLM,pass,0
|
||||
|
||||
|
||||
|
||||
MobileBertForQuestionAnswering,pass,0
|
||||
|
||||
|
||||
|
||||
OPTForCausalLM,pass,0
|
||||
|
||||
|
||||
@ -130,26 +78,14 @@ PLBartForCausalLM,pass,0
|
||||
|
||||
|
||||
|
||||
PLBartForConditionalGeneration,pass,0
|
||||
|
||||
|
||||
|
||||
PegasusForCausalLM,pass,0
|
||||
|
||||
|
||||
|
||||
PegasusForConditionalGeneration,pass,0
|
||||
|
||||
|
||||
|
||||
RobertaForCausalLM,pass,0
|
||||
|
||||
|
||||
|
||||
RobertaForQuestionAnswering,pass,0
|
||||
|
||||
|
||||
|
||||
T5ForConditionalGeneration,pass,0
|
||||
|
||||
|
||||
|
|
@ -6,10 +6,6 @@ AlbertForMaskedLM,pass,4
|
||||
|
||||
|
||||
|
||||
AlbertForQuestionAnswering,pass,5
|
||||
|
||||
|
||||
|
||||
AllenaiLongformerBase,pass,9
|
||||
|
||||
|
||||
@ -18,50 +14,22 @@ BartForCausalLM,pass,6
|
||||
|
||||
|
||||
|
||||
BartForConditionalGeneration,pass,8
|
||||
|
||||
|
||||
|
||||
BertForMaskedLM,pass,5
|
||||
|
||||
|
||||
|
||||
BertForQuestionAnswering,pass,5
|
||||
|
||||
|
||||
|
||||
BlenderbotForCausalLM,eager_fail_to_run,0
|
||||
|
||||
|
||||
|
||||
BlenderbotSmallForCausalLM,pass,6
|
||||
|
||||
|
||||
|
||||
BlenderbotSmallForConditionalGeneration,pass,8
|
||||
|
||||
|
||||
|
||||
CamemBert,pass,5
|
||||
|
||||
|
||||
|
||||
DebertaV2ForMaskedLM,pass_due_to_skip,0
|
||||
|
||||
|
||||
|
||||
DebertaV2ForQuestionAnswering,eager_1st_run_OOM,0
|
||||
|
||||
|
||||
|
||||
DistilBertForMaskedLM,pass,5
|
||||
|
||||
|
||||
|
||||
DistilBertForQuestionAnswering,pass,5
|
||||
|
||||
|
||||
|
||||
DistillGPT2,pass,7
|
||||
|
||||
|
||||
@ -70,10 +38,6 @@ ElectraForCausalLM,pass,4
|
||||
|
||||
|
||||
|
||||
ElectraForQuestionAnswering,pass,5
|
||||
|
||||
|
||||
|
||||
GPT2ForSequenceClassification,pass,6
|
||||
|
||||
|
||||
@ -86,10 +50,6 @@ LayoutLMForMaskedLM,pass,5
|
||||
|
||||
|
||||
|
||||
LayoutLMForSequenceClassification,pass,6
|
||||
|
||||
|
||||
|
||||
M2M100ForConditionalGeneration,pass,4
|
||||
|
||||
|
||||
@ -98,10 +58,6 @@ MBartForCausalLM,pass,6
|
||||
|
||||
|
||||
|
||||
MBartForConditionalGeneration,pass,8
|
||||
|
||||
|
||||
|
||||
MT5ForConditionalGeneration,pass,5
|
||||
|
||||
|
||||
@ -110,18 +66,10 @@ MegatronBertForCausalLM,pass,5
|
||||
|
||||
|
||||
|
||||
MegatronBertForQuestionAnswering,pass,5
|
||||
|
||||
|
||||
|
||||
MobileBertForMaskedLM,pass,3
|
||||
|
||||
|
||||
|
||||
MobileBertForQuestionAnswering,pass,3
|
||||
|
||||
|
||||
|
||||
OPTForCausalLM,pass,8
|
||||
|
||||
|
||||
@ -130,26 +78,14 @@ PLBartForCausalLM,pass,6
|
||||
|
||||
|
||||
|
||||
PLBartForConditionalGeneration,pass,8
|
||||
|
||||
|
||||
|
||||
PegasusForCausalLM,pass,6
|
||||
|
||||
|
||||
|
||||
PegasusForConditionalGeneration,pass,7
|
||||
|
||||
|
||||
|
||||
RobertaForCausalLM,pass,5
|
||||
|
||||
|
||||
|
||||
RobertaForQuestionAnswering,pass,5
|
||||
|
||||
|
||||
|
||||
T5ForConditionalGeneration,pass,5
|
||||
|
||||
|
||||
|
|
@ -6,58 +6,26 @@ AlbertForMaskedLM,pass,0
|
||||
|
||||
|
||||
|
||||
AlbertForQuestionAnswering,pass,0
|
||||
|
||||
|
||||
|
||||
BartForCausalLM,pass,0
|
||||
|
||||
|
||||
|
||||
BartForConditionalGeneration,pass,0
|
||||
|
||||
|
||||
|
||||
BertForMaskedLM,pass,0
|
||||
|
||||
|
||||
|
||||
BertForQuestionAnswering,pass,0
|
||||
|
||||
|
||||
|
||||
BlenderbotForCausalLM,pass_due_to_skip,0
|
||||
|
||||
|
||||
|
||||
BlenderbotSmallForCausalLM,pass,0
|
||||
|
||||
|
||||
|
||||
BlenderbotSmallForConditionalGeneration,pass,0
|
||||
|
||||
|
||||
|
||||
CamemBert,pass,0
|
||||
|
||||
|
||||
|
||||
DebertaV2ForMaskedLM,pass_due_to_skip,0
|
||||
|
||||
|
||||
|
||||
DebertaV2ForQuestionAnswering,pass,0
|
||||
|
||||
|
||||
|
||||
DistilBertForMaskedLM,pass,0
|
||||
|
||||
|
||||
|
||||
DistilBertForQuestionAnswering,pass,0
|
||||
|
||||
|
||||
|
||||
DistillGPT2,pass,0
|
||||
|
||||
|
||||
@ -66,10 +34,6 @@ ElectraForCausalLM,pass,0
|
||||
|
||||
|
||||
|
||||
ElectraForQuestionAnswering,pass,0
|
||||
|
||||
|
||||
|
||||
GPT2ForSequenceClassification,pass,0
|
||||
|
||||
|
||||
@ -82,10 +46,6 @@ LayoutLMForMaskedLM,pass,0
|
||||
|
||||
|
||||
|
||||
LayoutLMForSequenceClassification,pass,0
|
||||
|
||||
|
||||
|
||||
M2M100ForConditionalGeneration,pass,0
|
||||
|
||||
|
||||
@ -94,10 +54,6 @@ MBartForCausalLM,pass,0
|
||||
|
||||
|
||||
|
||||
MBartForConditionalGeneration,pass,0
|
||||
|
||||
|
||||
|
||||
MT5ForConditionalGeneration,pass,0
|
||||
|
||||
|
||||
@ -106,18 +62,10 @@ MegatronBertForCausalLM,pass,0
|
||||
|
||||
|
||||
|
||||
MegatronBertForQuestionAnswering,pass,0
|
||||
|
||||
|
||||
|
||||
MobileBertForMaskedLM,pass,0
|
||||
|
||||
|
||||
|
||||
MobileBertForQuestionAnswering,pass,0
|
||||
|
||||
|
||||
|
||||
OPTForCausalLM,pass,0
|
||||
|
||||
|
||||
@ -126,26 +74,14 @@ PLBartForCausalLM,pass,0
|
||||
|
||||
|
||||
|
||||
PLBartForConditionalGeneration,pass,0
|
||||
|
||||
|
||||
|
||||
PegasusForCausalLM,pass,0
|
||||
|
||||
|
||||
|
||||
PegasusForConditionalGeneration,pass,0
|
||||
|
||||
|
||||
|
||||
RobertaForCausalLM,pass,0
|
||||
|
||||
|
||||
|
||||
RobertaForQuestionAnswering,pass,0
|
||||
|
||||
|
||||
|
||||
T5ForConditionalGeneration,pass,0
|
||||
|
||||
|
||||
|
|
@ -6,58 +6,26 @@ AlbertForMaskedLM,pass,0
|
||||
|
||||
|
||||
|
||||
AlbertForQuestionAnswering,pass,0
|
||||
|
||||
|
||||
|
||||
BartForCausalLM,pass,0
|
||||
|
||||
|
||||
|
||||
BartForConditionalGeneration,pass,0
|
||||
|
||||
|
||||
|
||||
BertForMaskedLM,pass,0
|
||||
|
||||
|
||||
|
||||
BertForQuestionAnswering,pass,0
|
||||
|
||||
|
||||
|
||||
BlenderbotForCausalLM,pass_due_to_skip,0
|
||||
|
||||
|
||||
|
||||
BlenderbotSmallForCausalLM,pass,0
|
||||
|
||||
|
||||
|
||||
BlenderbotSmallForConditionalGeneration,pass,0
|
||||
|
||||
|
||||
|
||||
CamemBert,pass,0
|
||||
|
||||
|
||||
|
||||
DebertaV2ForMaskedLM,pass_due_to_skip,0
|
||||
|
||||
|
||||
|
||||
DebertaV2ForQuestionAnswering,pass,0
|
||||
|
||||
|
||||
|
||||
DistilBertForMaskedLM,pass,0
|
||||
|
||||
|
||||
|
||||
DistilBertForQuestionAnswering,pass,0
|
||||
|
||||
|
||||
|
||||
DistillGPT2,pass,0
|
||||
|
||||
|
||||
@ -66,10 +34,6 @@ ElectraForCausalLM,pass,0
|
||||
|
||||
|
||||
|
||||
ElectraForQuestionAnswering,pass,0
|
||||
|
||||
|
||||
|
||||
GPT2ForSequenceClassification,pass,0
|
||||
|
||||
|
||||
@ -82,10 +46,6 @@ LayoutLMForMaskedLM,pass,0
|
||||
|
||||
|
||||
|
||||
LayoutLMForSequenceClassification,pass,0
|
||||
|
||||
|
||||
|
||||
M2M100ForConditionalGeneration,pass,0
|
||||
|
||||
|
||||
@ -94,10 +54,6 @@ MBartForCausalLM,pass,0
|
||||
|
||||
|
||||
|
||||
MBartForConditionalGeneration,pass,0
|
||||
|
||||
|
||||
|
||||
MT5ForConditionalGeneration,pass,0
|
||||
|
||||
|
||||
@ -106,18 +62,10 @@ MegatronBertForCausalLM,pass,0
|
||||
|
||||
|
||||
|
||||
MegatronBertForQuestionAnswering,pass,0
|
||||
|
||||
|
||||
|
||||
MobileBertForMaskedLM,pass,0
|
||||
|
||||
|
||||
|
||||
MobileBertForQuestionAnswering,pass,0
|
||||
|
||||
|
||||
|
||||
OPTForCausalLM,pass,0
|
||||
|
||||
|
||||
@ -126,26 +74,14 @@ PLBartForCausalLM,pass,0
|
||||
|
||||
|
||||
|
||||
PLBartForConditionalGeneration,pass,0
|
||||
|
||||
|
||||
|
||||
PegasusForCausalLM,pass,0
|
||||
|
||||
|
||||
|
||||
PegasusForConditionalGeneration,pass,0
|
||||
|
||||
|
||||
|
||||
RobertaForCausalLM,pass,0
|
||||
|
||||
|
||||
|
||||
RobertaForQuestionAnswering,pass,0
|
||||
|
||||
|
||||
|
||||
T5ForConditionalGeneration,pass,0
|
||||
|
||||
|
||||
|
|
@ -6,10 +6,6 @@ AlbertForMaskedLM,pass,0
|
||||
|
||||
|
||||
|
||||
AlbertForQuestionAnswering,pass,0
|
||||
|
||||
|
||||
|
||||
AllenaiLongformerBase,pass,4
|
||||
|
||||
|
||||
@ -18,50 +14,22 @@ BartForCausalLM,pass,0
|
||||
|
||||
|
||||
|
||||
BartForConditionalGeneration,pass,0
|
||||
|
||||
|
||||
|
||||
BertForMaskedLM,pass,0
|
||||
|
||||
|
||||
|
||||
BertForQuestionAnswering,pass,0
|
||||
|
||||
|
||||
|
||||
BlenderbotForCausalLM,pass_due_to_skip,0
|
||||
|
||||
|
||||
|
||||
BlenderbotSmallForCausalLM,pass,0
|
||||
|
||||
|
||||
|
||||
BlenderbotSmallForConditionalGeneration,pass,0
|
||||
|
||||
|
||||
|
||||
CamemBert,pass,0
|
||||
|
||||
|
||||
|
||||
DebertaV2ForMaskedLM,pass_due_to_skip,0
|
||||
|
||||
|
||||
|
||||
DebertaV2ForQuestionAnswering,pass,0
|
||||
|
||||
|
||||
|
||||
DistilBertForMaskedLM,pass,0
|
||||
|
||||
|
||||
|
||||
DistilBertForQuestionAnswering,pass,0
|
||||
|
||||
|
||||
|
||||
DistillGPT2,pass,2
|
||||
|
||||
|
||||
@ -70,10 +38,6 @@ ElectraForCausalLM,pass,0
|
||||
|
||||
|
||||
|
||||
ElectraForQuestionAnswering,pass,0
|
||||
|
||||
|
||||
|
||||
GPT2ForSequenceClassification,pass,0
|
||||
|
||||
|
||||
@ -86,10 +50,6 @@ LayoutLMForMaskedLM,pass,0
|
||||
|
||||
|
||||
|
||||
LayoutLMForSequenceClassification,pass,0
|
||||
|
||||
|
||||
|
||||
M2M100ForConditionalGeneration,pass,0
|
||||
|
||||
|
||||
@ -98,10 +58,6 @@ MBartForCausalLM,pass,0
|
||||
|
||||
|
||||
|
||||
MBartForConditionalGeneration,pass,0
|
||||
|
||||
|
||||
|
||||
MT5ForConditionalGeneration,pass,0
|
||||
|
||||
|
||||
@ -110,18 +66,10 @@ MegatronBertForCausalLM,pass,0
|
||||
|
||||
|
||||
|
||||
MegatronBertForQuestionAnswering,pass,0
|
||||
|
||||
|
||||
|
||||
MobileBertForMaskedLM,pass,0
|
||||
|
||||
|
||||
|
||||
MobileBertForQuestionAnswering,pass,0
|
||||
|
||||
|
||||
|
||||
OPTForCausalLM,pass,0
|
||||
|
||||
|
||||
@ -130,26 +78,14 @@ PLBartForCausalLM,pass,0
|
||||
|
||||
|
||||
|
||||
PLBartForConditionalGeneration,pass,0
|
||||
|
||||
|
||||
|
||||
PegasusForCausalLM,pass,0
|
||||
|
||||
|
||||
|
||||
PegasusForConditionalGeneration,pass,0
|
||||
|
||||
|
||||
|
||||
RobertaForCausalLM,pass,0
|
||||
|
||||
|
||||
|
||||
RobertaForQuestionAnswering,pass,0
|
||||
|
||||
|
||||
|
||||
T5ForConditionalGeneration,pass,0
|
||||
|
||||
|
||||
|
|
@ -6,10 +6,6 @@ AlbertForMaskedLM,pass,0
|
||||
|
||||
|
||||
|
||||
AlbertForQuestionAnswering,pass,0
|
||||
|
||||
|
||||
|
||||
AllenaiLongformerBase,pass,4
|
||||
|
||||
|
||||
@ -18,50 +14,22 @@ BartForCausalLM,pass,0
|
||||
|
||||
|
||||
|
||||
BartForConditionalGeneration,pass,0
|
||||
|
||||
|
||||
|
||||
BertForMaskedLM,pass,0
|
||||
|
||||
|
||||
|
||||
BertForQuestionAnswering,pass,0
|
||||
|
||||
|
||||
|
||||
BlenderbotForCausalLM,pass_due_to_skip,0
|
||||
|
||||
|
||||
|
||||
BlenderbotSmallForCausalLM,pass,0
|
||||
|
||||
|
||||
|
||||
BlenderbotSmallForConditionalGeneration,pass,0
|
||||
|
||||
|
||||
|
||||
CamemBert,pass,0
|
||||
|
||||
|
||||
|
||||
DebertaV2ForMaskedLM,pass_due_to_skip,0
|
||||
|
||||
|
||||
|
||||
DebertaV2ForQuestionAnswering,pass,0
|
||||
|
||||
|
||||
|
||||
DistilBertForMaskedLM,pass,0
|
||||
|
||||
|
||||
|
||||
DistilBertForQuestionAnswering,pass,0
|
||||
|
||||
|
||||
|
||||
DistillGPT2,pass,2
|
||||
|
||||
|
||||
@ -70,10 +38,6 @@ ElectraForCausalLM,pass,0
|
||||
|
||||
|
||||
|
||||
ElectraForQuestionAnswering,pass,0
|
||||
|
||||
|
||||
|
||||
GPT2ForSequenceClassification,pass,0
|
||||
|
||||
|
||||
@ -86,10 +50,6 @@ LayoutLMForMaskedLM,pass,0
|
||||
|
||||
|
||||
|
||||
LayoutLMForSequenceClassification,pass,0
|
||||
|
||||
|
||||
|
||||
M2M100ForConditionalGeneration,pass,0
|
||||
|
||||
|
||||
@ -98,10 +58,6 @@ MBartForCausalLM,pass,0
|
||||
|
||||
|
||||
|
||||
MBartForConditionalGeneration,pass,0
|
||||
|
||||
|
||||
|
||||
MT5ForConditionalGeneration,pass,0
|
||||
|
||||
|
||||
@ -110,18 +66,10 @@ MegatronBertForCausalLM,pass,0
|
||||
|
||||
|
||||
|
||||
MegatronBertForQuestionAnswering,pass,0
|
||||
|
||||
|
||||
|
||||
MobileBertForMaskedLM,pass,0
|
||||
|
||||
|
||||
|
||||
MobileBertForQuestionAnswering,pass,0
|
||||
|
||||
|
||||
|
||||
OPTForCausalLM,pass,0
|
||||
|
||||
|
||||
@ -130,26 +78,14 @@ PLBartForCausalLM,pass,0
|
||||
|
||||
|
||||
|
||||
PLBartForConditionalGeneration,pass,0
|
||||
|
||||
|
||||
|
||||
PegasusForCausalLM,pass,0
|
||||
|
||||
|
||||
|
||||
PegasusForConditionalGeneration,pass,0
|
||||
|
||||
|
||||
|
||||
RobertaForCausalLM,pass,0
|
||||
|
||||
|
||||
|
||||
RobertaForQuestionAnswering,pass,0
|
||||
|
||||
|
||||
|
||||
T5ForConditionalGeneration,pass,0
|
||||
|
||||
|
||||
|
|
@ -6,10 +6,6 @@ AlbertForMaskedLM,pass,0
|
||||
|
||||
|
||||
|
||||
AlbertForQuestionAnswering,pass,0
|
||||
|
||||
|
||||
|
||||
AllenaiLongformerBase,pass,4
|
||||
|
||||
|
||||
@ -18,50 +14,22 @@ BartForCausalLM,pass,0
|
||||
|
||||
|
||||
|
||||
BartForConditionalGeneration,pass,0
|
||||
|
||||
|
||||
|
||||
BertForMaskedLM,pass,0
|
||||
|
||||
|
||||
|
||||
BertForQuestionAnswering,pass,0
|
||||
|
||||
|
||||
|
||||
BlenderbotForCausalLM,pass_due_to_skip,0
|
||||
|
||||
|
||||
|
||||
BlenderbotSmallForCausalLM,pass,0
|
||||
|
||||
|
||||
|
||||
BlenderbotSmallForConditionalGeneration,pass,0
|
||||
|
||||
|
||||
|
||||
CamemBert,pass,0
|
||||
|
||||
|
||||
|
||||
DebertaV2ForMaskedLM,pass_due_to_skip,0
|
||||
|
||||
|
||||
|
||||
DebertaV2ForQuestionAnswering,pass,0
|
||||
|
||||
|
||||
|
||||
DistilBertForMaskedLM,pass,0
|
||||
|
||||
|
||||
|
||||
DistilBertForQuestionAnswering,pass,0
|
||||
|
||||
|
||||
|
||||
DistillGPT2,pass,2
|
||||
|
||||
|
||||
@ -70,10 +38,6 @@ ElectraForCausalLM,pass,0
|
||||
|
||||
|
||||
|
||||
ElectraForQuestionAnswering,pass,0
|
||||
|
||||
|
||||
|
||||
GPT2ForSequenceClassification,pass,0
|
||||
|
||||
|
||||
@ -86,10 +50,6 @@ LayoutLMForMaskedLM,pass,0
|
||||
|
||||
|
||||
|
||||
LayoutLMForSequenceClassification,pass,0
|
||||
|
||||
|
||||
|
||||
M2M100ForConditionalGeneration,pass,0
|
||||
|
||||
|
||||
@ -98,10 +58,6 @@ MBartForCausalLM,pass,0
|
||||
|
||||
|
||||
|
||||
MBartForConditionalGeneration,pass,0
|
||||
|
||||
|
||||
|
||||
MT5ForConditionalGeneration,pass,0
|
||||
|
||||
|
||||
@ -110,18 +66,10 @@ MegatronBertForCausalLM,pass,0
|
||||
|
||||
|
||||
|
||||
MegatronBertForQuestionAnswering,pass,0
|
||||
|
||||
|
||||
|
||||
MobileBertForMaskedLM,pass,0
|
||||
|
||||
|
||||
|
||||
MobileBertForQuestionAnswering,pass,0
|
||||
|
||||
|
||||
|
||||
OPTForCausalLM,pass,0
|
||||
|
||||
|
||||
@ -130,26 +78,14 @@ PLBartForCausalLM,pass,0
|
||||
|
||||
|
||||
|
||||
PLBartForConditionalGeneration,pass,0
|
||||
|
||||
|
||||
|
||||
PegasusForCausalLM,pass,0
|
||||
|
||||
|
||||
|
||||
PegasusForConditionalGeneration,pass,0
|
||||
|
||||
|
||||
|
||||
RobertaForCausalLM,pass,0
|
||||
|
||||
|
||||
|
||||
RobertaForQuestionAnswering,pass,0
|
||||
|
||||
|
||||
|
||||
T5ForConditionalGeneration,pass,0
|
||||
|
||||
|
||||
|
|
@ -6,10 +6,6 @@ AlbertForMaskedLM,pass,0
|
||||
|
||||
|
||||
|
||||
AlbertForQuestionAnswering,pass,0
|
||||
|
||||
|
||||
|
||||
AllenaiLongformerBase,pass,4
|
||||
|
||||
|
||||
@ -18,50 +14,22 @@ BartForCausalLM,pass,0
|
||||
|
||||
|
||||
|
||||
BartForConditionalGeneration,pass,0
|
||||
|
||||
|
||||
|
||||
BertForMaskedLM,pass,0
|
||||
|
||||
|
||||
|
||||
BertForQuestionAnswering,pass,0
|
||||
|
||||
|
||||
|
||||
BlenderbotForCausalLM,pass_due_to_skip,0
|
||||
|
||||
|
||||
|
||||
BlenderbotSmallForCausalLM,pass,0
|
||||
|
||||
|
||||
|
||||
BlenderbotSmallForConditionalGeneration,pass,0
|
||||
|
||||
|
||||
|
||||
CamemBert,pass,0
|
||||
|
||||
|
||||
|
||||
DebertaV2ForMaskedLM,pass_due_to_skip,0
|
||||
|
||||
|
||||
|
||||
DebertaV2ForQuestionAnswering,pass,0
|
||||
|
||||
|
||||
|
||||
DistilBertForMaskedLM,pass,0
|
||||
|
||||
|
||||
|
||||
DistilBertForQuestionAnswering,pass,0
|
||||
|
||||
|
||||
|
||||
DistillGPT2,pass,2
|
||||
|
||||
|
||||
@ -70,10 +38,6 @@ ElectraForCausalLM,pass,0
|
||||
|
||||
|
||||
|
||||
ElectraForQuestionAnswering,pass,0
|
||||
|
||||
|
||||
|
||||
GPT2ForSequenceClassification,pass,0
|
||||
|
||||
|
||||
@ -86,10 +50,6 @@ LayoutLMForMaskedLM,pass,0
|
||||
|
||||
|
||||
|
||||
LayoutLMForSequenceClassification,pass,0
|
||||
|
||||
|
||||
|
||||
M2M100ForConditionalGeneration,pass,0
|
||||
|
||||
|
||||
@ -98,10 +58,6 @@ MBartForCausalLM,pass,0
|
||||
|
||||
|
||||
|
||||
MBartForConditionalGeneration,pass,0
|
||||
|
||||
|
||||
|
||||
MT5ForConditionalGeneration,pass,0
|
||||
|
||||
|
||||
@ -110,18 +66,10 @@ MegatronBertForCausalLM,pass,0
|
||||
|
||||
|
||||
|
||||
MegatronBertForQuestionAnswering,pass,0
|
||||
|
||||
|
||||
|
||||
MobileBertForMaskedLM,pass,0
|
||||
|
||||
|
||||
|
||||
MobileBertForQuestionAnswering,pass,0
|
||||
|
||||
|
||||
|
||||
OPTForCausalLM,pass,0
|
||||
|
||||
|
||||
@ -130,26 +78,14 @@ PLBartForCausalLM,pass,0
|
||||
|
||||
|
||||
|
||||
PLBartForConditionalGeneration,pass,0
|
||||
|
||||
|
||||
|
||||
PegasusForCausalLM,pass,0
|
||||
|
||||
|
||||
|
||||
PegasusForConditionalGeneration,pass,0
|
||||
|
||||
|
||||
|
||||
RobertaForCausalLM,pass,0
|
||||
|
||||
|
||||
|
||||
RobertaForQuestionAnswering,pass,0
|
||||
|
||||
|
||||
|
||||
T5ForConditionalGeneration,pass,0
|
||||
|
||||
|
||||
|
|
@ -6,10 +6,6 @@ AlbertForMaskedLM,pass,4
|
||||
|
||||
|
||||
|
||||
AlbertForQuestionAnswering,pass,5
|
||||
|
||||
|
||||
|
||||
AllenaiLongformerBase,pass,9
|
||||
|
||||
|
||||
@ -18,50 +14,22 @@ BartForCausalLM,pass,6
|
||||
|
||||
|
||||
|
||||
BartForConditionalGeneration,pass,8
|
||||
|
||||
|
||||
|
||||
BertForMaskedLM,pass,5
|
||||
|
||||
|
||||
|
||||
BertForQuestionAnswering,pass,5
|
||||
|
||||
|
||||
|
||||
BlenderbotForCausalLM,eager_fail_to_run,0
|
||||
|
||||
|
||||
|
||||
BlenderbotSmallForCausalLM,pass,6
|
||||
|
||||
|
||||
|
||||
BlenderbotSmallForConditionalGeneration,pass,8
|
||||
|
||||
|
||||
|
||||
CamemBert,pass,5
|
||||
|
||||
|
||||
|
||||
DebertaV2ForMaskedLM,pass_due_to_skip,0
|
||||
|
||||
|
||||
|
||||
DebertaV2ForQuestionAnswering,eager_1st_run_OOM,0
|
||||
|
||||
|
||||
|
||||
DistilBertForMaskedLM,pass,5
|
||||
|
||||
|
||||
|
||||
DistilBertForQuestionAnswering,pass,5
|
||||
|
||||
|
||||
|
||||
DistillGPT2,pass,7
|
||||
|
||||
|
||||
@ -70,10 +38,6 @@ ElectraForCausalLM,pass,4
|
||||
|
||||
|
||||
|
||||
ElectraForQuestionAnswering,pass,5
|
||||
|
||||
|
||||
|
||||
GPT2ForSequenceClassification,pass,6
|
||||
|
||||
|
||||
@ -86,10 +50,6 @@ LayoutLMForMaskedLM,pass,5
|
||||
|
||||
|
||||
|
||||
LayoutLMForSequenceClassification,pass,6
|
||||
|
||||
|
||||
|
||||
M2M100ForConditionalGeneration,pass,4
|
||||
|
||||
|
||||
@ -98,10 +58,6 @@ MBartForCausalLM,pass,6
|
||||
|
||||
|
||||
|
||||
MBartForConditionalGeneration,pass,8
|
||||
|
||||
|
||||
|
||||
MT5ForConditionalGeneration,pass,5
|
||||
|
||||
|
||||
@ -110,18 +66,10 @@ MegatronBertForCausalLM,pass,5
|
||||
|
||||
|
||||
|
||||
MegatronBertForQuestionAnswering,pass,5
|
||||
|
||||
|
||||
|
||||
MobileBertForMaskedLM,pass,3
|
||||
|
||||
|
||||
|
||||
MobileBertForQuestionAnswering,pass,3
|
||||
|
||||
|
||||
|
||||
OPTForCausalLM,pass,8
|
||||
|
||||
|
||||
@ -130,26 +78,14 @@ PLBartForCausalLM,pass,6
|
||||
|
||||
|
||||
|
||||
PLBartForConditionalGeneration,pass,8
|
||||
|
||||
|
||||
|
||||
PegasusForCausalLM,pass,6
|
||||
|
||||
|
||||
|
||||
PegasusForConditionalGeneration,pass,7
|
||||
|
||||
|
||||
|
||||
RobertaForCausalLM,pass,5
|
||||
|
||||
|
||||
|
||||
RobertaForQuestionAnswering,pass,5
|
||||
|
||||
|
||||
|
||||
T5ForConditionalGeneration,pass,5
|
||||
|
||||
|
||||
|
|
@ -6,10 +6,6 @@ AlbertForMaskedLM,pass,0
|
||||
|
||||
|
||||
|
||||
AlbertForQuestionAnswering,pass,0
|
||||
|
||||
|
||||
|
||||
AllenaiLongformerBase,pass,4
|
||||
|
||||
|
||||
@ -18,50 +14,22 @@ BartForCausalLM,pass,0
|
||||
|
||||
|
||||
|
||||
BartForConditionalGeneration,pass,0
|
||||
|
||||
|
||||
|
||||
BertForMaskedLM,pass,0
|
||||
|
||||
|
||||
|
||||
BertForQuestionAnswering,pass,0
|
||||
|
||||
|
||||
|
||||
BlenderbotForCausalLM,pass_due_to_skip,0
|
||||
|
||||
|
||||
|
||||
BlenderbotSmallForCausalLM,pass,0
|
||||
|
||||
|
||||
|
||||
BlenderbotSmallForConditionalGeneration,pass,0
|
||||
|
||||
|
||||
|
||||
CamemBert,pass,0
|
||||
|
||||
|
||||
|
||||
DebertaV2ForMaskedLM,pass_due_to_skip,0
|
||||
|
||||
|
||||
|
||||
DebertaV2ForQuestionAnswering,pass,0
|
||||
|
||||
|
||||
|
||||
DistilBertForMaskedLM,pass,0
|
||||
|
||||
|
||||
|
||||
DistilBertForQuestionAnswering,pass,0
|
||||
|
||||
|
||||
|
||||
DistillGPT2,pass,2
|
||||
|
||||
|
||||
@ -70,10 +38,6 @@ ElectraForCausalLM,pass,0
|
||||
|
||||
|
||||
|
||||
ElectraForQuestionAnswering,pass,0
|
||||
|
||||
|
||||
|
||||
GPT2ForSequenceClassification,pass,0
|
||||
|
||||
|
||||
@ -86,10 +50,6 @@ LayoutLMForMaskedLM,pass,0
|
||||
|
||||
|
||||
|
||||
LayoutLMForSequenceClassification,pass,0
|
||||
|
||||
|
||||
|
||||
M2M100ForConditionalGeneration,pass,0
|
||||
|
||||
|
||||
@ -98,10 +58,6 @@ MBartForCausalLM,pass,0
|
||||
|
||||
|
||||
|
||||
MBartForConditionalGeneration,pass,0
|
||||
|
||||
|
||||
|
||||
MT5ForConditionalGeneration,pass,0
|
||||
|
||||
|
||||
@ -110,18 +66,10 @@ MegatronBertForCausalLM,pass,0
|
||||
|
||||
|
||||
|
||||
MegatronBertForQuestionAnswering,pass,0
|
||||
|
||||
|
||||
|
||||
MobileBertForMaskedLM,pass,0
|
||||
|
||||
|
||||
|
||||
MobileBertForQuestionAnswering,pass,0
|
||||
|
||||
|
||||
|
||||
OPTForCausalLM,pass,0
|
||||
|
||||
|
||||
@ -130,26 +78,14 @@ PLBartForCausalLM,pass,0
|
||||
|
||||
|
||||
|
||||
PLBartForConditionalGeneration,pass,0
|
||||
|
||||
|
||||
|
||||
PegasusForCausalLM,pass,0
|
||||
|
||||
|
||||
|
||||
PegasusForConditionalGeneration,pass,0
|
||||
|
||||
|
||||
|
||||
RobertaForCausalLM,pass,0
|
||||
|
||||
|
||||
|
||||
RobertaForQuestionAnswering,pass,0
|
||||
|
||||
|
||||
|
||||
T5ForConditionalGeneration,pass,0
|
||||
|
||||
|
||||
|
|
@ -6,10 +6,6 @@ AlbertForMaskedLM,pass,0
|
||||
|
||||
|
||||
|
||||
AlbertForQuestionAnswering,pass,0
|
||||
|
||||
|
||||
|
||||
AllenaiLongformerBase,pass,4
|
||||
|
||||
|
||||
@ -18,50 +14,22 @@ BartForCausalLM,pass,0
|
||||
|
||||
|
||||
|
||||
BartForConditionalGeneration,pass,0
|
||||
|
||||
|
||||
|
||||
BertForMaskedLM,pass,0
|
||||
|
||||
|
||||
|
||||
BertForQuestionAnswering,pass,0
|
||||
|
||||
|
||||
|
||||
BlenderbotForCausalLM,pass_due_to_skip,0
|
||||
|
||||
|
||||
|
||||
BlenderbotSmallForCausalLM,pass,0
|
||||
|
||||
|
||||
|
||||
BlenderbotSmallForConditionalGeneration,pass,0
|
||||
|
||||
|
||||
|
||||
CamemBert,pass,0
|
||||
|
||||
|
||||
|
||||
DebertaV2ForMaskedLM,pass_due_to_skip,0
|
||||
|
||||
|
||||
|
||||
DebertaV2ForQuestionAnswering,pass,0
|
||||
|
||||
|
||||
|
||||
DistilBertForMaskedLM,pass,0
|
||||
|
||||
|
||||
|
||||
DistilBertForQuestionAnswering,pass,0
|
||||
|
||||
|
||||
|
||||
DistillGPT2,pass,2
|
||||
|
||||
|
||||
@ -70,10 +38,6 @@ ElectraForCausalLM,pass,0
|
||||
|
||||
|
||||
|
||||
ElectraForQuestionAnswering,pass,0
|
||||
|
||||
|
||||
|
||||
GPT2ForSequenceClassification,pass,0
|
||||
|
||||
|
||||
@ -86,10 +50,6 @@ LayoutLMForMaskedLM,pass,0
|
||||
|
||||
|
||||
|
||||
LayoutLMForSequenceClassification,pass,0
|
||||
|
||||
|
||||
|
||||
M2M100ForConditionalGeneration,pass,0
|
||||
|
||||
|
||||
@ -98,10 +58,6 @@ MBartForCausalLM,pass,0
|
||||
|
||||
|
||||
|
||||
MBartForConditionalGeneration,pass,0
|
||||
|
||||
|
||||
|
||||
MT5ForConditionalGeneration,pass,0
|
||||
|
||||
|
||||
@ -110,18 +66,10 @@ MegatronBertForCausalLM,pass,0
|
||||
|
||||
|
||||
|
||||
MegatronBertForQuestionAnswering,pass,0
|
||||
|
||||
|
||||
|
||||
MobileBertForMaskedLM,pass,0
|
||||
|
||||
|
||||
|
||||
MobileBertForQuestionAnswering,pass,0
|
||||
|
||||
|
||||
|
||||
OPTForCausalLM,pass,0
|
||||
|
||||
|
||||
@ -130,26 +78,14 @@ PLBartForCausalLM,pass,0
|
||||
|
||||
|
||||
|
||||
PLBartForConditionalGeneration,pass,0
|
||||
|
||||
|
||||
|
||||
PegasusForCausalLM,pass,0
|
||||
|
||||
|
||||
|
||||
PegasusForConditionalGeneration,pass,0
|
||||
|
||||
|
||||
|
||||
RobertaForCausalLM,pass,0
|
||||
|
||||
|
||||
|
||||
RobertaForQuestionAnswering,pass,0
|
||||
|
||||
|
||||
|
||||
T5ForConditionalGeneration,pass,0
|
||||
|
||||
|
||||
|
|
@ -6,10 +6,6 @@ AlbertForMaskedLM,pass,0
|
||||
|
||||
|
||||
|
||||
AlbertForQuestionAnswering,pass,0
|
||||
|
||||
|
||||
|
||||
AllenaiLongformerBase,pass,4
|
||||
|
||||
|
||||
@ -18,50 +14,22 @@ BartForCausalLM,pass,0
|
||||
|
||||
|
||||
|
||||
BartForConditionalGeneration,pass,0
|
||||
|
||||
|
||||
|
||||
BertForMaskedLM,pass,0
|
||||
|
||||
|
||||
|
||||
BertForQuestionAnswering,pass,0
|
||||
|
||||
|
||||
|
||||
BlenderbotForCausalLM,pass_due_to_skip,0
|
||||
|
||||
|
||||
|
||||
BlenderbotSmallForCausalLM,pass,0
|
||||
|
||||
|
||||
|
||||
BlenderbotSmallForConditionalGeneration,pass,0
|
||||
|
||||
|
||||
|
||||
CamemBert,pass,0
|
||||
|
||||
|
||||
|
||||
DebertaV2ForMaskedLM,pass_due_to_skip,0
|
||||
|
||||
|
||||
|
||||
DebertaV2ForQuestionAnswering,pass,0
|
||||
|
||||
|
||||
|
||||
DistilBertForMaskedLM,pass,0
|
||||
|
||||
|
||||
|
||||
DistilBertForQuestionAnswering,pass,0
|
||||
|
||||
|
||||
|
||||
DistillGPT2,pass,2
|
||||
|
||||
|
||||
@ -70,10 +38,6 @@ ElectraForCausalLM,pass,0
|
||||
|
||||
|
||||
|
||||
ElectraForQuestionAnswering,pass,0
|
||||
|
||||
|
||||
|
||||
GPT2ForSequenceClassification,pass,0
|
||||
|
||||
|
||||
@ -86,10 +50,6 @@ LayoutLMForMaskedLM,pass,0
|
||||
|
||||
|
||||
|
||||
LayoutLMForSequenceClassification,pass,0
|
||||
|
||||
|
||||
|
||||
M2M100ForConditionalGeneration,pass,0
|
||||
|
||||
|
||||
@ -98,10 +58,6 @@ MBartForCausalLM,pass,0
|
||||
|
||||
|
||||
|
||||
MBartForConditionalGeneration,pass,0
|
||||
|
||||
|
||||
|
||||
MT5ForConditionalGeneration,pass,0
|
||||
|
||||
|
||||
@ -110,18 +66,10 @@ MegatronBertForCausalLM,pass,0
|
||||
|
||||
|
||||
|
||||
MegatronBertForQuestionAnswering,pass,0
|
||||
|
||||
|
||||
|
||||
MobileBertForMaskedLM,pass,0
|
||||
|
||||
|
||||
|
||||
MobileBertForQuestionAnswering,pass,0
|
||||
|
||||
|
||||
|
||||
OPTForCausalLM,pass,0
|
||||
|
||||
|
||||
@ -130,26 +78,14 @@ PLBartForCausalLM,pass,0
|
||||
|
||||
|
||||
|
||||
PLBartForConditionalGeneration,pass,0
|
||||
|
||||
|
||||
|
||||
PegasusForCausalLM,pass,0
|
||||
|
||||
|
||||
|
||||
PegasusForConditionalGeneration,pass,0
|
||||
|
||||
|
||||
|
||||
RobertaForCausalLM,pass,0
|
||||
|
||||
|
||||
|
||||
RobertaForQuestionAnswering,pass,0
|
||||
|
||||
|
||||
|
||||
T5ForConditionalGeneration,pass,0
|
||||
|
||||
|
||||
|
|
@ -6,10 +6,6 @@ AlbertForMaskedLM,pass,4
|
||||
|
||||
|
||||
|
||||
AlbertForQuestionAnswering,pass,5
|
||||
|
||||
|
||||
|
||||
AllenaiLongformerBase,pass,9
|
||||
|
||||
|
||||
@ -18,50 +14,22 @@ BartForCausalLM,pass,6
|
||||
|
||||
|
||||
|
||||
BartForConditionalGeneration,pass,8
|
||||
|
||||
|
||||
|
||||
BertForMaskedLM,pass,5
|
||||
|
||||
|
||||
|
||||
BertForQuestionAnswering,pass,5
|
||||
|
||||
|
||||
|
||||
BlenderbotForCausalLM,eager_fail_to_run,0
|
||||
|
||||
|
||||
|
||||
BlenderbotSmallForCausalLM,pass,6
|
||||
|
||||
|
||||
|
||||
BlenderbotSmallForConditionalGeneration,pass,8
|
||||
|
||||
|
||||
|
||||
CamemBert,pass,5
|
||||
|
||||
|
||||
|
||||
DebertaV2ForMaskedLM,pass_due_to_skip,0
|
||||
|
||||
|
||||
|
||||
DebertaV2ForQuestionAnswering,eager_1st_run_OOM,0
|
||||
|
||||
|
||||
|
||||
DistilBertForMaskedLM,pass,5
|
||||
|
||||
|
||||
|
||||
DistilBertForQuestionAnswering,pass,5
|
||||
|
||||
|
||||
|
||||
DistillGPT2,pass,7
|
||||
|
||||
|
||||
@ -70,10 +38,6 @@ ElectraForCausalLM,pass,4
|
||||
|
||||
|
||||
|
||||
ElectraForQuestionAnswering,pass,5
|
||||
|
||||
|
||||
|
||||
GPT2ForSequenceClassification,pass,6
|
||||
|
||||
|
||||
@ -86,10 +50,6 @@ LayoutLMForMaskedLM,pass,5
|
||||
|
||||
|
||||
|
||||
LayoutLMForSequenceClassification,pass,6
|
||||
|
||||
|
||||
|
||||
M2M100ForConditionalGeneration,pass,4
|
||||
|
||||
|
||||
@ -98,10 +58,6 @@ MBartForCausalLM,pass,6
|
||||
|
||||
|
||||
|
||||
MBartForConditionalGeneration,pass,8
|
||||
|
||||
|
||||
|
||||
MT5ForConditionalGeneration,pass,5
|
||||
|
||||
|
||||
@ -110,18 +66,10 @@ MegatronBertForCausalLM,pass,5
|
||||
|
||||
|
||||
|
||||
MegatronBertForQuestionAnswering,pass,5
|
||||
|
||||
|
||||
|
||||
MobileBertForMaskedLM,pass,3
|
||||
|
||||
|
||||
|
||||
MobileBertForQuestionAnswering,pass,3
|
||||
|
||||
|
||||
|
||||
OPTForCausalLM,pass,8
|
||||
|
||||
|
||||
@ -130,26 +78,14 @@ PLBartForCausalLM,pass,6
|
||||
|
||||
|
||||
|
||||
PLBartForConditionalGeneration,pass,8
|
||||
|
||||
|
||||
|
||||
PegasusForCausalLM,pass,6
|
||||
|
||||
|
||||
|
||||
PegasusForConditionalGeneration,pass,7
|
||||
|
||||
|
||||
|
||||
RobertaForCausalLM,pass,5
|
||||
|
||||
|
||||
|
||||
RobertaForQuestionAnswering,pass,5
|
||||
|
||||
|
||||
|
||||
T5ForConditionalGeneration,pass,5
|
||||
|
||||
|
||||
|
|
@ -6,10 +6,6 @@ AlbertForMaskedLM,pass,0
|
||||
|
||||
|
||||
|
||||
AlbertForQuestionAnswering,pass,0
|
||||
|
||||
|
||||
|
||||
AllenaiLongformerBase,pass,4
|
||||
|
||||
|
||||
@ -18,50 +14,22 @@ BartForCausalLM,pass,0
|
||||
|
||||
|
||||
|
||||
BartForConditionalGeneration,pass,0
|
||||
|
||||
|
||||
|
||||
BertForMaskedLM,pass,0
|
||||
|
||||
|
||||
|
||||
BertForQuestionAnswering,pass,0
|
||||
|
||||
|
||||
|
||||
BlenderbotForCausalLM,pass_due_to_skip,0
|
||||
|
||||
|
||||
|
||||
BlenderbotSmallForCausalLM,pass,0
|
||||
|
||||
|
||||
|
||||
BlenderbotSmallForConditionalGeneration,pass,0
|
||||
|
||||
|
||||
|
||||
CamemBert,pass,0
|
||||
|
||||
|
||||
|
||||
DebertaV2ForMaskedLM,pass_due_to_skip,0
|
||||
|
||||
|
||||
|
||||
DebertaV2ForQuestionAnswering,pass,0
|
||||
|
||||
|
||||
|
||||
DistilBertForMaskedLM,pass,0
|
||||
|
||||
|
||||
|
||||
DistilBertForQuestionAnswering,pass,0
|
||||
|
||||
|
||||
|
||||
DistillGPT2,pass,2
|
||||
|
||||
|
||||
@ -70,10 +38,6 @@ ElectraForCausalLM,pass,0
|
||||
|
||||
|
||||
|
||||
ElectraForQuestionAnswering,pass,0
|
||||
|
||||
|
||||
|
||||
GPT2ForSequenceClassification,pass,0
|
||||
|
||||
|
||||
@ -86,10 +50,6 @@ LayoutLMForMaskedLM,pass,0
|
||||
|
||||
|
||||
|
||||
LayoutLMForSequenceClassification,pass,0
|
||||
|
||||
|
||||
|
||||
M2M100ForConditionalGeneration,pass,0
|
||||
|
||||
|
||||
@ -98,10 +58,6 @@ MBartForCausalLM,pass,0
|
||||
|
||||
|
||||
|
||||
MBartForConditionalGeneration,pass,0
|
||||
|
||||
|
||||
|
||||
MT5ForConditionalGeneration,pass,0
|
||||
|
||||
|
||||
@ -110,18 +66,10 @@ MegatronBertForCausalLM,pass,0
|
||||
|
||||
|
||||
|
||||
MegatronBertForQuestionAnswering,pass,0
|
||||
|
||||
|
||||
|
||||
MobileBertForMaskedLM,pass,0
|
||||
|
||||
|
||||
|
||||
MobileBertForQuestionAnswering,pass,0
|
||||
|
||||
|
||||
|
||||
OPTForCausalLM,pass,0
|
||||
|
||||
|
||||
@ -130,26 +78,14 @@ PLBartForCausalLM,pass,0
|
||||
|
||||
|
||||
|
||||
PLBartForConditionalGeneration,pass,0
|
||||
|
||||
|
||||
|
||||
PegasusForCausalLM,pass,0
|
||||
|
||||
|
||||
|
||||
PegasusForConditionalGeneration,pass,0
|
||||
|
||||
|
||||
|
||||
RobertaForCausalLM,pass,0
|
||||
|
||||
|
||||
|
||||
RobertaForQuestionAnswering,pass,0
|
||||
|
||||
|
||||
|
||||
T5ForConditionalGeneration,pass,0
|
||||
|
||||
|
||||
|
|
@ -6,10 +6,6 @@ AlbertForMaskedLM,pass,4
|
||||
|
||||
|
||||
|
||||
AlbertForQuestionAnswering,pass,5
|
||||
|
||||
|
||||
|
||||
AllenaiLongformerBase,pass,9
|
||||
|
||||
|
||||
@ -18,50 +14,22 @@ BartForCausalLM,pass,6
|
||||
|
||||
|
||||
|
||||
BartForConditionalGeneration,pass,8
|
||||
|
||||
|
||||
|
||||
BertForMaskedLM,pass,5
|
||||
|
||||
|
||||
|
||||
BertForQuestionAnswering,pass,5
|
||||
|
||||
|
||||
|
||||
BlenderbotForCausalLM,eager_fail_to_run,0
|
||||
|
||||
|
||||
|
||||
BlenderbotSmallForCausalLM,pass,6
|
||||
|
||||
|
||||
|
||||
BlenderbotSmallForConditionalGeneration,pass,8
|
||||
|
||||
|
||||
|
||||
CamemBert,pass,5
|
||||
|
||||
|
||||
|
||||
DebertaV2ForMaskedLM,pass_due_to_skip,0
|
||||
|
||||
|
||||
|
||||
DebertaV2ForQuestionAnswering,eager_1st_run_OOM,0
|
||||
|
||||
|
||||
|
||||
DistilBertForMaskedLM,pass,5
|
||||
|
||||
|
||||
|
||||
DistilBertForQuestionAnswering,pass,5
|
||||
|
||||
|
||||
|
||||
DistillGPT2,pass,7
|
||||
|
||||
|
||||
@ -70,10 +38,6 @@ ElectraForCausalLM,pass,4
|
||||
|
||||
|
||||
|
||||
ElectraForQuestionAnswering,pass,5
|
||||
|
||||
|
||||
|
||||
GPT2ForSequenceClassification,pass,6
|
||||
|
||||
|
||||
@ -86,10 +50,6 @@ LayoutLMForMaskedLM,pass,5
|
||||
|
||||
|
||||
|
||||
LayoutLMForSequenceClassification,pass,6
|
||||
|
||||
|
||||
|
||||
M2M100ForConditionalGeneration,pass,4
|
||||
|
||||
|
||||
@ -98,10 +58,6 @@ MBartForCausalLM,pass,6
|
||||
|
||||
|
||||
|
||||
MBartForConditionalGeneration,pass,8
|
||||
|
||||
|
||||
|
||||
MT5ForConditionalGeneration,pass,5
|
||||
|
||||
|
||||
@ -110,18 +66,10 @@ MegatronBertForCausalLM,pass,5
|
||||
|
||||
|
||||
|
||||
MegatronBertForQuestionAnswering,pass,5
|
||||
|
||||
|
||||
|
||||
MobileBertForMaskedLM,pass,3
|
||||
|
||||
|
||||
|
||||
MobileBertForQuestionAnswering,pass,3
|
||||
|
||||
|
||||
|
||||
OPTForCausalLM,pass,8
|
||||
|
||||
|
||||
@ -130,26 +78,14 @@ PLBartForCausalLM,pass,6
|
||||
|
||||
|
||||
|
||||
PLBartForConditionalGeneration,pass,8
|
||||
|
||||
|
||||
|
||||
PegasusForCausalLM,pass,6
|
||||
|
||||
|
||||
|
||||
PegasusForConditionalGeneration,pass,7
|
||||
|
||||
|
||||
|
||||
RobertaForCausalLM,pass,5
|
||||
|
||||
|
||||
|
||||
RobertaForQuestionAnswering,pass,5
|
||||
|
||||
|
||||
|
||||
T5ForConditionalGeneration,pass,5
|
||||
|
||||
|
||||
|
|
@ -6,10 +6,6 @@ AlbertForMaskedLM,pass,0
|
||||
|
||||
|
||||
|
||||
AlbertForQuestionAnswering,pass,0
|
||||
|
||||
|
||||
|
||||
AllenaiLongformerBase,pass,4
|
||||
|
||||
|
||||
@ -18,50 +14,22 @@ BartForCausalLM,pass,0
|
||||
|
||||
|
||||
|
||||
BartForConditionalGeneration,pass,0
|
||||
|
||||
|
||||
|
||||
BertForMaskedLM,pass,0
|
||||
|
||||
|
||||
|
||||
BertForQuestionAnswering,pass,0
|
||||
|
||||
|
||||
|
||||
BlenderbotForCausalLM,pass_due_to_skip,0
|
||||
|
||||
|
||||
|
||||
BlenderbotSmallForCausalLM,pass,0
|
||||
|
||||
|
||||
|
||||
BlenderbotSmallForConditionalGeneration,pass,0
|
||||
|
||||
|
||||
|
||||
CamemBert,pass,0
|
||||
|
||||
|
||||
|
||||
DebertaV2ForMaskedLM,pass_due_to_skip,0
|
||||
|
||||
|
||||
|
||||
DebertaV2ForQuestionAnswering,pass,0
|
||||
|
||||
|
||||
|
||||
DistilBertForMaskedLM,pass,0
|
||||
|
||||
|
||||
|
||||
DistilBertForQuestionAnswering,pass,0
|
||||
|
||||
|
||||
|
||||
DistillGPT2,pass,2
|
||||
|
||||
|
||||
@ -70,10 +38,6 @@ ElectraForCausalLM,pass,0
|
||||
|
||||
|
||||
|
||||
ElectraForQuestionAnswering,pass,0
|
||||
|
||||
|
||||
|
||||
GPT2ForSequenceClassification,pass,0
|
||||
|
||||
|
||||
@ -86,10 +50,6 @@ LayoutLMForMaskedLM,pass,0
|
||||
|
||||
|
||||
|
||||
LayoutLMForSequenceClassification,pass,0
|
||||
|
||||
|
||||
|
||||
M2M100ForConditionalGeneration,pass,0
|
||||
|
||||
|
||||
@ -98,10 +58,6 @@ MBartForCausalLM,pass,0
|
||||
|
||||
|
||||
|
||||
MBartForConditionalGeneration,pass,0
|
||||
|
||||
|
||||
|
||||
MT5ForConditionalGeneration,pass,0
|
||||
|
||||
|
||||
@ -110,18 +66,10 @@ MegatronBertForCausalLM,pass,0
|
||||
|
||||
|
||||
|
||||
MegatronBertForQuestionAnswering,pass,0
|
||||
|
||||
|
||||
|
||||
MobileBertForMaskedLM,pass,0
|
||||
|
||||
|
||||
|
||||
MobileBertForQuestionAnswering,pass,0
|
||||
|
||||
|
||||
|
||||
OPTForCausalLM,pass,0
|
||||
|
||||
|
||||
@ -130,26 +78,14 @@ PLBartForCausalLM,pass,0
|
||||
|
||||
|
||||
|
||||
PLBartForConditionalGeneration,pass,0
|
||||
|
||||
|
||||
|
||||
PegasusForCausalLM,pass,0
|
||||
|
||||
|
||||
|
||||
PegasusForConditionalGeneration,pass,0
|
||||
|
||||
|
||||
|
||||
RobertaForCausalLM,pass,0
|
||||
|
||||
|
||||
|
||||
RobertaForQuestionAnswering,pass,0
|
||||
|
||||
|
||||
|
||||
T5ForConditionalGeneration,pass,0
|
||||
|
||||
|
||||
|
|
@ -6,10 +6,6 @@ AlbertForMaskedLM,pass,4
|
||||
|
||||
|
||||
|
||||
AlbertForQuestionAnswering,pass,5
|
||||
|
||||
|
||||
|
||||
AllenaiLongformerBase,pass,9
|
||||
|
||||
|
||||
@ -18,50 +14,22 @@ BartForCausalLM,pass,6
|
||||
|
||||
|
||||
|
||||
BartForConditionalGeneration,pass,8
|
||||
|
||||
|
||||
|
||||
BertForMaskedLM,pass,5
|
||||
|
||||
|
||||
|
||||
BertForQuestionAnswering,pass,5
|
||||
|
||||
|
||||
|
||||
BlenderbotForCausalLM,eager_fail_to_run,0
|
||||
|
||||
|
||||
|
||||
BlenderbotSmallForCausalLM,pass,6
|
||||
|
||||
|
||||
|
||||
BlenderbotSmallForConditionalGeneration,pass,8
|
||||
|
||||
|
||||
|
||||
CamemBert,pass,5
|
||||
|
||||
|
||||
|
||||
DebertaV2ForMaskedLM,pass_due_to_skip,0
|
||||
|
||||
|
||||
|
||||
DebertaV2ForQuestionAnswering,eager_1st_run_OOM,0
|
||||
|
||||
|
||||
|
||||
DistilBertForMaskedLM,pass,5
|
||||
|
||||
|
||||
|
||||
DistilBertForQuestionAnswering,pass,5
|
||||
|
||||
|
||||
|
||||
DistillGPT2,pass,7
|
||||
|
||||
|
||||
@ -70,10 +38,6 @@ ElectraForCausalLM,pass,4
|
||||
|
||||
|
||||
|
||||
ElectraForQuestionAnswering,pass,5
|
||||
|
||||
|
||||
|
||||
GPT2ForSequenceClassification,pass,6
|
||||
|
||||
|
||||
@ -86,10 +50,6 @@ LayoutLMForMaskedLM,pass,5
|
||||
|
||||
|
||||
|
||||
LayoutLMForSequenceClassification,pass,6
|
||||
|
||||
|
||||
|
||||
M2M100ForConditionalGeneration,pass,4
|
||||
|
||||
|
||||
@ -98,10 +58,6 @@ MBartForCausalLM,pass,6
|
||||
|
||||
|
||||
|
||||
MBartForConditionalGeneration,pass,8
|
||||
|
||||
|
||||
|
||||
MT5ForConditionalGeneration,pass,5
|
||||
|
||||
|
||||
@ -110,18 +66,10 @@ MegatronBertForCausalLM,pass,5
|
||||
|
||||
|
||||
|
||||
MegatronBertForQuestionAnswering,pass,5
|
||||
|
||||
|
||||
|
||||
MobileBertForMaskedLM,pass,3
|
||||
|
||||
|
||||
|
||||
MobileBertForQuestionAnswering,pass,3
|
||||
|
||||
|
||||
|
||||
OPTForCausalLM,pass,8
|
||||
|
||||
|
||||
@ -130,26 +78,14 @@ PLBartForCausalLM,pass,6
|
||||
|
||||
|
||||
|
||||
PLBartForConditionalGeneration,pass,8
|
||||
|
||||
|
||||
|
||||
PegasusForCausalLM,pass,6
|
||||
|
||||
|
||||
|
||||
PegasusForConditionalGeneration,pass,7
|
||||
|
||||
|
||||
|
||||
RobertaForCausalLM,pass,5
|
||||
|
||||
|
||||
|
||||
RobertaForQuestionAnswering,pass,5
|
||||
|
||||
|
||||
|
||||
T5ForConditionalGeneration,pass,5
|
||||
|
||||
|
||||
|
|
@ -6,10 +6,6 @@ AlbertForMaskedLM,pass,0
|
||||
|
||||
|
||||
|
||||
AlbertForQuestionAnswering,pass,0
|
||||
|
||||
|
||||
|
||||
AllenaiLongformerBase,pass,4
|
||||
|
||||
|
||||
@ -18,50 +14,22 @@ BartForCausalLM,pass,0
|
||||
|
||||
|
||||
|
||||
BartForConditionalGeneration,pass,0
|
||||
|
||||
|
||||
|
||||
BertForMaskedLM,pass,0
|
||||
|
||||
|
||||
|
||||
BertForQuestionAnswering,pass,0
|
||||
|
||||
|
||||
|
||||
BlenderbotForCausalLM,pass_due_to_skip,0
|
||||
|
||||
|
||||
|
||||
BlenderbotSmallForCausalLM,pass,0
|
||||
|
||||
|
||||
|
||||
BlenderbotSmallForConditionalGeneration,pass,0
|
||||
|
||||
|
||||
|
||||
CamemBert,pass,0
|
||||
|
||||
|
||||
|
||||
DebertaV2ForMaskedLM,pass_due_to_skip,0
|
||||
|
||||
|
||||
|
||||
DebertaV2ForQuestionAnswering,pass,0
|
||||
|
||||
|
||||
|
||||
DistilBertForMaskedLM,pass,0
|
||||
|
||||
|
||||
|
||||
DistilBertForQuestionAnswering,pass,0
|
||||
|
||||
|
||||
|
||||
DistillGPT2,pass,2
|
||||
|
||||
|
||||
@ -70,10 +38,6 @@ ElectraForCausalLM,pass,0
|
||||
|
||||
|
||||
|
||||
ElectraForQuestionAnswering,pass,0
|
||||
|
||||
|
||||
|
||||
GPT2ForSequenceClassification,pass,0
|
||||
|
||||
|
||||
@ -86,10 +50,6 @@ LayoutLMForMaskedLM,pass,0
|
||||
|
||||
|
||||
|
||||
LayoutLMForSequenceClassification,pass,0
|
||||
|
||||
|
||||
|
||||
M2M100ForConditionalGeneration,pass,0
|
||||
|
||||
|
||||
@ -98,10 +58,6 @@ MBartForCausalLM,pass,0
|
||||
|
||||
|
||||
|
||||
MBartForConditionalGeneration,pass,0
|
||||
|
||||
|
||||
|
||||
MT5ForConditionalGeneration,pass,0
|
||||
|
||||
|
||||
@ -110,18 +66,10 @@ MegatronBertForCausalLM,pass,0
|
||||
|
||||
|
||||
|
||||
MegatronBertForQuestionAnswering,pass,0
|
||||
|
||||
|
||||
|
||||
MobileBertForMaskedLM,pass,0
|
||||
|
||||
|
||||
|
||||
MobileBertForQuestionAnswering,pass,0
|
||||
|
||||
|
||||
|
||||
OPTForCausalLM,pass,0
|
||||
|
||||
|
||||
@ -130,26 +78,14 @@ PLBartForCausalLM,pass,0
|
||||
|
||||
|
||||
|
||||
PLBartForConditionalGeneration,pass,0
|
||||
|
||||
|
||||
|
||||
PegasusForCausalLM,pass,0
|
||||
|
||||
|
||||
|
||||
PegasusForConditionalGeneration,pass,0
|
||||
|
||||
|
||||
|
||||
RobertaForCausalLM,pass,0
|
||||
|
||||
|
||||
|
||||
RobertaForQuestionAnswering,pass,0
|
||||
|
||||
|
||||
|
||||
T5ForConditionalGeneration,pass,0
|
||||
|
||||
|
||||
|
|
@ -6,10 +6,6 @@ AlbertForMaskedLM,pass,4
|
||||
|
||||
|
||||
|
||||
AlbertForQuestionAnswering,pass,5
|
||||
|
||||
|
||||
|
||||
AllenaiLongformerBase,pass,9
|
||||
|
||||
|
||||
@ -18,50 +14,22 @@ BartForCausalLM,pass,6
|
||||
|
||||
|
||||
|
||||
BartForConditionalGeneration,pass,8
|
||||
|
||||
|
||||
|
||||
BertForMaskedLM,pass,5
|
||||
|
||||
|
||||
|
||||
BertForQuestionAnswering,pass,5
|
||||
|
||||
|
||||
|
||||
BlenderbotForCausalLM,pass_due_to_skip,0
|
||||
|
||||
|
||||
|
||||
BlenderbotSmallForCausalLM,pass,6
|
||||
|
||||
|
||||
|
||||
BlenderbotSmallForConditionalGeneration,pass,8
|
||||
|
||||
|
||||
|
||||
CamemBert,pass,5
|
||||
|
||||
|
||||
|
||||
DebertaV2ForMaskedLM,pass_due_to_skip,0
|
||||
|
||||
|
||||
|
||||
DebertaV2ForQuestionAnswering,pass,4
|
||||
|
||||
|
||||
|
||||
DistilBertForMaskedLM,pass,5
|
||||
|
||||
|
||||
|
||||
DistilBertForQuestionAnswering,pass,5
|
||||
|
||||
|
||||
|
||||
DistillGPT2,pass,7
|
||||
|
||||
|
||||
@ -70,10 +38,6 @@ ElectraForCausalLM,pass,4
|
||||
|
||||
|
||||
|
||||
ElectraForQuestionAnswering,pass,5
|
||||
|
||||
|
||||
|
||||
GPT2ForSequenceClassification,pass,6
|
||||
|
||||
|
||||
@ -86,10 +50,6 @@ LayoutLMForMaskedLM,pass,5
|
||||
|
||||
|
||||
|
||||
LayoutLMForSequenceClassification,pass,6
|
||||
|
||||
|
||||
|
||||
M2M100ForConditionalGeneration,pass,4
|
||||
|
||||
|
||||
@ -98,10 +58,6 @@ MBartForCausalLM,pass,6
|
||||
|
||||
|
||||
|
||||
MBartForConditionalGeneration,pass,8
|
||||
|
||||
|
||||
|
||||
MT5ForConditionalGeneration,pass,5
|
||||
|
||||
|
||||
@ -110,18 +66,10 @@ MegatronBertForCausalLM,pass,5
|
||||
|
||||
|
||||
|
||||
MegatronBertForQuestionAnswering,pass,5
|
||||
|
||||
|
||||
|
||||
MobileBertForMaskedLM,pass,3
|
||||
|
||||
|
||||
|
||||
MobileBertForQuestionAnswering,pass,3
|
||||
|
||||
|
||||
|
||||
OPTForCausalLM,pass,8
|
||||
|
||||
|
||||
@ -130,26 +78,14 @@ PLBartForCausalLM,pass,6
|
||||
|
||||
|
||||
|
||||
PLBartForConditionalGeneration,pass,8
|
||||
|
||||
|
||||
|
||||
PegasusForCausalLM,pass,6
|
||||
|
||||
|
||||
|
||||
PegasusForConditionalGeneration,pass,7
|
||||
|
||||
|
||||
|
||||
RobertaForCausalLM,pass,5
|
||||
|
||||
|
||||
|
||||
RobertaForQuestionAnswering,pass,5
|
||||
|
||||
|
||||
|
||||
T5ForConditionalGeneration,pass,5
|
||||
|
||||
|
||||
|
|
@ -6,58 +6,26 @@ AlbertForMaskedLM,pass,0
|
||||
|
||||
|
||||
|
||||
AlbertForQuestionAnswering,pass,0
|
||||
|
||||
|
||||
|
||||
BartForCausalLM,pass,0
|
||||
|
||||
|
||||
|
||||
BartForConditionalGeneration,pass,0
|
||||
|
||||
|
||||
|
||||
BertForMaskedLM,pass,0
|
||||
|
||||
|
||||
|
||||
BertForQuestionAnswering,pass,0
|
||||
|
||||
|
||||
|
||||
BlenderbotForCausalLM,pass_due_to_skip,0
|
||||
|
||||
|
||||
|
||||
BlenderbotSmallForCausalLM,pass,0
|
||||
|
||||
|
||||
|
||||
BlenderbotSmallForConditionalGeneration,pass,0
|
||||
|
||||
|
||||
|
||||
CamemBert,pass,0
|
||||
|
||||
|
||||
|
||||
DebertaV2ForMaskedLM,pass_due_to_skip,0
|
||||
|
||||
|
||||
|
||||
DebertaV2ForQuestionAnswering,pass,0
|
||||
|
||||
|
||||
|
||||
DistilBertForMaskedLM,pass,0
|
||||
|
||||
|
||||
|
||||
DistilBertForQuestionAnswering,pass,0
|
||||
|
||||
|
||||
|
||||
DistillGPT2,pass,0
|
||||
|
||||
|
||||
@ -66,10 +34,6 @@ ElectraForCausalLM,pass,0
|
||||
|
||||
|
||||
|
||||
ElectraForQuestionAnswering,pass,0
|
||||
|
||||
|
||||
|
||||
GPT2ForSequenceClassification,pass,0
|
||||
|
||||
|
||||
@ -82,10 +46,6 @@ LayoutLMForMaskedLM,pass,0
|
||||
|
||||
|
||||
|
||||
LayoutLMForSequenceClassification,pass,0
|
||||
|
||||
|
||||
|
||||
M2M100ForConditionalGeneration,pass,0
|
||||
|
||||
|
||||
@ -94,10 +54,6 @@ MBartForCausalLM,pass,0
|
||||
|
||||
|
||||
|
||||
MBartForConditionalGeneration,pass,0
|
||||
|
||||
|
||||
|
||||
MT5ForConditionalGeneration,pass,0
|
||||
|
||||
|
||||
@ -106,18 +62,10 @@ MegatronBertForCausalLM,pass,0
|
||||
|
||||
|
||||
|
||||
MegatronBertForQuestionAnswering,pass,0
|
||||
|
||||
|
||||
|
||||
MobileBertForMaskedLM,pass,0
|
||||
|
||||
|
||||
|
||||
MobileBertForQuestionAnswering,pass,0
|
||||
|
||||
|
||||
|
||||
OPTForCausalLM,pass,0
|
||||
|
||||
|
||||
@ -126,26 +74,14 @@ PLBartForCausalLM,pass,0
|
||||
|
||||
|
||||
|
||||
PLBartForConditionalGeneration,pass,0
|
||||
|
||||
|
||||
|
||||
PegasusForCausalLM,pass,0
|
||||
|
||||
|
||||
|
||||
PegasusForConditionalGeneration,pass,0
|
||||
|
||||
|
||||
|
||||
RobertaForCausalLM,pass,0
|
||||
|
||||
|
||||
|
||||
RobertaForQuestionAnswering,pass,0
|
||||
|
||||
|
||||
|
||||
T5ForConditionalGeneration,pass,0
|
||||
|
||||
|
||||
|
|
@ -6,10 +6,6 @@ AlbertForMaskedLM,pass,0
|
||||
|
||||
|
||||
|
||||
AlbertForQuestionAnswering,pass,0
|
||||
|
||||
|
||||
|
||||
AllenaiLongformerBase,pass,4
|
||||
|
||||
|
||||
@ -18,50 +14,22 @@ BartForCausalLM,pass,0
|
||||
|
||||
|
||||
|
||||
BartForConditionalGeneration,pass,0
|
||||
|
||||
|
||||
|
||||
BertForMaskedLM,pass,0
|
||||
|
||||
|
||||
|
||||
BertForQuestionAnswering,pass,0
|
||||
|
||||
|
||||
|
||||
BlenderbotForCausalLM,pass_due_to_skip,0
|
||||
|
||||
|
||||
|
||||
BlenderbotSmallForCausalLM,pass,0
|
||||
|
||||
|
||||
|
||||
BlenderbotSmallForConditionalGeneration,pass,0
|
||||
|
||||
|
||||
|
||||
CamemBert,pass,0
|
||||
|
||||
|
||||
|
||||
DebertaV2ForMaskedLM,pass_due_to_skip,0
|
||||
|
||||
|
||||
|
||||
DebertaV2ForQuestionAnswering,pass,0
|
||||
|
||||
|
||||
|
||||
DistilBertForMaskedLM,pass,0
|
||||
|
||||
|
||||
|
||||
DistilBertForQuestionAnswering,pass,0
|
||||
|
||||
|
||||
|
||||
DistillGPT2,pass,2
|
||||
|
||||
|
||||
@ -70,10 +38,6 @@ ElectraForCausalLM,pass,0
|
||||
|
||||
|
||||
|
||||
ElectraForQuestionAnswering,pass,0
|
||||
|
||||
|
||||
|
||||
GPT2ForSequenceClassification,pass,0
|
||||
|
||||
|
||||
@ -86,10 +50,6 @@ LayoutLMForMaskedLM,pass,0
|
||||
|
||||
|
||||
|
||||
LayoutLMForSequenceClassification,pass,0
|
||||
|
||||
|
||||
|
||||
M2M100ForConditionalGeneration,pass,0
|
||||
|
||||
|
||||
@ -98,10 +58,6 @@ MBartForCausalLM,pass,0
|
||||
|
||||
|
||||
|
||||
MBartForConditionalGeneration,pass,0
|
||||
|
||||
|
||||
|
||||
MT5ForConditionalGeneration,pass,0
|
||||
|
||||
|
||||
@ -110,18 +66,10 @@ MegatronBertForCausalLM,pass,0
|
||||
|
||||
|
||||
|
||||
MegatronBertForQuestionAnswering,pass,0
|
||||
|
||||
|
||||
|
||||
MobileBertForMaskedLM,pass,0
|
||||
|
||||
|
||||
|
||||
MobileBertForQuestionAnswering,pass,0
|
||||
|
||||
|
||||
|
||||
OPTForCausalLM,pass,0
|
||||
|
||||
|
||||
@ -130,26 +78,14 @@ PLBartForCausalLM,pass,0
|
||||
|
||||
|
||||
|
||||
PLBartForConditionalGeneration,pass,0
|
||||
|
||||
|
||||
|
||||
PegasusForCausalLM,pass,0
|
||||
|
||||
|
||||
|
||||
PegasusForConditionalGeneration,pass,0
|
||||
|
||||
|
||||
|
||||
RobertaForCausalLM,pass,0
|
||||
|
||||
|
||||
|
||||
RobertaForQuestionAnswering,pass,0
|
||||
|
||||
|
||||
|
||||
T5ForConditionalGeneration,pass,0
|
||||
|
||||
|
||||
|
|
@ -6,10 +6,6 @@ AlbertForMaskedLM,pass,4
|
||||
|
||||
|
||||
|
||||
AlbertForQuestionAnswering,pass,5
|
||||
|
||||
|
||||
|
||||
AllenaiLongformerBase,pass,9
|
||||
|
||||
|
||||
@ -18,50 +14,22 @@ BartForCausalLM,pass,6
|
||||
|
||||
|
||||
|
||||
BartForConditionalGeneration,pass,8
|
||||
|
||||
|
||||
|
||||
BertForMaskedLM,pass,5
|
||||
|
||||
|
||||
|
||||
BertForQuestionAnswering,pass,5
|
||||
|
||||
|
||||
|
||||
BlenderbotForCausalLM,pass_due_to_skip,0
|
||||
|
||||
|
||||
|
||||
BlenderbotSmallForCausalLM,pass,6
|
||||
|
||||
|
||||
|
||||
BlenderbotSmallForConditionalGeneration,pass,8
|
||||
|
||||
|
||||
|
||||
CamemBert,pass,5
|
||||
|
||||
|
||||
|
||||
DebertaV2ForMaskedLM,pass_due_to_skip,0
|
||||
|
||||
|
||||
|
||||
DebertaV2ForQuestionAnswering,pass,4
|
||||
|
||||
|
||||
|
||||
DistilBertForMaskedLM,pass,5
|
||||
|
||||
|
||||
|
||||
DistilBertForQuestionAnswering,pass,5
|
||||
|
||||
|
||||
|
||||
DistillGPT2,pass,7
|
||||
|
||||
|
||||
@ -70,10 +38,6 @@ ElectraForCausalLM,pass,4
|
||||
|
||||
|
||||
|
||||
ElectraForQuestionAnswering,pass,5
|
||||
|
||||
|
||||
|
||||
GPT2ForSequenceClassification,pass,6
|
||||
|
||||
|
||||
@ -86,10 +50,6 @@ LayoutLMForMaskedLM,pass,5
|
||||
|
||||
|
||||
|
||||
LayoutLMForSequenceClassification,pass,6
|
||||
|
||||
|
||||
|
||||
M2M100ForConditionalGeneration,pass,4
|
||||
|
||||
|
||||
@ -98,10 +58,6 @@ MBartForCausalLM,pass,6
|
||||
|
||||
|
||||
|
||||
MBartForConditionalGeneration,pass,8
|
||||
|
||||
|
||||
|
||||
MT5ForConditionalGeneration,pass,5
|
||||
|
||||
|
||||
@ -110,18 +66,10 @@ MegatronBertForCausalLM,pass,5
|
||||
|
||||
|
||||
|
||||
MegatronBertForQuestionAnswering,pass,5
|
||||
|
||||
|
||||
|
||||
MobileBertForMaskedLM,pass,3
|
||||
|
||||
|
||||
|
||||
MobileBertForQuestionAnswering,pass,3
|
||||
|
||||
|
||||
|
||||
OPTForCausalLM,pass,8
|
||||
|
||||
|
||||
@ -130,26 +78,14 @@ PLBartForCausalLM,pass,6
|
||||
|
||||
|
||||
|
||||
PLBartForConditionalGeneration,pass,8
|
||||
|
||||
|
||||
|
||||
PegasusForCausalLM,pass,6
|
||||
|
||||
|
||||
|
||||
PegasusForConditionalGeneration,pass,7
|
||||
|
||||
|
||||
|
||||
RobertaForCausalLM,pass,5
|
||||
|
||||
|
||||
|
||||
RobertaForQuestionAnswering,pass,5
|
||||
|
||||
|
||||
|
||||
T5ForConditionalGeneration,pass,5
|
||||
|
||||
|
||||
|
|
@ -6,10 +6,6 @@ AlbertForMaskedLM,pass,0
|
||||
|
||||
|
||||
|
||||
AlbertForQuestionAnswering,pass,0
|
||||
|
||||
|
||||
|
||||
AllenaiLongformerBase,pass,4
|
||||
|
||||
|
||||
@ -18,50 +14,22 @@ BartForCausalLM,pass,0
|
||||
|
||||
|
||||
|
||||
BartForConditionalGeneration,pass,0
|
||||
|
||||
|
||||
|
||||
BertForMaskedLM,pass,0
|
||||
|
||||
|
||||
|
||||
BertForQuestionAnswering,pass,0
|
||||
|
||||
|
||||
|
||||
BlenderbotForCausalLM,pass_due_to_skip,0
|
||||
|
||||
|
||||
|
||||
BlenderbotSmallForCausalLM,pass,0
|
||||
|
||||
|
||||
|
||||
BlenderbotSmallForConditionalGeneration,pass,0
|
||||
|
||||
|
||||
|
||||
CamemBert,pass,0
|
||||
|
||||
|
||||
|
||||
DebertaV2ForMaskedLM,pass_due_to_skip,0
|
||||
|
||||
|
||||
|
||||
DebertaV2ForQuestionAnswering,pass,0
|
||||
|
||||
|
||||
|
||||
DistilBertForMaskedLM,pass,0
|
||||
|
||||
|
||||
|
||||
DistilBertForQuestionAnswering,pass,0
|
||||
|
||||
|
||||
|
||||
DistillGPT2,pass,2
|
||||
|
||||
|
||||
@ -70,10 +38,6 @@ ElectraForCausalLM,pass,0
|
||||
|
||||
|
||||
|
||||
ElectraForQuestionAnswering,pass,0
|
||||
|
||||
|
||||
|
||||
GPT2ForSequenceClassification,pass,0
|
||||
|
||||
|
||||
@ -86,10 +50,6 @@ LayoutLMForMaskedLM,pass,0
|
||||
|
||||
|
||||
|
||||
LayoutLMForSequenceClassification,pass,0
|
||||
|
||||
|
||||
|
||||
M2M100ForConditionalGeneration,pass,0
|
||||
|
||||
|
||||
@ -98,10 +58,6 @@ MBartForCausalLM,pass,0
|
||||
|
||||
|
||||
|
||||
MBartForConditionalGeneration,pass,0
|
||||
|
||||
|
||||
|
||||
MT5ForConditionalGeneration,pass,0
|
||||
|
||||
|
||||
@ -110,18 +66,10 @@ MegatronBertForCausalLM,pass,0
|
||||
|
||||
|
||||
|
||||
MegatronBertForQuestionAnswering,pass,0
|
||||
|
||||
|
||||
|
||||
MobileBertForMaskedLM,pass,0
|
||||
|
||||
|
||||
|
||||
MobileBertForQuestionAnswering,pass,0
|
||||
|
||||
|
||||
|
||||
OPTForCausalLM,pass,0
|
||||
|
||||
|
||||
@ -130,26 +78,14 @@ PLBartForCausalLM,pass,0
|
||||
|
||||
|
||||
|
||||
PLBartForConditionalGeneration,pass,0
|
||||
|
||||
|
||||
|
||||
PegasusForCausalLM,pass,0
|
||||
|
||||
|
||||
|
||||
PegasusForConditionalGeneration,pass,0
|
||||
|
||||
|
||||
|
||||
RobertaForCausalLM,pass,0
|
||||
|
||||
|
||||
|
||||
RobertaForQuestionAnswering,pass,0
|
||||
|
||||
|
||||
|
||||
T5ForConditionalGeneration,pass,0
|
||||
|
||||
|
||||
|
|
@ -6,10 +6,6 @@ AlbertForMaskedLM,pass,4
|
||||
|
||||
|
||||
|
||||
AlbertForQuestionAnswering,pass,5
|
||||
|
||||
|
||||
|
||||
AllenaiLongformerBase,pass,9
|
||||
|
||||
|
||||
@ -18,50 +14,22 @@ BartForCausalLM,pass,6
|
||||
|
||||
|
||||
|
||||
BartForConditionalGeneration,pass,8
|
||||
|
||||
|
||||
|
||||
BertForMaskedLM,pass,5
|
||||
|
||||
|
||||
|
||||
BertForQuestionAnswering,pass,5
|
||||
|
||||
|
||||
|
||||
BlenderbotForCausalLM,pass_due_to_skip,0
|
||||
|
||||
|
||||
|
||||
BlenderbotSmallForCausalLM,pass,6
|
||||
|
||||
|
||||
|
||||
BlenderbotSmallForConditionalGeneration,pass,8
|
||||
|
||||
|
||||
|
||||
CamemBert,pass,5
|
||||
|
||||
|
||||
|
||||
DebertaV2ForMaskedLM,pass_due_to_skip,0
|
||||
|
||||
|
||||
|
||||
DebertaV2ForQuestionAnswering,pass,4
|
||||
|
||||
|
||||
|
||||
DistilBertForMaskedLM,pass,5
|
||||
|
||||
|
||||
|
||||
DistilBertForQuestionAnswering,pass,5
|
||||
|
||||
|
||||
|
||||
DistillGPT2,pass,7
|
||||
|
||||
|
||||
@ -70,10 +38,6 @@ ElectraForCausalLM,pass,4
|
||||
|
||||
|
||||
|
||||
ElectraForQuestionAnswering,pass,5
|
||||
|
||||
|
||||
|
||||
GPT2ForSequenceClassification,pass,6
|
||||
|
||||
|
||||
@ -86,10 +50,6 @@ LayoutLMForMaskedLM,pass,5
|
||||
|
||||
|
||||
|
||||
LayoutLMForSequenceClassification,pass,6
|
||||
|
||||
|
||||
|
||||
M2M100ForConditionalGeneration,pass,4
|
||||
|
||||
|
||||
@ -98,10 +58,6 @@ MBartForCausalLM,pass,6
|
||||
|
||||
|
||||
|
||||
MBartForConditionalGeneration,pass,8
|
||||
|
||||
|
||||
|
||||
MT5ForConditionalGeneration,pass,5
|
||||
|
||||
|
||||
@ -110,18 +66,10 @@ MegatronBertForCausalLM,pass,5
|
||||
|
||||
|
||||
|
||||
MegatronBertForQuestionAnswering,pass,5
|
||||
|
||||
|
||||
|
||||
MobileBertForMaskedLM,pass,3
|
||||
|
||||
|
||||
|
||||
MobileBertForQuestionAnswering,pass,3
|
||||
|
||||
|
||||
|
||||
OPTForCausalLM,pass,8
|
||||
|
||||
|
||||
@ -130,26 +78,14 @@ PLBartForCausalLM,pass,6
|
||||
|
||||
|
||||
|
||||
PLBartForConditionalGeneration,pass,8
|
||||
|
||||
|
||||
|
||||
PegasusForCausalLM,pass,6
|
||||
|
||||
|
||||
|
||||
PegasusForConditionalGeneration,pass,7
|
||||
|
||||
|
||||
|
||||
RobertaForCausalLM,pass,5
|
||||
|
||||
|
||||
|
||||
RobertaForQuestionAnswering,pass,5
|
||||
|
||||
|
||||
|
||||
T5ForConditionalGeneration,pass,5
|
||||
|
||||
|
||||
|
|
@ -6,10 +6,6 @@ AlbertForMaskedLM,pass,0
|
||||
|
||||
|
||||
|
||||
AlbertForQuestionAnswering,pass,0
|
||||
|
||||
|
||||
|
||||
AllenaiLongformerBase,pass,4
|
||||
|
||||
|
||||
@ -18,50 +14,22 @@ BartForCausalLM,pass,0
|
||||
|
||||
|
||||
|
||||
BartForConditionalGeneration,pass,0
|
||||
|
||||
|
||||
|
||||
BertForMaskedLM,pass,0
|
||||
|
||||
|
||||
|
||||
BertForQuestionAnswering,pass,0
|
||||
|
||||
|
||||
|
||||
BlenderbotForCausalLM,pass_due_to_skip,0
|
||||
|
||||
|
||||
|
||||
BlenderbotSmallForCausalLM,pass,0
|
||||
|
||||
|
||||
|
||||
BlenderbotSmallForConditionalGeneration,pass,0
|
||||
|
||||
|
||||
|
||||
CamemBert,pass,0
|
||||
|
||||
|
||||
|
||||
DebertaV2ForMaskedLM,pass_due_to_skip,0
|
||||
|
||||
|
||||
|
||||
DebertaV2ForQuestionAnswering,pass,0
|
||||
|
||||
|
||||
|
||||
DistilBertForMaskedLM,pass,0
|
||||
|
||||
|
||||
|
||||
DistilBertForQuestionAnswering,pass,0
|
||||
|
||||
|
||||
|
||||
DistillGPT2,pass,2
|
||||
|
||||
|
||||
@ -70,10 +38,6 @@ ElectraForCausalLM,pass,0
|
||||
|
||||
|
||||
|
||||
ElectraForQuestionAnswering,pass,0
|
||||
|
||||
|
||||
|
||||
GPT2ForSequenceClassification,pass,0
|
||||
|
||||
|
||||
@ -86,10 +50,6 @@ LayoutLMForMaskedLM,pass,0
|
||||
|
||||
|
||||
|
||||
LayoutLMForSequenceClassification,pass,0
|
||||
|
||||
|
||||
|
||||
M2M100ForConditionalGeneration,pass,0
|
||||
|
||||
|
||||
@ -98,10 +58,6 @@ MBartForCausalLM,pass,0
|
||||
|
||||
|
||||
|
||||
MBartForConditionalGeneration,pass,0
|
||||
|
||||
|
||||
|
||||
MT5ForConditionalGeneration,pass,0
|
||||
|
||||
|
||||
@ -110,18 +66,10 @@ MegatronBertForCausalLM,pass,0
|
||||
|
||||
|
||||
|
||||
MegatronBertForQuestionAnswering,pass,0
|
||||
|
||||
|
||||
|
||||
MobileBertForMaskedLM,pass,0
|
||||
|
||||
|
||||
|
||||
MobileBertForQuestionAnswering,pass,0
|
||||
|
||||
|
||||
|
||||
OPTForCausalLM,pass,0
|
||||
|
||||
|
||||
@ -130,26 +78,14 @@ PLBartForCausalLM,pass,0
|
||||
|
||||
|
||||
|
||||
PLBartForConditionalGeneration,pass,0
|
||||
|
||||
|
||||
|
||||
PegasusForCausalLM,pass,0
|
||||
|
||||
|
||||
|
||||
PegasusForConditionalGeneration,pass,0
|
||||
|
||||
|
||||
|
||||
RobertaForCausalLM,pass,0
|
||||
|
||||
|
||||
|
||||
RobertaForQuestionAnswering,pass,0
|
||||
|
||||
|
||||
|
||||
T5ForConditionalGeneration,pass,0
|
||||
|
||||
|
||||
|
|
@ -6,10 +6,6 @@ AlbertForMaskedLM,pass,4
|
||||
|
||||
|
||||
|
||||
AlbertForQuestionAnswering,pass,5
|
||||
|
||||
|
||||
|
||||
AllenaiLongformerBase,pass,9
|
||||
|
||||
|
||||
@ -18,50 +14,22 @@ BartForCausalLM,pass,6
|
||||
|
||||
|
||||
|
||||
BartForConditionalGeneration,pass,8
|
||||
|
||||
|
||||
|
||||
BertForMaskedLM,pass,5
|
||||
|
||||
|
||||
|
||||
BertForQuestionAnswering,pass,5
|
||||
|
||||
|
||||
|
||||
BlenderbotForCausalLM,pass_due_to_skip,0
|
||||
|
||||
|
||||
|
||||
BlenderbotSmallForCausalLM,pass,6
|
||||
|
||||
|
||||
|
||||
BlenderbotSmallForConditionalGeneration,pass,8
|
||||
|
||||
|
||||
|
||||
CamemBert,pass,5
|
||||
|
||||
|
||||
|
||||
DebertaV2ForMaskedLM,pass_due_to_skip,0
|
||||
|
||||
|
||||
|
||||
DebertaV2ForQuestionAnswering,pass,4
|
||||
|
||||
|
||||
|
||||
DistilBertForMaskedLM,pass,5
|
||||
|
||||
|
||||
|
||||
DistilBertForQuestionAnswering,pass,5
|
||||
|
||||
|
||||
|
||||
DistillGPT2,pass,7
|
||||
|
||||
|
||||
@ -70,10 +38,6 @@ ElectraForCausalLM,pass,4
|
||||
|
||||
|
||||
|
||||
ElectraForQuestionAnswering,pass,5
|
||||
|
||||
|
||||
|
||||
GPT2ForSequenceClassification,pass,6
|
||||
|
||||
|
||||
@ -86,10 +50,6 @@ LayoutLMForMaskedLM,pass,5
|
||||
|
||||
|
||||
|
||||
LayoutLMForSequenceClassification,pass,6
|
||||
|
||||
|
||||
|
||||
M2M100ForConditionalGeneration,pass,4
|
||||
|
||||
|
||||
@ -98,10 +58,6 @@ MBartForCausalLM,pass,6
|
||||
|
||||
|
||||
|
||||
MBartForConditionalGeneration,pass,8
|
||||
|
||||
|
||||
|
||||
MT5ForConditionalGeneration,pass,5
|
||||
|
||||
|
||||
@ -110,18 +66,10 @@ MegatronBertForCausalLM,pass,5
|
||||
|
||||
|
||||
|
||||
MegatronBertForQuestionAnswering,pass,5
|
||||
|
||||
|
||||
|
||||
MobileBertForMaskedLM,pass,3
|
||||
|
||||
|
||||
|
||||
MobileBertForQuestionAnswering,pass,3
|
||||
|
||||
|
||||
|
||||
OPTForCausalLM,pass,8
|
||||
|
||||
|
||||
@ -130,26 +78,14 @@ PLBartForCausalLM,pass,6
|
||||
|
||||
|
||||
|
||||
PLBartForConditionalGeneration,pass,8
|
||||
|
||||
|
||||
|
||||
PegasusForCausalLM,pass,6
|
||||
|
||||
|
||||
|
||||
PegasusForConditionalGeneration,pass,7
|
||||
|
||||
|
||||
|
||||
RobertaForCausalLM,pass,5
|
||||
|
||||
|
||||
|
||||
RobertaForQuestionAnswering,pass,5
|
||||
|
||||
|
||||
|
||||
T5ForConditionalGeneration,pass,5
|
||||
|
||||
|
||||
|
|
@ -6,10 +6,6 @@ AlbertForMaskedLM,pass,0
|
||||
|
||||
|
||||
|
||||
AlbertForQuestionAnswering,pass,0
|
||||
|
||||
|
||||
|
||||
AllenaiLongformerBase,pass,4
|
||||
|
||||
|
||||
@ -18,50 +14,22 @@ BartForCausalLM,pass,0
|
||||
|
||||
|
||||
|
||||
BartForConditionalGeneration,pass,0
|
||||
|
||||
|
||||
|
||||
BertForMaskedLM,pass,0
|
||||
|
||||
|
||||
|
||||
BertForQuestionAnswering,pass,0
|
||||
|
||||
|
||||
|
||||
BlenderbotForCausalLM,pass_due_to_skip,0
|
||||
|
||||
|
||||
|
||||
BlenderbotSmallForCausalLM,pass,0
|
||||
|
||||
|
||||
|
||||
BlenderbotSmallForConditionalGeneration,pass,0
|
||||
|
||||
|
||||
|
||||
CamemBert,pass,0
|
||||
|
||||
|
||||
|
||||
DebertaV2ForMaskedLM,pass_due_to_skip,0
|
||||
|
||||
|
||||
|
||||
DebertaV2ForQuestionAnswering,pass,0
|
||||
|
||||
|
||||
|
||||
DistilBertForMaskedLM,pass,0
|
||||
|
||||
|
||||
|
||||
DistilBertForQuestionAnswering,pass,0
|
||||
|
||||
|
||||
|
||||
DistillGPT2,pass,2
|
||||
|
||||
|
||||
@ -70,10 +38,6 @@ ElectraForCausalLM,pass,0
|
||||
|
||||
|
||||
|
||||
ElectraForQuestionAnswering,pass,0
|
||||
|
||||
|
||||
|
||||
GPT2ForSequenceClassification,pass,0
|
||||
|
||||
|
||||
@ -86,10 +50,6 @@ LayoutLMForMaskedLM,pass,0
|
||||
|
||||
|
||||
|
||||
LayoutLMForSequenceClassification,pass,0
|
||||
|
||||
|
||||
|
||||
M2M100ForConditionalGeneration,pass,0
|
||||
|
||||
|
||||
@ -98,10 +58,6 @@ MBartForCausalLM,pass,0
|
||||
|
||||
|
||||
|
||||
MBartForConditionalGeneration,pass,0
|
||||
|
||||
|
||||
|
||||
MT5ForConditionalGeneration,pass,0
|
||||
|
||||
|
||||
@ -110,18 +66,10 @@ MegatronBertForCausalLM,pass,0
|
||||
|
||||
|
||||
|
||||
MegatronBertForQuestionAnswering,pass,0
|
||||
|
||||
|
||||
|
||||
MobileBertForMaskedLM,pass,0
|
||||
|
||||
|
||||
|
||||
MobileBertForQuestionAnswering,pass,0
|
||||
|
||||
|
||||
|
||||
OPTForCausalLM,pass,0
|
||||
|
||||
|
||||
@ -130,26 +78,14 @@ PLBartForCausalLM,pass,0
|
||||
|
||||
|
||||
|
||||
PLBartForConditionalGeneration,pass,0
|
||||
|
||||
|
||||
|
||||
PegasusForCausalLM,pass,0
|
||||
|
||||
|
||||
|
||||
PegasusForConditionalGeneration,pass,0
|
||||
|
||||
|
||||
|
||||
RobertaForCausalLM,pass,0
|
||||
|
||||
|
||||
|
||||
RobertaForQuestionAnswering,pass,0
|
||||
|
||||
|
||||
|
||||
T5ForConditionalGeneration,pass,0
|
||||
|
||||
|
||||
|
|
@ -6,10 +6,6 @@ AlbertForMaskedLM,pass,4
|
||||
|
||||
|
||||
|
||||
AlbertForQuestionAnswering,pass,5
|
||||
|
||||
|
||||
|
||||
AllenaiLongformerBase,pass,9
|
||||
|
||||
|
||||
@ -18,50 +14,22 @@ BartForCausalLM,pass,6
|
||||
|
||||
|
||||
|
||||
BartForConditionalGeneration,pass,8
|
||||
|
||||
|
||||
|
||||
BertForMaskedLM,pass,5
|
||||
|
||||
|
||||
|
||||
BertForQuestionAnswering,pass,5
|
||||
|
||||
|
||||
|
||||
BlenderbotForCausalLM,eager_fail_to_run,0
|
||||
|
||||
|
||||
|
||||
BlenderbotSmallForCausalLM,pass,6
|
||||
|
||||
|
||||
|
||||
BlenderbotSmallForConditionalGeneration,pass,8
|
||||
|
||||
|
||||
|
||||
CamemBert,pass,5
|
||||
|
||||
|
||||
|
||||
DebertaV2ForMaskedLM,pass_due_to_skip,0
|
||||
|
||||
|
||||
|
||||
DebertaV2ForQuestionAnswering,eager_1st_run_OOM,0
|
||||
|
||||
|
||||
|
||||
DistilBertForMaskedLM,pass,5
|
||||
|
||||
|
||||
|
||||
DistilBertForQuestionAnswering,pass,5
|
||||
|
||||
|
||||
|
||||
DistillGPT2,pass,7
|
||||
|
||||
|
||||
@ -70,10 +38,6 @@ ElectraForCausalLM,pass,4
|
||||
|
||||
|
||||
|
||||
ElectraForQuestionAnswering,pass,5
|
||||
|
||||
|
||||
|
||||
GPT2ForSequenceClassification,pass,6
|
||||
|
||||
|
||||
@ -86,10 +50,6 @@ LayoutLMForMaskedLM,pass,5
|
||||
|
||||
|
||||
|
||||
LayoutLMForSequenceClassification,pass,6
|
||||
|
||||
|
||||
|
||||
M2M100ForConditionalGeneration,pass,4
|
||||
|
||||
|
||||
@ -98,10 +58,6 @@ MBartForCausalLM,pass,6
|
||||
|
||||
|
||||
|
||||
MBartForConditionalGeneration,pass,8
|
||||
|
||||
|
||||
|
||||
MT5ForConditionalGeneration,pass,5
|
||||
|
||||
|
||||
@ -110,18 +66,10 @@ MegatronBertForCausalLM,pass,5
|
||||
|
||||
|
||||
|
||||
MegatronBertForQuestionAnswering,pass,5
|
||||
|
||||
|
||||
|
||||
MobileBertForMaskedLM,pass,3
|
||||
|
||||
|
||||
|
||||
MobileBertForQuestionAnswering,pass,3
|
||||
|
||||
|
||||
|
||||
OPTForCausalLM,pass,8
|
||||
|
||||
|
||||
@ -130,26 +78,14 @@ PLBartForCausalLM,pass,6
|
||||
|
||||
|
||||
|
||||
PLBartForConditionalGeneration,pass,8
|
||||
|
||||
|
||||
|
||||
PegasusForCausalLM,pass,6
|
||||
|
||||
|
||||
|
||||
PegasusForConditionalGeneration,pass,7
|
||||
|
||||
|
||||
|
||||
RobertaForCausalLM,pass,5
|
||||
|
||||
|
||||
|
||||
RobertaForQuestionAnswering,pass,5
|
||||
|
||||
|
||||
|
||||
T5ForConditionalGeneration,pass,5
|
||||
|
||||
|
||||
|
|
@ -123,8 +123,6 @@ CI_SKIP_OPTIMIZER = {
|
||||
# HF
|
||||
"pnasnet5large", # Stack issue in fx
|
||||
"MobileBertForMaskedLM", # Stack issue in fx
|
||||
"MobileBertForQuestionAnswering", # Stack issue in fx
|
||||
"PegasusForConditionalGeneration", # OOM
|
||||
}
|
||||
|
||||
try:
|
||||
@ -192,17 +190,11 @@ BENCHMARK_USE_SGD = {
|
||||
# HF
|
||||
"AlbertForMaskedLM",
|
||||
"BartForCausalLM",
|
||||
"BartForConditionalGeneration",
|
||||
"BlenderbotSmallForCausalLM",
|
||||
"BlenderbotSmallForConditionalGeneration",
|
||||
"DebertaV2ForQuestionAnswering", # eager OOM
|
||||
"ElectraForCausalLM",
|
||||
"M2M100ForConditionalGeneration",
|
||||
"MBartForCausalLM",
|
||||
"MBartForConditionalGeneration",
|
||||
"OPTForCausalLM",
|
||||
"PLBartForCausalLM",
|
||||
"PLBartForConditionalGeneration",
|
||||
"PegasusForCausalLM",
|
||||
"TrOCRForCausalLM",
|
||||
"XGLMForCausalLM",
|
||||
@ -3789,7 +3781,6 @@ def run(runner, args, original_dir=None):
|
||||
torch.use_deterministic_algorithms(True, warn_only=True)
|
||||
os.environ["CUBLAS_WORKSPACE_CONFIG"] = ":4096:8"
|
||||
if args.only is not None and args.only in {
|
||||
"DebertaForQuestionAnswering",
|
||||
"nvidia_deeprecommender",
|
||||
"crossvit_9_240",
|
||||
}:
|
||||
|
@ -59,7 +59,6 @@ imports = [
|
||||
"BigBirdConfig",
|
||||
"BlenderbotForConditionalGeneration",
|
||||
"BlenderbotModel",
|
||||
"BlenderbotSmallForConditionalGeneration",
|
||||
"BlenderbotSmallModel",
|
||||
"CLIPModel",
|
||||
"CLIPVisionModel",
|
||||
@ -73,7 +72,6 @@ imports = [
|
||||
"MarianForCausalLM",
|
||||
"MarianModel",
|
||||
"MarianMTModel",
|
||||
"PegasusForConditionalGeneration",
|
||||
"PegasusModel",
|
||||
"ReformerConfig",
|
||||
"ViTForImageClassification",
|
||||
@ -167,7 +165,7 @@ def get_sequence_length(model_cls, model_name):
|
||||
"Bert",
|
||||
"Roberta",
|
||||
)
|
||||
) or model_name in ("DistillGPT2", "GoogleFnet", "YituTechConvBert", "CamemBert"):
|
||||
) or model_name in ("DistillGPT2", "GoogleFnet", "YituTechConvBert"):
|
||||
seq_length = 512
|
||||
elif model_name in ("TrOCRForCausalLM"):
|
||||
seq_length = 256
|
||||
@ -222,9 +220,7 @@ def generate_inputs_for_model(
|
||||
BlenderbotModel,
|
||||
BlenderbotSmallModel,
|
||||
BlenderbotForConditionalGeneration,
|
||||
BlenderbotSmallForConditionalGeneration,
|
||||
PegasusModel,
|
||||
PegasusForConditionalGeneration,
|
||||
MarianModel,
|
||||
MarianMTModel,
|
||||
]:
|
||||
@ -333,10 +329,6 @@ EXTRA_MODELS = {
|
||||
AutoConfig.from_pretrained("YituTech/conv-bert-base"),
|
||||
AutoModelForMaskedLM,
|
||||
),
|
||||
"CamemBert": (
|
||||
AutoConfig.from_pretrained("camembert-base"),
|
||||
AutoModelForMaskedLM,
|
||||
),
|
||||
}
|
||||
|
||||
|
||||
@ -375,8 +367,6 @@ class HuggingfaceRunner(BenchmarkRunner):
|
||||
|
||||
def use_larger_multiplier_for_smaller_tensor(self, name):
|
||||
return name in [
|
||||
"ElectraForQuestionAnswering",
|
||||
"MegatronBertForQuestionAnswering",
|
||||
"GPT2ForSequenceClassification",
|
||||
]
|
||||
|
||||
|
@ -31,24 +31,15 @@ batch_size:
|
||||
# TODO - Fails even after fake tensors
|
||||
divisors:
|
||||
AlbertForMaskedLM: 2
|
||||
AlbertForQuestionAnswering: 2
|
||||
AllenaiLongformerBase: 2
|
||||
BartForCausalLM: 2
|
||||
BartForConditionalGeneration: 2
|
||||
BertForMaskedLM: 2
|
||||
BertForQuestionAnswering: 2
|
||||
BlenderbotForCausalLM: 8
|
||||
# BlenderbotForConditionalGeneration : 16
|
||||
BlenderbotSmallForCausalLM: 4
|
||||
BlenderbotSmallForConditionalGeneration: 2
|
||||
CamemBert: 2
|
||||
DebertaV2ForMaskedLM: 4
|
||||
DebertaV2ForQuestionAnswering: 8
|
||||
DistilBertForMaskedLM: 2
|
||||
DistilBertForQuestionAnswering: 2
|
||||
DistillGPT2: 2
|
||||
ElectraForCausalLM: 2
|
||||
ElectraForQuestionAnswering: 2
|
||||
GPT2ForSequenceClassification: 2
|
||||
# GPTJForCausalLM : 2
|
||||
# GPTJForQuestionAnswering : 2
|
||||
@ -56,22 +47,15 @@ batch_size:
|
||||
# GPTNeoForSequenceClassification : 2
|
||||
GoogleFnet: 2
|
||||
LayoutLMForMaskedLM: 2
|
||||
LayoutLMForSequenceClassification: 2
|
||||
M2M100ForConditionalGeneration: 4
|
||||
MBartForCausalLM: 2
|
||||
MBartForConditionalGeneration: 2
|
||||
MT5ForConditionalGeneration: 2
|
||||
MegatronBertForCausalLM: 4
|
||||
MegatronBertForQuestionAnswering: 2
|
||||
MobileBertForMaskedLM: 2
|
||||
MobileBertForQuestionAnswering: 2
|
||||
OPTForCausalLM: 2
|
||||
PLBartForCausalLM: 2
|
||||
PLBartForConditionalGeneration: 2
|
||||
PegasusForCausalLM: 4
|
||||
PegasusForConditionalGeneration: 2
|
||||
RobertaForCausalLM: 2
|
||||
RobertaForQuestionAnswering: 2
|
||||
T5ForConditionalGeneration: 2
|
||||
T5Small: 2
|
||||
TrOCRForCausalLM: 2
|
||||
@ -90,20 +74,13 @@ batch_size:
|
||||
tolerance:
|
||||
higher_training:
|
||||
- MT5ForConditionalGeneration
|
||||
# AlbertForQuestionAnswering fails in CI GCP A100 but error does not seem
|
||||
# harmful.
|
||||
- AlbertForQuestionAnswering
|
||||
|
||||
higher_max_autotune_training:
|
||||
# DebertaForQuestionAnswering needs higher tolerance in Max-Autotune mode
|
||||
- DebertaForQuestionAnswering
|
||||
higher_max_autotune_training: []
|
||||
|
||||
higher_inference:
|
||||
- GPT2ForSequenceClassification
|
||||
- RobertaForQuestionAnswering
|
||||
|
||||
higher_inference_cpu:
|
||||
- LayoutLMForSequenceClassification
|
||||
- GPT2ForSequenceClassification
|
||||
|
||||
cosine: []
|
||||
|
@ -1,22 +1,13 @@
|
||||
AlbertForMaskedLM,8
|
||||
AlbertForQuestionAnswering,8
|
||||
AllenaiLongformerBase,8
|
||||
BartForCausalLM,8
|
||||
BartForConditionalGeneration,4
|
||||
BertForMaskedLM,32
|
||||
BertForQuestionAnswering,32
|
||||
BlenderbotForCausalLM,32
|
||||
BlenderbotForConditionalGeneration,16
|
||||
BlenderbotSmallForCausalLM,256
|
||||
BlenderbotSmallForConditionalGeneration,128
|
||||
CamemBert,32
|
||||
DebertaV2ForMaskedLM,8
|
||||
DebertaV2ForQuestionAnswering,8
|
||||
DistilBertForMaskedLM,256
|
||||
DistilBertForQuestionAnswering,512
|
||||
DistillGPT2,32
|
||||
ElectraForCausalLM,64
|
||||
ElectraForQuestionAnswering,128
|
||||
GPT2ForSequenceClassification,8
|
||||
GPTJForCausalLM,1
|
||||
GPTJForQuestionAnswering,1
|
||||
@ -24,22 +15,15 @@ GPTNeoForCausalLM,32
|
||||
GPTNeoForSequenceClassification,32
|
||||
GoogleFnet,32
|
||||
LayoutLMForMaskedLM,32
|
||||
LayoutLMForSequenceClassification,32
|
||||
M2M100ForConditionalGeneration,64
|
||||
MBartForCausalLM,8
|
||||
MBartForConditionalGeneration,4
|
||||
MT5ForConditionalGeneration,32
|
||||
MegatronBertForCausalLM,16
|
||||
MegatronBertForQuestionAnswering,16
|
||||
MobileBertForMaskedLM,256
|
||||
MobileBertForQuestionAnswering,256
|
||||
OPTForCausalLM,4
|
||||
PLBartForCausalLM,16
|
||||
PLBartForConditionalGeneration,8
|
||||
PegasusForCausalLM,128
|
||||
PegasusForConditionalGeneration,64
|
||||
RobertaForCausalLM,32
|
||||
RobertaForQuestionAnswering,32
|
||||
T5ForConditionalGeneration,8
|
||||
T5Small,8
|
||||
TrOCRForCausalLM,64
|
||||
|
@ -1,41 +1,25 @@
|
||||
AlbertForMaskedLM,4
|
||||
AlbertForQuestionAnswering,4
|
||||
AllenaiLongformerBase,4
|
||||
BartForCausalLM,4
|
||||
BartForConditionalGeneration,2
|
||||
BertForMaskedLM,16
|
||||
BertForQuestionAnswering,16
|
||||
BigBird,32
|
||||
BlenderbotForCausalLM,32
|
||||
BlenderbotSmallForCausalLM,64
|
||||
BlenderbotSmallForConditionalGeneration,64
|
||||
CamemBert,16
|
||||
DebertaV2ForMaskedLM,16
|
||||
DebertaV2ForQuestionAnswering,2
|
||||
DistilBertForMaskedLM,128
|
||||
DistilBertForQuestionAnswering,256
|
||||
DistillGPT2,16
|
||||
ElectraForCausalLM,8
|
||||
ElectraForQuestionAnswering,8
|
||||
GoogleFnet,16
|
||||
GPT2ForSequenceClassification,4
|
||||
LayoutLMForMaskedLM,16
|
||||
LayoutLMForSequenceClassification,16
|
||||
M2M100ForConditionalGeneration,16
|
||||
MBartForCausalLM,4
|
||||
MBartForConditionalGeneration,2
|
||||
MegatronBertForCausalLM,4
|
||||
MegatronBertForQuestionAnswering,8
|
||||
MobileBertForMaskedLM,64
|
||||
MobileBertForQuestionAnswering,64
|
||||
MT5ForConditionalGeneration,16
|
||||
OPTForCausalLM,2
|
||||
PegasusForCausalLM,32
|
||||
PegasusForConditionalGeneration,32
|
||||
PLBartForCausalLM,8
|
||||
PLBartForConditionalGeneration,4
|
||||
RobertaForCausalLM,16
|
||||
RobertaForQuestionAnswering,16
|
||||
T5ForConditionalGeneration,4
|
||||
T5Small,1
|
||||
TrOCRForCausalLM,32
|
||||
|
@ -1038,7 +1038,8 @@ def define_buck_targets(
|
||||
name = "generated-version-header",
|
||||
header_namespace = "torch",
|
||||
exported_headers = {
|
||||
"version.h": ":generate-version-header[version.h]",
|
||||
"headeronly/version.h": ":generate-version-header[version.h]",
|
||||
"version.h": "torch/csrc/api/include/torch/version.h"
|
||||
},
|
||||
labels = labels,
|
||||
)
|
||||
@ -1047,19 +1048,27 @@ def define_buck_targets(
|
||||
fb_native.genrule(
|
||||
name = "generate-version-header",
|
||||
srcs = [
|
||||
"torch/csrc/api/include/torch/version.h.in",
|
||||
"torch/headeronly/version.h.in",
|
||||
"version.txt",
|
||||
],
|
||||
cmd = "$(exe {}tools:gen-version-header) ".format(ROOT_PATH) + " ".join([
|
||||
cmd = "mkdir -p $OUT/torch/headeronly && $(exe {}tools:gen-version-header) ".format(ROOT_PATH) + " ".join([
|
||||
"--template-path",
|
||||
"torch/csrc/api/include/torch/version.h.in",
|
||||
"torch/headeronly/version.h.in",
|
||||
"--version-path",
|
||||
"version.txt",
|
||||
"--output-path",
|
||||
"$OUT/version.h",
|
||||
"$OUT/torch/headeronly/version.h",
|
||||
]),
|
||||
cmd_exe = "md $OUT\\torch\\headeronly 2>nul & $(exe {}tools:gen-version-header) ".format(ROOT_PATH) + " ".join([
|
||||
"--template-path",
|
||||
"torch/headeronly/version.h.in",
|
||||
"--version-path",
|
||||
"version.txt",
|
||||
"--output-path",
|
||||
"$OUT\\torch\\headeronly\\version.h",
|
||||
]),
|
||||
outs = {
|
||||
"version.h": ["version.h"],
|
||||
"version.h": ["torch/headeronly/version.h"],
|
||||
},
|
||||
default_outs = ["."],
|
||||
)
|
||||
|
12
build.bzl
12
build.bzl
@ -142,18 +142,6 @@ def define_targets(rules):
|
||||
visibility = ["//visibility:public"],
|
||||
)
|
||||
|
||||
rules.genrule(
|
||||
name = "version_h",
|
||||
srcs = [
|
||||
":torch/csrc/api/include/torch/version.h.in",
|
||||
":version.txt",
|
||||
],
|
||||
outs = ["torch/csrc/api/include/torch/version.h"],
|
||||
cmd = "$(execpath //tools/setup_helpers:gen_version_header) " +
|
||||
"--template-path $(location :torch/csrc/api/include/torch/version.h.in) " +
|
||||
"--version-path $(location :version.txt) --output-path $@ ",
|
||||
tools = ["//tools/setup_helpers:gen_version_header"],
|
||||
)
|
||||
|
||||
#
|
||||
# ATen generated code
|
||||
|
@ -913,7 +913,6 @@ libtorch_python_core_sources = [
|
||||
"torch/csrc/autograd/python_torch_functions_manual.cpp",
|
||||
"torch/csrc/autograd/python_variable.cpp",
|
||||
"torch/csrc/autograd/python_variable_indexing.cpp",
|
||||
"torch/csrc/distributed/python_placement.cpp",
|
||||
"torch/csrc/dynamo/python_compiled_autograd.cpp",
|
||||
"torch/csrc/dynamo/cache_entry.cpp",
|
||||
"torch/csrc/dynamo/cpp_shim.cpp",
|
||||
|
@ -127,7 +127,7 @@ struct Event final {
|
||||
}
|
||||
|
||||
void synchronize() const {
|
||||
return impl_.synchronize();
|
||||
impl_.synchronize();
|
||||
}
|
||||
|
||||
private:
|
||||
|
@ -149,7 +149,7 @@ struct C10_API Storage {
|
||||
}
|
||||
|
||||
void set_data_ptr_noswap(at::DataPtr&& data_ptr) const {
|
||||
return storage_impl_->set_data_ptr_noswap(std::move(data_ptr));
|
||||
storage_impl_->set_data_ptr_noswap(std::move(data_ptr));
|
||||
}
|
||||
|
||||
DeviceType device_type() const {
|
||||
|
@ -94,11 +94,11 @@ class VirtualGuardImpl final : public DeviceGuardImplInterface {
|
||||
}
|
||||
|
||||
void synchronizeEvent(void* event) const override {
|
||||
return impl_->synchronizeEvent(event);
|
||||
impl_->synchronizeEvent(event);
|
||||
}
|
||||
|
||||
void synchronizeDevice(const DeviceIndex device_index) const override {
|
||||
return impl_->synchronizeDevice(device_index);
|
||||
impl_->synchronizeDevice(device_index);
|
||||
}
|
||||
|
||||
private:
|
||||
|
@ -1183,6 +1183,8 @@ class DeviceCachingAllocator {
|
||||
// device statistics
|
||||
DeviceStats stats;
|
||||
|
||||
c10::DeviceIndex device_id;
|
||||
|
||||
// unallocated cached blocks larger than 1 MB
|
||||
BlockPool large_blocks;
|
||||
|
||||
@ -1271,8 +1273,10 @@ class DeviceCachingAllocator {
|
||||
|
||||
public:
|
||||
// NOLINTNEXTLINE(cppcoreguidelines-pro-type-member-init)
|
||||
DeviceCachingAllocator()
|
||||
: large_blocks(/*small=*/false), small_blocks(/*small=*/true) {
|
||||
explicit DeviceCachingAllocator(c10::DeviceIndex id)
|
||||
: device_id(id),
|
||||
large_blocks(/*small=*/false),
|
||||
small_blocks(/*small=*/true) {
|
||||
stats.max_split_size =
|
||||
static_cast<int64_t>(CUDAAllocatorConfig::max_split_size());
|
||||
context_recorder_.store(nullptr);
|
||||
@ -1358,10 +1362,7 @@ class DeviceCachingAllocator {
|
||||
// All public methods (except the above) acquire the allocator mutex.
|
||||
// Thus, do not call a public method from another public method.
|
||||
|
||||
Block* malloc(
|
||||
c10::DeviceIndex device,
|
||||
size_t orig_size,
|
||||
cudaStream_t stream) {
|
||||
Block* malloc(size_t orig_size, cudaStream_t stream) {
|
||||
// done outside the lock because we don't know what locks the recorder needs
|
||||
// to have...
|
||||
auto context = maybeGatherContext(RecordContext::STATE);
|
||||
@ -1389,7 +1390,7 @@ class DeviceCachingAllocator {
|
||||
size_t size = round_size(orig_size);
|
||||
auto& pool = get_pool(size, stream);
|
||||
const size_t alloc_size = get_allocation_size(size);
|
||||
AllocParams params(device, size, stream, &pool, alloc_size);
|
||||
AllocParams params(device_id, size, stream, &pool, alloc_size);
|
||||
params.stat_types = get_stat_types_for_pool(pool);
|
||||
|
||||
// First, try to get a block from the existing pool.
|
||||
@ -1436,7 +1437,7 @@ class DeviceCachingAllocator {
|
||||
beginAllocateToPool(mempool_id, filter);
|
||||
auto& mempool = get_pool(size, stream);
|
||||
AllocParams mempool_params(
|
||||
device, size, stream, &mempool, alloc_size);
|
||||
device_id, size, stream, &mempool, alloc_size);
|
||||
mempool_params.stat_types = get_stat_types_for_pool(mempool);
|
||||
block_found = get_free_block(mempool_params);
|
||||
endAllocateToPool(mempool_id);
|
||||
@ -1463,7 +1464,7 @@ class DeviceCachingAllocator {
|
||||
allowed_info = format_size(allowed_memory_maximum) + " allowed; ";
|
||||
}
|
||||
|
||||
std::string proc_info = reportProcessMemoryInfo(device);
|
||||
std::string proc_info = reportProcessMemoryInfo(device_id);
|
||||
|
||||
record_trace(
|
||||
TraceEntry::OOM,
|
||||
@ -1481,7 +1482,7 @@ class DeviceCachingAllocator {
|
||||
.current,
|
||||
stats.reserved_bytes[static_cast<int64_t>(StatType::AGGREGATE)]
|
||||
.current,
|
||||
c10::Device(c10::DeviceType::CUDA, device));
|
||||
c10::Device(c10::DeviceType::CUDA, device_id));
|
||||
|
||||
auto allocated_bytes =
|
||||
stats.allocated_bytes[static_cast<size_t>(StatType::AGGREGATE)]
|
||||
@ -1519,7 +1520,7 @@ class DeviceCachingAllocator {
|
||||
lock.unlock();
|
||||
|
||||
for (const auto& obs : observers_local) {
|
||||
obs(device,
|
||||
obs(device_id,
|
||||
alloc_size,
|
||||
set_fraction ? allowed_memory_maximum : device_total,
|
||||
device_free);
|
||||
@ -1549,7 +1550,7 @@ class DeviceCachingAllocator {
|
||||
"CUDA out of memory. Tried to allocate ",
|
||||
format_size(alloc_size),
|
||||
". GPU ",
|
||||
static_cast<int>(device),
|
||||
static_cast<int>(device_id),
|
||||
" has a total capacity of ",
|
||||
format_size(device_total),
|
||||
" of which ",
|
||||
@ -2501,6 +2502,8 @@ class DeviceCachingAllocator {
|
||||
auto divisions = CUDAAllocatorConfig::roundup_power2_divisions(size);
|
||||
if (divisions > 1 && size > (kMinBlockSize * divisions)) {
|
||||
return roundup_power2_next_division(size, divisions);
|
||||
} else if (divisions == 1) {
|
||||
return llvm::PowerOf2Ceil(size);
|
||||
} else {
|
||||
return kMinBlockSize * ((size + kMinBlockSize - 1) / kMinBlockSize);
|
||||
}
|
||||
@ -3809,7 +3812,8 @@ class NativeCachingAllocator : public CUDAAllocator {
|
||||
if (size < device_count) {
|
||||
device_allocator.resize(device_count);
|
||||
for (const auto i : c10::irange(size, device_count)) {
|
||||
device_allocator[i] = std::make_unique<DeviceCachingAllocator>();
|
||||
device_allocator[i] =
|
||||
std::make_unique<DeviceCachingAllocator>(c10::DeviceIndex(i));
|
||||
}
|
||||
}
|
||||
}
|
||||
@ -3829,7 +3833,7 @@ class NativeCachingAllocator : public CUDAAllocator {
|
||||
"Allocator not initialized for device ",
|
||||
device,
|
||||
": did you call init?");
|
||||
Block* block = device_allocator[device]->malloc(device, size, stream);
|
||||
Block* block = device_allocator[device]->malloc(size, stream);
|
||||
add_allocated_block(block);
|
||||
*devPtr = block->ptr;
|
||||
const c10::impl::PyInterpreter* interp = c10::impl::GPUTrace::get_trace();
|
||||
|
@ -360,11 +360,11 @@ inline void* raw_alloc_with_stream(size_t nbytes, cudaStream_t stream) {
|
||||
}
|
||||
|
||||
inline void raw_delete(void* ptr) {
|
||||
return get()->raw_delete(ptr);
|
||||
get()->raw_delete(ptr);
|
||||
}
|
||||
|
||||
inline void init(int device_count) {
|
||||
return get()->init(device_count);
|
||||
get()->init(device_count);
|
||||
}
|
||||
|
||||
inline double getMemoryFraction(c10::DeviceIndex device) {
|
||||
@ -372,7 +372,7 @@ inline double getMemoryFraction(c10::DeviceIndex device) {
|
||||
}
|
||||
|
||||
inline void setMemoryFraction(double fraction, c10::DeviceIndex device) {
|
||||
return get()->setMemoryFraction(fraction, device);
|
||||
get()->setMemoryFraction(fraction, device);
|
||||
}
|
||||
|
||||
inline std::vector<StreamSegmentSize> getExpandableSegmentSizes(
|
||||
@ -381,11 +381,11 @@ inline std::vector<StreamSegmentSize> getExpandableSegmentSizes(
|
||||
}
|
||||
|
||||
inline void emptyCache(MempoolId_t mempool_id = {0, 0}) {
|
||||
return get()->emptyCache(mempool_id);
|
||||
get()->emptyCache(mempool_id);
|
||||
}
|
||||
|
||||
inline void enable(bool value) {
|
||||
return get()->enable(value);
|
||||
get()->enable(value);
|
||||
}
|
||||
|
||||
inline bool isEnabled() {
|
||||
@ -393,7 +393,7 @@ inline bool isEnabled() {
|
||||
}
|
||||
|
||||
inline void cacheInfo(c10::DeviceIndex device, size_t* largestBlock) {
|
||||
return get()->cacheInfo(device, largestBlock);
|
||||
get()->cacheInfo(device, largestBlock);
|
||||
}
|
||||
|
||||
inline void* getBaseAllocation(void* ptr, size_t* size) {
|
||||
@ -401,7 +401,7 @@ inline void* getBaseAllocation(void* ptr, size_t* size) {
|
||||
}
|
||||
|
||||
inline void recordStream(const DataPtr& dataPtr, CUDAStream stream) {
|
||||
return get()->recordStream(dataPtr, stream);
|
||||
get()->recordStream(dataPtr, stream);
|
||||
}
|
||||
|
||||
inline c10::CachingDeviceAllocator::DeviceStats getDeviceStats(
|
||||
@ -410,11 +410,11 @@ inline c10::CachingDeviceAllocator::DeviceStats getDeviceStats(
|
||||
}
|
||||
|
||||
inline void resetAccumulatedStats(c10::DeviceIndex device) {
|
||||
return get()->resetAccumulatedStats(device);
|
||||
get()->resetAccumulatedStats(device);
|
||||
}
|
||||
|
||||
inline void resetPeakStats(c10::DeviceIndex device) {
|
||||
return get()->resetPeakStats(device);
|
||||
get()->resetPeakStats(device);
|
||||
}
|
||||
|
||||
inline SnapshotInfo snapshot(MempoolId_t mempool_id = {0, 0}) {
|
||||
@ -451,21 +451,21 @@ inline void recordHistory(
|
||||
size_t alloc_trace_max_entries,
|
||||
RecordContext when,
|
||||
bool clearHistory) {
|
||||
return get()->recordHistory(
|
||||
get()->recordHistory(
|
||||
enabled, context_recorder, alloc_trace_max_entries, when, clearHistory);
|
||||
}
|
||||
|
||||
inline void recordAnnotation(
|
||||
const std::vector<std::pair<std::string, std::string>>& md) {
|
||||
return get()->recordAnnotation(md);
|
||||
get()->recordAnnotation(md);
|
||||
}
|
||||
|
||||
inline void pushCompileContext(std::string& md) {
|
||||
return get()->pushCompileContext(md);
|
||||
get()->pushCompileContext(md);
|
||||
}
|
||||
|
||||
inline void popCompileContext() {
|
||||
return get()->popCompileContext();
|
||||
get()->popCompileContext();
|
||||
}
|
||||
|
||||
inline bool isHistoryEnabled() {
|
||||
@ -481,15 +481,15 @@ inline bool checkPoolLiveAllocations(
|
||||
}
|
||||
|
||||
inline void attachOutOfMemoryObserver(OutOfMemoryObserver observer) {
|
||||
return get()->attachOutOfMemoryObserver(std::move(observer));
|
||||
get()->attachOutOfMemoryObserver(std::move(observer));
|
||||
}
|
||||
|
||||
inline void attachAllocatorTraceTracker(AllocatorTraceTracker tracker) {
|
||||
return get()->attachAllocatorTraceTracker(std::move(tracker));
|
||||
get()->attachAllocatorTraceTracker(std::move(tracker));
|
||||
}
|
||||
|
||||
inline void releasePool(c10::DeviceIndex device, MempoolId_t mempool_id) {
|
||||
return get()->releasePool(device, mempool_id);
|
||||
get()->releasePool(device, mempool_id);
|
||||
}
|
||||
inline void createOrIncrefPool(
|
||||
c10::DeviceIndex device,
|
||||
@ -533,7 +533,7 @@ inline cudaError_t memcpyAsync(
|
||||
inline void enablePeerAccess(
|
||||
c10::DeviceIndex dev,
|
||||
c10::DeviceIndex dev_to_access) {
|
||||
return get()->enablePeerAccess(dev, dev_to_access);
|
||||
get()->enablePeerAccess(dev, dev_to_access);
|
||||
}
|
||||
|
||||
} // namespace c10::cuda::CUDACachingAllocator
|
||||
|
@ -51,17 +51,6 @@
|
||||
|
||||
#if defined(CUDA_VERSION) && (CUDA_VERSION >= 12030)
|
||||
#define C10_LIBCUDA_DRIVER_API_OPTIONAL(_) \
|
||||
_(cuCtxFromGreenCtx, 12080) \
|
||||
_(cuCtxGetCurrent, 12080) \
|
||||
_(cuCtxPopCurrent, 12080) \
|
||||
_(cuCtxPushCurrent, 12080) \
|
||||
_(cuCtxSetCurrent, 12080) \
|
||||
_(cuGreenCtxCreate, 12080) \
|
||||
_(cuGreenCtxDestroy, 12080) \
|
||||
_(cuDevSmResourceSplitByCount, 12080) \
|
||||
_(cuDeviceGet, 12080) \
|
||||
_(cuDeviceGetDevResource, 12080) \
|
||||
_(cuDevResourceGenerateDesc, 12080) \
|
||||
_(cuMulticastAddDevice, 12030) \
|
||||
_(cuMulticastBindMem, 12030) \
|
||||
_(cuMulticastCreate, 12030) \
|
||||
|
@ -49,7 +49,7 @@ class DynamicBackendWrapper : public WaitCounterBackendIf {
|
||||
|
||||
void stop(std::chrono::steady_clock::time_point now, intptr_t ctx) noexcept
|
||||
override {
|
||||
return impl_.stop(
|
||||
impl_.stop(
|
||||
impl_.self,
|
||||
std::chrono::duration_cast<std::chrono::microseconds>(
|
||||
now.time_since_epoch())
|
||||
@ -162,6 +162,6 @@ WaitCounterHandle::WaitGuard WaitCounterHandle::start() {
|
||||
}
|
||||
|
||||
void WaitCounterHandle::stop(const SmallVector<intptr_t>& ctxs) {
|
||||
return impl_.stop(ctxs);
|
||||
impl_.stop(ctxs);
|
||||
}
|
||||
} // namespace c10::monitor
|
||||
|
@ -243,8 +243,8 @@ configure_file("${TORCH_SRC_DIR}/_utils_internal.py"
|
||||
COPYONLY)
|
||||
|
||||
# Generate header with version info
|
||||
configure_file("${TORCH_SRC_DIR}/csrc/api/include/torch/version.h.in"
|
||||
"${TORCH_SRC_DIR}/csrc/api/include/torch/version.h"
|
||||
configure_file("${TORCH_SRC_DIR}/headeronly/version.h.in"
|
||||
"${TORCH_SRC_DIR}/headeronly/version.h"
|
||||
@ONLY)
|
||||
|
||||
set(GENERATED_CXX_TORCH
|
||||
|
@ -207,6 +207,42 @@ templates_path = [
|
||||
]
|
||||
# TODO: document these and remove them from here.
|
||||
|
||||
# Fixes the duplicated
|
||||
autosummary_filename_map = {
|
||||
"torch.nn.utils.prune.identity": "torch.nn.utils.prune.identity_function",
|
||||
"torch.nn.utils.prune.Identity": "torch.nn.utils.prune.Identity_class",
|
||||
"torch.optim.adamw.adamw": "torch.optim.adamw.adamw_function",
|
||||
"torch.optim.adamw.AdamW": "torch.optim.adamw.AdamW_class",
|
||||
"torch.optim.asgd.asgd": "torch.optim.asgd.asgd_function",
|
||||
"torch.optim.asgd.ASGD": "torch.optim.asgd.ASGD_class",
|
||||
"torch.optim.nadam.nadam": "torch.optim.nadam.nadam_function",
|
||||
"torch.optim.nadam.NAdam": "torch.optim.nadam.NAdam_class",
|
||||
"torch.optim.radam.radam": "torch.optim.radam.radam_function",
|
||||
"torch.optim.radam.RAdam": "torch.optim.radam.RAdam_class",
|
||||
"torch.optim.rmsprop.rmsprop": "torch.optim.rmsprop.rmsprop_function",
|
||||
"torch.optim.rmsprop.RMSprop": "torch.optim.rmsprop.RMSprop_class",
|
||||
"torch.optim.rprop.rprop": "torch.optim.rprop.rprop_function",
|
||||
"torch.optim.rprop.Rprop": "torch.optim.rprop.Rprop_class",
|
||||
"torch.optim.sgd.sgd": "torch.optim.sgd.sgd_function",
|
||||
"torch.optim.sgd.SGD": "torch.optim.sgd.SGD_class",
|
||||
"torch.optim.adadelta.adadelta": "torch.optim.adadelta.adadelta_function",
|
||||
"torch.optim.adadelta.Adadelta": "torch.optim.adadelta.Adadelta_class",
|
||||
"torch.optim.adagrad.adagrad": "torch.optim.adagrad.adagrad_function",
|
||||
"torch.optim.adagrad.Adagrad": "torch.optim.adagrad.Adagrad_class",
|
||||
"torch.optim.adam.adam": "torch.optim.adam.adam_function",
|
||||
"torch.optim.adam.Adam": "torch.optim.adam.Adam_class",
|
||||
"torch.optim.adamax.adamax": "torch.optim.adamax.adamax_function",
|
||||
"torch.optim.adamax.Adamax": "torch.optim.adamax.Adamax_class",
|
||||
"torch.mtia.stream": "torch.mtia.stream_function",
|
||||
"torch.mtia.Stream": "torch.mtia.Stream_class",
|
||||
"torch.cpu.stream": "torch.cpu.stream_function",
|
||||
"torch.cpu.Stream": "torch.cpu.Stream_class",
|
||||
"torch.cuda.stream": "torch.cuda.stream_function",
|
||||
"torch.cuda.Stream": "torch.cuda.Stream_class",
|
||||
"torch.xpu.stream": "torch.xpu.stream_function",
|
||||
"torch.xpu.Stream": "torch.xpu.Stream_class",
|
||||
}
|
||||
|
||||
coverage_ignore_functions = [
|
||||
# torch
|
||||
"typename",
|
||||
@ -3253,6 +3289,11 @@ autodoc_type_aliases = {
|
||||
# Enable overriding of function signatures in the first line of the docstring.
|
||||
autodoc_docstring_signature = True
|
||||
|
||||
# Exclude inherited IntEnum methods that have RST formatting issues in their docstrings
|
||||
autodoc_default_options = {
|
||||
"exclude-members": "from_bytes, to_bytes",
|
||||
}
|
||||
|
||||
# -- katex javascript in header
|
||||
#
|
||||
# def setup(app):
|
||||
|
@ -262,28 +262,6 @@ See the docs for {class}`~torch.cuda.gds.GdsFile` for an example of how to use t
|
||||
|
||||
```
|
||||
|
||||
## Green Contexts (experimental)
|
||||
|
||||
`torch.cuda.green_contexts` provides thin wrappers around the CUDA Green Context APIs
|
||||
to enable more general carveout of SM resources for CUDA kernels.
|
||||
|
||||
These APIs can be used in PyTorch with CUDA versions greater than or equal to 12.8.
|
||||
|
||||
See the docs for {class}`~torch.cuda.green_contexts.GreenContext` for an example of how to use these.
|
||||
|
||||
```{eval-rst}
|
||||
.. currentmodule:: torch.cuda.green_contexts
|
||||
```
|
||||
|
||||
```{eval-rst}
|
||||
.. autosummary::
|
||||
:toctree: generated
|
||||
:nosignatures:
|
||||
|
||||
GreenContext
|
||||
```
|
||||
|
||||
|
||||
% This module needs to be documented. Adding here in the meantime
|
||||
|
||||
% for tracking purposes
|
||||
@ -296,10 +274,6 @@ See the docs for {class}`~torch.cuda.green_contexts.GreenContext` for an example
|
||||
.. py:module:: torch.cuda.gds
|
||||
```
|
||||
|
||||
```{eval-rst}
|
||||
.. py:module:: torch.cuda.green_contexts
|
||||
```
|
||||
|
||||
```{eval-rst}
|
||||
.. py:module:: torch.cuda.jiterator
|
||||
```
|
||||
@ -325,4 +299,4 @@ See the docs for {class}`~torch.cuda.green_contexts.GreenContext` for an example
|
||||
:hidden:
|
||||
|
||||
cuda.aliases.md
|
||||
```
|
||||
```
|
@ -233,7 +233,6 @@ regular full-precision tensor.
|
||||
.. autosummary::
|
||||
:toctree: generated
|
||||
:nosignatures:
|
||||
:template: classtemplate.rst
|
||||
|
||||
view
|
||||
as_strided
|
||||
|
@ -242,6 +242,7 @@ select = [
|
||||
"Q003", # avoidable escaped quote
|
||||
"Q004", # unnecessary escaped quote
|
||||
"RSE",
|
||||
"RUF007", # pairwise over zip
|
||||
"RUF008", # mutable dataclass default
|
||||
"RUF013", # ban implicit optional
|
||||
"RUF015", # access first ele in constant time
|
||||
|
12
pyrefly.toml
12
pyrefly.toml
@ -22,18 +22,16 @@ project-excludes = [
|
||||
# ==== to test Pyrefly on a specific directory, simply comment it out ====
|
||||
"torch/_inductor/**",
|
||||
"torch/distributed/**",
|
||||
"torch/nn/**",
|
||||
"torch/_dynamo/**",
|
||||
"torch/utils/**",
|
||||
"torch/ao/**",
|
||||
"torch/fx/**",
|
||||
"torch/distributions/**",
|
||||
"torch/onnx/**",
|
||||
# formatting issues
|
||||
"torch/linalg/__init__.py",
|
||||
"torch/package/importer.py",
|
||||
"torch/package/_package_pickler.py",
|
||||
"torch/jit/annotations.py",
|
||||
"torch/utils/data/datapipes/_typing.py",
|
||||
"torch/nn/functional.py",
|
||||
"torch/_export/utils.py",
|
||||
"torch/fx/experimental/unification/multipledispatch/__init__.py",
|
||||
"torch/nn/modules/__init__.py",
|
||||
# ====
|
||||
"benchmarks/instruction_counts/main.py",
|
||||
"benchmarks/instruction_counts/definitions/setup.py",
|
||||
|
@ -1111,14 +1111,6 @@
|
||||
"_amp_update_scale_",
|
||||
"_assert_async",
|
||||
"_batch_norm_impl_index",
|
||||
"_cast_Byte",
|
||||
"_cast_Char",
|
||||
"_cast_Double",
|
||||
"_cast_Float",
|
||||
"_cast_Half",
|
||||
"_cast_Int",
|
||||
"_cast_Long",
|
||||
"_cast_Short",
|
||||
"_choose_qparams_per_tensor",
|
||||
"_coalesce",
|
||||
"_compute_linear_combination",
|
||||
|
Some files were not shown because too many files have changed in this diff Show More
Reference in New Issue
Block a user