Compare commits

...

39 Commits

SHA1 Message Date
9473b9e532 Merge remote-tracking branch 'origin/main' into ci_attn 2025-11-12 20:08:54 +00:00
a112bb81b8 Merge remote-tracking branch 'origin/main' into ci_attn 2025-11-10 14:09:38 -08:00
289111df8a Update attention_op_microbenchmark.yml 2025-11-10 11:01:03 -08:00
f2c43efab9 Merge branch 'main' into ci_attn 2025-10-29 16:45:33 -07:00
c1f01d51ef Define attention kernel op name 2025-10-29 11:15:55 -07:00
665df0bc72 Merge remote-tracking branch 'origin/attention_benchmark' into ci_attn 2025-10-28 13:57:16 -07:00
2882e9037b Fix output-json 2025-10-28 13:53:58 -07:00
a58b89df44 Fix linting 2025-10-28 12:54:07 -07:00
c684aa411b Merge remote-tracking branch 'origin/main' into attention_benchmark 2025-10-28 10:20:14 -07:00
836383de99 Merge remote-tracking branch 'origin/attention_benchmark' into ci_attn 2025-10-27 21:16:06 -07:00
fcb1bf53cb Merge remote-tracking branch 'origin/main' into attention_benchmark 2025-10-27 21:14:03 -07:00
df9bd5ce7c Fix linting 2025-10-27 21:09:17 -07:00
c3f01becc0 Removed test changes 2025-10-27 10:00:33 -07:00
56bdd1aa05 Test only safe-backend 2025-10-27 09:03:42 -07:00
d2602f0884 Merge remote-tracking branch 'origin/attention_benchmark' into ci_attn 2025-10-27 09:01:15 -07:00
82b70f9a7f Update safe-backend use 2025-10-27 08:59:52 -07:00
9f2c63259f Remove commented code 2025-10-26 22:03:53 -07:00
6b08e5e075 Merge remote-tracking branch 'origin/attention_benchmark' into ci_attn 2025-10-26 21:41:38 -07:00
2bcd29cc48 Handle nan 2025-10-26 21:36:18 -07:00
f7f410a1f5 Clean fav2 2025-10-26 21:06:24 -07:00
1832d9720f Format config logic now 2025-10-23 14:16:19 -07:00
4069d76684 Add print-config 2025-10-23 13:47:46 -07:00
1d3f285e3f Revert accidental third_party/fbgemm submodule update (revert fbgemm submodule to match main branch commit) 2025-10-23 10:27:58 -07:00
a1066fc671 Merge branch 'attention_benchmark' into ci_attn 2025-10-23 10:24:58 -07:00
11536e5a6b Update json 2025-10-23 10:21:01 -07:00
09daf4533d Testing small subset 2025-10-21 13:49:25 -07:00
5899e0478d Add more runners 2025-10-21 12:27:04 -07:00
7b98ed8273 Update attention-gym installation 2025-10-21 12:00:52 -07:00
ad118b53d8 Update attention-gym installation 2025-10-21 11:56:45 -07:00
00832e4a45 updates 2025-10-20 09:44:14 -07:00
56e3f97f2a Attention ops in CI 2025-10-20 09:37:31 -07:00
9d8778b8bf Lint and minor fixes 2025-10-20 08:58:32 -07:00
86279c6f25 Merge remote-tracking branch 'origin/main' into attention_benchmark 2025-10-19 16:08:23 -07:00
93553121d8 Update score_mod 2025-10-09 15:30:13 -07:00
e5eb96af95 Add json 2025-10-06 09:52:59 -07:00
4407b6c9e3 Transformer benchmarks 2025-10-06 08:30:52 -07:00
22ea056fcd Update score_mod 2025-09-30 14:00:51 -07:00
d7466fd5c6 Add config files 2025-09-30 10:36:04 -07:00
de61804393 Add attention benchmarking 2025-09-29 09:35:01 -07:00
3 changed files with 109 additions and 5 deletions


@@ -1680,6 +1680,22 @@ test_operator_microbenchmark() {
   done
 }

+
+test_attention_microbenchmark() {
+  TEST_REPORTS_DIR=$(pwd)/test/test-reports
+  mkdir -p "$TEST_REPORTS_DIR"
+  TEST_DIR=$(pwd)
+
+  # Install attention-gym dependency
+  echo "Installing attention-gym..."
+  python -m pip install git+https://github.com/meta-pytorch/attention-gym.git@main
+  pip show triton
+
+  cd "${TEST_DIR}"/benchmarks/transformer
+  $TASKSET python score_mod.py --config configs/config_basic.yaml \
+    --output-json-for-dashboard "${TEST_REPORTS_DIR}/attention_microbenchmark.json"
+}
+
 if ! [[ "${BUILD_ENVIRONMENT}" == *libtorch* || "${BUILD_ENVIRONMENT}" == *-bazel-* ]]; then
   (cd test && python -c "import torch; print(torch.__config__.show())")
   (cd test && python -c "import torch; print(torch.__config__.parallel_info())")
@@ -1737,6 +1753,8 @@ elif [[ "${TEST_CONFIG}" == *operator_benchmark* ]]; then
   fi
 elif [[ "${TEST_CONFIG}" == *operator_microbenchmark* ]]; then
   test_operator_microbenchmark
+elif [[ "${TEST_CONFIG}" == *attention_microbenchmark* ]]; then
+  test_attention_microbenchmark
 elif [[ "${TEST_CONFIG}" == *inductor_distributed* ]]; then
   test_inductor_distributed
 elif [[ "${TEST_CONFIG}" == *inductor-halide* ]]; then


@@ -0,0 +1,73 @@
+name: attention_op_microbenchmark
+
+on:
+  push:
+    tags:
+      - ciflow/op-benchmark/*
+  workflow_dispatch:
+  schedule:
+    # Run at 07:00 UTC every day
+    - cron: 0 7 * * *
+
+concurrency:
+  group: ${{ github.workflow }}-${{ github.event.pull_request.number || github.ref_name }}-${{ github.ref_type == 'branch' && github.sha }}-${{ github.event_name == 'workflow_dispatch' }}-${{ github.event_name == 'schedule' }}
+  cancel-in-progress: true
+
+permissions:
+  id-token: write
+  contents: read
+
+jobs:
+  attn-microbenchmark-build:
+    if: github.repository_owner == 'pytorch'
+    uses: ./.github/workflows/_linux-build.yml
+    with:
+      runner: linux.12xlarge.memory
+      build-environment: linux-jammy-cuda12.8-py3.10-gcc9-sm80
+      docker-image-name: ci-image:pytorch-linux-jammy-cuda12.8-cudnn9-py3-gcc11
+      cuda-arch-list: '8.0 9.0'
+      test-matrix: |
+        { include: [
+          { config: "attention_microbenchmark_test", shard: 1, num_shards: 1, runner: "linux.aws.a100" },
+          { config: "attention_microbenchmark_test", shard: 1, num_shards: 1, runner: "linux.aws.h100" },
+        ]}
+    secrets: inherit
+
+  attn-microbenchmark-test:
+    name: attn-microbenchmark-test
+    uses: ./.github/workflows/_linux-test.yml
+    needs: attn-microbenchmark-build
+    with:
+      timeout-minutes: 500
+      build-environment: linux-jammy-cuda12.8-py3.10-gcc9-sm80
+      docker-image: ${{ needs.attn-microbenchmark-build.outputs.docker-image }}
+      test-matrix: ${{ needs.attn-microbenchmark-build.outputs.test-matrix }}
+    secrets: inherit
+
+  # B200 runner
+  opmicrobenchmark-build-b200:
+    if: github.repository_owner == 'pytorch'
+    name: opmicrobenchmark-build-b200
+    uses: ./.github/workflows/_linux-build.yml
+    with:
+      runner: linux.12xlarge.memory
+      build-environment: linux-jammy-cuda12.8-py3.10-gcc9-sm100
+      docker-image-name: ci-image:pytorch-linux-jammy-cuda12.8-cudnn9-py3-gcc11
+      cuda-arch-list: '10.0'
+      test-matrix: |
+        { include: [
+          { config: "operator_microbenchmark_test", shard: 1, num_shards: 1, runner: "linux.dgx.b200" },
+        ]}
+    secrets: inherit
+
+  opmicrobenchmark-test-b200:
+    name: opmicrobenchmark-test-b200
+    uses: ./.github/workflows/_linux-test.yml
+    needs: opmicrobenchmark-build-b200
+    with:
+      timeout-minutes: 500
+      build-environment: linux-jammy-cuda12.8-py3.10-gcc9-sm100
+      docker-image: ${{ needs.opmicrobenchmark-build-b200.outputs.docker-image }}
+      test-matrix: ${{ needs.opmicrobenchmark-build-b200.outputs.test-matrix }}
+      aws-role-to-assume: arn:aws:iam::308535385114:role/gha_workflow_s3_and_ecr_read_only
+    secrets: inherit
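
Besides the daily cron and manual workflow_dispatch, the workflow runs on pushes of ciflow/op-benchmark/* tags. A hedged sketch of triggering it by hand, assuming the usual PyTorch ciflow convention of suffixing the tag with a PR number (the 12345 below is a placeholder):

# Hypothetical trigger; replace 12345 with a real PR number.
git tag ciflow/op-benchmark/12345
git push origin ciflow/op-benchmark/12345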


@@ -125,6 +125,17 @@ AttentionType = Literal[
 ]
 DtypeString = Literal["bfloat16", "float16", "float32"]
 SpeedupType = Literal["fwd", "bwd"]
+# Operator Name mapping
+backend_to_operator_name = {
+    "math": "math attention kernel",
+    "efficient": "efficient attention kernel",
+    "cudnn": "cudnn attention kernel",
+    "fav2": "flash attention 2 kernel",
+    "fav3": "flash attention 3 kernel",
+    "fakv": "flash attention kv cache kernel",
+    "og-eager": "eager attention kernel",
+    "flex": "flex attention kernel",
+}


 def benchmark_torch_function_in_microseconds(func: Callable, *args, **kwargs) -> float:
@@ -1265,12 +1276,14 @@ def _output_json_for_dashboard(
         model: ModelInfo
         metric: MetricInfo

+    operator_name = backend_to_operator_name.get(backend, backend)
+
     # Benchmark extra info
     benchmark_extra_info = {
         "input_config": input_config,
         "device": device,
         "arch": device_arch,
-        "operator_name": backend,
+        "operator_name": operator_name,
         "attn_type": config.attn_type,
         "shape": str(config.shape),
         "max_autotune": config.max_autotune,
@@ -1288,7 +1301,7 @@ def _output_json_for_dashboard(
             type="attention-benchmark",
             origins=["pytorch"],
             extra_info={
-                "operator_name": backend,
+                "operator_name": operator_name,
                 "attn_type": config.attn_type,
             },
         ),
@@ -1315,7 +1328,7 @@ def _output_json_for_dashboard(
             type="attention-benchmark",
             origins=["pytorch"],
             extra_info={
-                "operator_name": backend,
+                "operator_name": operator_name,
             },
         ),
         metric=MetricInfo(
@@ -1341,7 +1354,7 @@ def _output_json_for_dashboard(
             type="attention-benchmark",
             origins=["pytorch"],
             extra_info={
-                "operator_name": backend,
+                "operator_name": operator_name,
             },
         ),
         metric=MetricInfo(
@@ -1371,7 +1384,7 @@ def _output_json_for_dashboard(
             type="attention-benchmark",
             origins=["pytorch"],
             extra_info={
-                "operator_name": backend,
+                "operator_name": operator_name,
             },
         ),
         metric=MetricInfo(
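
The lookup added above uses backend_to_operator_name.get(backend, backend), so any backend missing from the table falls back to its raw name instead of raising a KeyError. A sketch for spot-checking the renamed field in the emitted dashboard JSON, assuming the file is a top-level array of records and jq is available; the .model.extra_info path is inferred from the ModelInfo extra_info dicts in the diff, not from a documented schema:

# Hedged: field path is an assumption based on the diff above.
jq '.[].model.extra_info.operator_name' test/test-reports/attention_microbenchmark.json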