mirror of
https://github.com/pytorch/pytorch.git
synced 2025-10-20 21:14:14 +08:00
[ci][cutlass backend] Add ci for cutlass backend tests (#156626)
Redo of https://github.com/pytorch/pytorch/pull/156136. Differential Revision: [D77327309](https://our.internmc.facebook.com/intern/diff/D77327309). I want to try to land the full version first. If the CI takes too long, we can revert to testing only a few test names, e.g. by filtering with `-k 'test_max_autotune_cutlass_backend_regular_mm and not test_max_autotune_cutlass_backend_regular_mm_streamk'`. Pull Request resolved: https://github.com/pytorch/pytorch/pull/156626 Approved by: https://github.com/huydhn, https://github.com/mlazos
This commit is contained in:
committed by
PyTorch MergeBot
parent
21c97bd565
commit
d984143a74
@ -345,6 +345,12 @@ test_h100_symm_mem() {
|
||||
assert_git_not_dirty
|
||||
}
|
||||
|
||||
test_h100_cutlass_backend() {
|
||||
# cutlass backend tests for H100
# Runs two Inductor test modules through test/run_test.py:
#   1. inductor/test_cutlass_backend, filtered with -k "not addmm"
#   2. inductor/test_cutlass_evt, with no -k filter
# Each invocation sets TORCHINDUCTOR_CUTLASS_DIR (for that command only) to the
# resolved real path of ./third_party/cutlass, so the commands are expected to be
# run from the repository root.
# NOTE(review): $PYTHON_TEST_EXTRA_OPTION is expanded unquoted, presumably so it
# can carry several whitespace-separated flags (or be empty) -- confirm.
|
||||
TORCHINDUCTOR_CUTLASS_DIR=$(realpath "./third_party/cutlass") python test/run_test.py --include inductor/test_cutlass_backend -k "not addmm" $PYTHON_TEST_EXTRA_OPTION --upload-artifacts-while-running
|
||||
TORCHINDUCTOR_CUTLASS_DIR=$(realpath "./third_party/cutlass") python test/run_test.py --include inductor/test_cutlass_evt $PYTHON_TEST_EXTRA_OPTION --upload-artifacts-while-running
|
||||
}
|
||||
|
||||
test_lazy_tensor_meta_reference_disabled() {
|
||||
export TORCH_DISABLE_FUNCTIONALIZATION_META_REFERENCE=1
|
||||
echo "Testing lazy tensor operations without meta reference"
|
||||
@ -1769,6 +1775,8 @@ elif [[ "${TEST_CONFIG}" == h100_distributed ]]; then
|
||||
test_h100_distributed
|
||||
elif [[ "${TEST_CONFIG}" == "h100-symm-mem" ]]; then
|
||||
test_h100_symm_mem
|
||||
elif [[ "${TEST_CONFIG}" == h100_cutlass_backend ]]; then
|
||||
test_h100_cutlass_backend
|
||||
else
|
||||
install_torchvision
|
||||
install_monkeytype
|
||||
|
1
.github/pytorch-probot.yml
vendored
1
.github/pytorch-probot.yml
vendored
@ -32,6 +32,7 @@ ciflow_push_tags:
|
||||
- ciflow/h100
|
||||
- ciflow/h100-distributed
|
||||
- ciflow/h100-symm-mem
|
||||
- ciflow/h100-cutlass-backend
|
||||
retryable_workflows:
|
||||
- pull
|
||||
- trunk
|
||||
|
58
.github/workflows/h100-cutlass-backend.yml
vendored
Normal file
58
.github/workflows/h100-cutlass-backend.yml
vendored
Normal file
@ -0,0 +1,58 @@
|
||||
name: Limited CI for CUTLASS backend on H100
|
||||
|
||||
on:
|
||||
pull_request:
|
||||
paths:
|
||||
- .github/workflows/h100-cutlass-backend.yml
|
||||
workflow_dispatch:
|
||||
schedule:
|
||||
- cron: 22 9 * * * # once a day at 09:22 UTC (about 2:22am PDT)
|
||||
push:
|
||||
tags:
|
||||
- ciflow/h100-cutlass-backend/*
|
||||
|
||||
concurrency:
|
||||
group: ${{ github.workflow }}-${{ github.event.pull_request.number || github.sha }}-${{ github.event_name == 'workflow_dispatch' }}-${{ github.event_name == 'schedule' }}
|
||||
cancel-in-progress: true
|
||||
|
||||
permissions:
|
||||
id-token: write
|
||||
contents: read
|
||||
|
||||
jobs:
|
||||
|
||||
get-label-type:
|
||||
if: github.repository_owner == 'pytorch'
|
||||
name: get-label-type
|
||||
uses: pytorch/pytorch/.github/workflows/_runner-determinator.yml@main
|
||||
with:
|
||||
triggering_actor: ${{ github.triggering_actor }}
|
||||
issue_owner: ${{ github.event.pull_request.user.login || github.event.issue.user.login }}
|
||||
curr_branch: ${{ github.head_ref || github.ref_name }}
|
||||
curr_ref_type: ${{ github.ref_type }}
|
||||
|
||||
linux-jammy-cuda12_8-py3_10-gcc11-sm90-build-cutlass-backend:
|
||||
name: linux-jammy-cuda12.8-py3.10-gcc11-sm90-cutlass-backend
|
||||
uses: ./.github/workflows/_linux-build.yml
|
||||
needs: get-label-type
|
||||
with:
|
||||
runner_prefix: "${{ needs.get-label-type.outputs.label-type }}"
|
||||
build-environment: linux-jammy-cuda12.8-py3.10-gcc11-sm90-cutlass-backend
|
||||
docker-image-name: ci-image:pytorch-linux-jammy-cuda12.8-cudnn9-py3-gcc11
|
||||
cuda-arch-list: '9.0'
|
||||
test-matrix: |
|
||||
{ include: [
|
||||
{ config: "h100_cutlass_backend", shard: 1, num_shards: 1, runner: "linux.aws.h100", owners: ["oncall:pt2"] },
|
||||
]}
|
||||
secrets: inherit
|
||||
|
||||
linux-jammy-cuda12_8-py3_10-gcc11-sm90-test:
|
||||
name: linux-jammy-cuda12.8-py3.10-gcc11-sm90-cutlass-backend
|
||||
uses: ./.github/workflows/_linux-test.yml
|
||||
needs:
|
||||
- linux-jammy-cuda12_8-py3_10-gcc11-sm90-build-cutlass-backend
|
||||
with:
|
||||
build-environment: linux-jammy-cuda12.8-py3.10-gcc11-sm90-cutlass-backend
|
||||
docker-image: ${{ needs.linux-jammy-cuda12_8-py3_10-gcc11-sm90-build-cutlass-backend.outputs.docker-image }}
|
||||
test-matrix: ${{ needs.linux-jammy-cuda12_8-py3_10-gcc11-sm90-build-cutlass-backend.outputs.test-matrix }}
|
||||
secrets: inherit
|
@ -128,7 +128,7 @@ def try_import_cutlass() -> bool:
|
||||
if tmp_cutlass_full_path not in sys.path:
|
||||
|
||||
def link_and_append(dst_link, src_path, parent_dir):
|
||||
if os.path.exists(dst_link):
|
||||
if os.path.lexists(dst_link):
|
||||
assert os.path.islink(dst_link), (
|
||||
f"{dst_link} is not a symlink. Try to remove {dst_link} manually and try again."
|
||||
)
|
||||
|
@ -1517,11 +1517,11 @@ class cuda:
|
||||
|
||||
# Path to the CUTLASS repo root directory.
|
||||
# The default path only works under PyTorch local development environment.
|
||||
cutlass_dir = os.environ.get(
|
||||
cutlass_dir = os.path.realpath(
|
||||
os.environ.get(
|
||||
"TORCHINDUCTOR_CUTLASS_DIR",
|
||||
os.path.abspath(
|
||||
os.path.join(os.path.dirname(torch.__file__), "../third_party/cutlass/")
|
||||
),
|
||||
os.path.join(os.path.dirname(torch.__file__), "../third_party/cutlass/"),
|
||||
)
|
||||
)
|
||||
|
||||
# Configures the maximum number of CUTLASS configs to profile in max_autotune.
|
||||
|
@ -1649,8 +1649,9 @@ def use_cutlass_template(layout: Layout, m: int, n: int, k: int) -> bool:
|
||||
if not try_import_cutlass():
|
||||
log.warning(
|
||||
"Failed to import CUTLASS lib. Please check whether "
|
||||
"_inductor.config.cuda.cutlass_dir is set correctly. "
|
||||
"Skipping CUTLASS backend for now."
|
||||
"_inductor.config.cuda.cutlass_dir %s is set correctly. "
|
||||
"Skipping CUTLASS backend for now.",
|
||||
config.cuda.cutlass_dir,
|
||||
)
|
||||
return False
|
||||
return res
|
||||
|
Reference in New Issue
Block a user