[ci][cutlass backend] Add ci for cutlass backend tests (#156626)

Redo of https://github.com/pytorch/pytorch/pull/156136 Differential Revision: [D77327309](https://our.internmc.facebook.com/intern/diff/D77327309) I want to try to land the full version first. If the CI takes too long, we can revert to testing only a few test names, e.g.: ``` -k 'test_max_autotune_cutlass_backend_regular_mm and not test_max_autotune_cutlass_backend_regular_mm_streamk' ``` Pull Request resolved: https://github.com/pytorch/pytorch/pull/156626 Approved by: https://github.com/huydhn, https://github.com/mlazos
2025-10-20 21:14:14 +08:00 · 2025-07-21 14:38:22 -07:00
parent 21c97bd565
commit d984143a74
6 changed files with 76 additions and 8 deletions
--- a/.ci/pytorch/test.sh
+++ b/.ci/pytorch/test.sh
@ -345,6 +345,12 @@ test_h100_symm_mem() {
  assert_git_not_dirty
 }

+test_h100_cutlass_backend() {
+  # cutlass backend tests for H100
+  TORCHINDUCTOR_CUTLASS_DIR=$(realpath "./third_party/cutlass") python test/run_test.py --include inductor/test_cutlass_backend -k "not addmm" $PYTHON_TEST_EXTRA_OPTION --upload-artifacts-while-running
+  TORCHINDUCTOR_CUTLASS_DIR=$(realpath "./third_party/cutlass") python test/run_test.py --include inductor/test_cutlass_evt $PYTHON_TEST_EXTRA_OPTION --upload-artifacts-while-running
+}
+
 test_lazy_tensor_meta_reference_disabled() {
  export TORCH_DISABLE_FUNCTIONALIZATION_META_REFERENCE=1
  echo "Testing lazy tensor operations without meta reference"
@ -1769,6 +1775,8 @@ elif [[ "${TEST_CONFIG}" == h100_distributed ]]; then
  test_h100_distributed
 elif [[ "${TEST_CONFIG}" == "h100-symm-mem" ]]; then
  test_h100_symm_mem
+elif [[ "${TEST_CONFIG}" == h100_cutlass_backend ]]; then
+  test_h100_cutlass_backend
 else
  install_torchvision
  install_monkeytype
--- a/.github/pytorch-probot.yml
+++ b/.github/pytorch-probot.yml
@ -32,6 +32,7 @@ ciflow_push_tags:
 - ciflow/h100
 - ciflow/h100-distributed
 - ciflow/h100-symm-mem
+- ciflow/h100-cutlass-backend
 retryable_workflows:
 - pull
 - trunk
--- a/.github/workflows/h100-cutlass-backend.yml
+++ b/.github/workflows/h100-cutlass-backend.yml
@ -0,0 +1,58 @@
+name: Limited CI for CUTLASS backend on H100
+
+on:
+  pull_request:
+    paths:
+      - .github/workflows/h100-cutlass-backend.yml
+  workflow_dispatch:
+  schedule:
+    - cron: 22 9 * * *  # every 24 hours about 2:22am PDT
+  push:
+    tags:
+      - ciflow/h100-cutlass-backend/*
+
+concurrency:
+  group: ${{ github.workflow }}-${{ github.event.pull_request.number || github.sha }}-${{ github.event_name == 'workflow_dispatch' }}-${{ github.event_name == 'schedule' }}
+  cancel-in-progress: true
+
+permissions:
+  id-token: write
+  contents: read
+
+jobs:
+
+  get-label-type:
+    if: github.repository_owner == 'pytorch'
+    name: get-label-type
+    uses: pytorch/pytorch/.github/workflows/_runner-determinator.yml@main
+    with:
+      triggering_actor: ${{ github.triggering_actor }}
+      issue_owner: ${{ github.event.pull_request.user.login || github.event.issue.user.login }}
+      curr_branch: ${{ github.head_ref || github.ref_name }}
+      curr_ref_type: ${{ github.ref_type }}
+
+  linux-jammy-cuda12_8-py3_10-gcc11-sm90-build-cutlass-backend:
+    name: linux-jammy-cuda12.8-py3.10-gcc11-sm90-cutlass-backend
+    uses: ./.github/workflows/_linux-build.yml
+    needs: get-label-type
+    with:
+      runner_prefix: "${{ needs.get-label-type.outputs.label-type }}"
+      build-environment: linux-jammy-cuda12.8-py3.10-gcc11-sm90-cutlass-backend
+      docker-image-name: ci-image:pytorch-linux-jammy-cuda12.8-cudnn9-py3-gcc11
+      cuda-arch-list: '9.0'
+      test-matrix: |
+        { include: [
+          { config: "h100_cutlass_backend", shard: 1, num_shards: 1, runner: "linux.aws.h100", owners: ["oncall:pt2"] },
+        ]}
+    secrets: inherit
+
+  linux-jammy-cuda12_8-py3_10-gcc11-sm90-test:
+    name: linux-jammy-cuda12.8-py3.10-gcc11-sm90-cutlass-backend
+    uses: ./.github/workflows/_linux-test.yml
+    needs:
+      - linux-jammy-cuda12_8-py3_10-gcc11-sm90-build-cutlass-backend
+    with:
+      build-environment: linux-jammy-cuda12.8-py3.10-gcc11-sm90-cutlass-backend
+      docker-image: ${{ needs.linux-jammy-cuda12_8-py3_10-gcc11-sm90-build-cutlass-backend.outputs.docker-image }}
+      test-matrix: ${{ needs.linux-jammy-cuda12_8-py3_10-gcc11-sm90-build-cutlass-backend.outputs.test-matrix }}
+    secrets: inherit
--- a/torch/_inductor/codegen/cuda/cutlass_utils.py
+++ b/torch/_inductor/codegen/cuda/cutlass_utils.py
@ -128,7 +128,7 @@ def try_import_cutlass() -> bool:
        if tmp_cutlass_full_path not in sys.path:

            def link_and_append(dst_link, src_path, parent_dir):
-                if os.path.exists(dst_link):
+                if os.path.lexists(dst_link):
                    assert os.path.islink(dst_link), (
                        f"{dst_link} is not a symlink. Try to remove {dst_link} manually and try again."
                    )
--- a/torch/_inductor/config.py
+++ b/torch/_inductor/config.py
@ -1517,11 +1517,11 @@ class cuda:

    # Path to the CUTLASS repo root directory.
    # The default path only works under PyTorch local development environment.
-    cutlass_dir = os.environ.get(
+    cutlass_dir = os.path.realpath(
+        os.environ.get(
            "TORCHINDUCTOR_CUTLASS_DIR",
-        os.path.abspath(
-            os.path.join(os.path.dirname(torch.__file__), "../third_party/cutlass/")
-        ),
+            os.path.join(os.path.dirname(torch.__file__), "../third_party/cutlass/"),
+        )
    )

    # Configures the maximum number of CUTLASS configs to profile in max_autotune.
--- a/torch/_inductor/utils.py
+++ b/torch/_inductor/utils.py
@ -1649,8 +1649,9 @@ def use_cutlass_template(layout: Layout, m: int, n: int, k: int) -> bool:
        if not try_import_cutlass():
            log.warning(
                "Failed to import CUTLASS lib. Please check whether "
-                "_inductor.config.cuda.cutlass_dir is set correctly. "
-                "Skipping CUTLASS backend for now."
+                "_inductor.config.cuda.cutlass_dir %s is set correctly. "
+                "Skipping CUTLASS backend for now.",
+                config.cuda.cutlass_dir,
            )
            return False
    return res