[1/2]Add summary report for vllm build (#161565)

Demo Run https://github.com/pytorch/pytorch/actions/runs/17259533323?pr=161565 <img width="1538" height="720" alt="image" src="https://github.com/user-attachments/assets/64f6d7b4-cac6-4c12-863c-b15514bb8810" /> Pull Request resolved: https://github.com/pytorch/pytorch/pull/161565 Approved by: https://github.com/huydhn
2025-10-20 21:14:14 +08:00 · 2025-08-27 19:33:04 -07:00
parent d3d9eb4777
commit c83b43d7a8
8 changed files with 232 additions and 9 deletions
--- a/.ci/lumen_cli/cli/lib/common/gh_summary.py
+++ b/.ci/lumen_cli/cli/lib/common/gh_summary.py
@ -0,0 +1,143 @@
+from __future__ import annotations
+
+import logging
+import os
+import textwrap
+from pathlib import Path
+from typing import TYPE_CHECKING
+
+from cli.lib.common.utils import get_wheels
+from jinja2 import Template
+
+
+if TYPE_CHECKING:
+    from collections.abc import Iterable, Mapping
+
+
+logger = logging.getLogger(__name__)
+
+# Markdown for a titled block of text: a level-2 heading followed by `content`
+# inside a fenced code block tagged with `lang`.
+_TPL_CONTENT = Template(
+    textwrap.dedent("""\
+    ## {{ title }}
+
+    ```{{ lang }}
+    {{ content }}
+    ```
+""")
+)
+
+# Markdown for a titled bullet list. Each entry of `items` is rendered as
+# "pkg: relpath"; an empty `items` renders the "_(no item found)_" placeholder.
+_TPL_LIST_ITEMS = Template(
+    textwrap.dedent("""\
+    ## {{ title }}
+    {% for it in items %}
+    - {{ it.pkg }}: {{ it.relpath }}
+    {% else %}
+    _(no item found)_
+    {% endfor %}
+    """)
+)
+
+# Markdown table: `cols` supplies the header, and each entry of `rows` becomes
+# one line with a cell per column (empty when the row lacks that key). A falsy
+# `rows` renders the "_(no data)_" placeholder instead.
+_TPL_TABLE = Template(
+    textwrap.dedent("""\
+    {%- if rows %}
+    | {{ cols | join(' | ') }} |
+    |{%- for _ in cols %} --- |{%- endfor %}
+    {%- for r in rows %}
+    | {%- for c in cols %} {{ r.get(c, "") }} |{%- endfor %}
+    {%- endfor %}
+    {%- else %}
+    _(no data)_
+    {%- endif %}
+""")
+)
+
+
+def gh_summary_path() -> Path | None:
+    """
+    Locate the GitHub Actions step summary file.
+
+    Returns:
+        A Path built from the GITHUB_STEP_SUMMARY environment variable, or
+        None when the variable is unset or empty.
+    """
+    summary_file = os.getenv("GITHUB_STEP_SUMMARY")
+    if not summary_file:
+        return None
+    return Path(summary_file)
+
+
+def write_gh_step_summary(md: str, *, append_content: bool = True) -> bool:
+    """
+    Emit Markdown to the GitHub step summary file, when one is configured.
+
+    The text is dedented and stripped, then written with a single trailing
+    newline.
+
+    Args:
+        md: Markdown text to write.
+        append_content: When True (default) the text is appended to the file;
+            when False the file is overwritten.
+
+    Returns:
+        True when the content was written, False when GITHUB_STEP_SUMMARY is
+        not set (e.g. a local run) and the write was skipped.
+    """
+    target = gh_summary_path()
+    if not target:
+        logger.info("[gh-summary] GITHUB_STEP_SUMMARY not set, skipping write.")
+        return False
+
+    payload = f"{textwrap.dedent(md).strip()}\n"
+    with target.open("a" if append_content else "w", encoding="utf-8") as fh:
+        fh.write(payload)
+    return True
+
+
+def md_heading(text: str, level: int = 2) -> str:
+    """Build a Markdown heading line; `level` is clamped to the range 1-6."""
+    depth = min(level, 6)
+    if depth < 1:
+        depth = 1
+    hashes = "#" * depth
+    return f"{hashes} {text}\n"
+
+
+def md_details(summary: str, content: str) -> str:
+    """Wrap `content` in a collapsible <details> element labelled by `summary`."""
+    # Blank lines surround the content; the trailing "" yields the final newline.
+    parts = (
+        "<details>",
+        f"<summary>{summary}</summary>",
+        "",
+        content,
+        "",
+        "</details>",
+        "",
+    )
+    return "\n".join(parts)
+
+
+def summarize_content_from_file(
+    output_dir: Path,
+    freeze_file: str,
+    title: str = "Content from file",
+    code_lang: str = "",  # e.g. "text" or "ini"
+) -> bool:
+    """
+    Publish the contents of a file as a code block in the GitHub step summary.
+
+    Args:
+        output_dir: Directory that holds the file.
+        freeze_file: Name of the file, relative to `output_dir`.
+        title: Heading rendered above the code block.
+        code_lang: Language tag placed on the code fence.
+
+    Returns:
+        False when the file does not exist; otherwise the result of
+        write_gh_step_summary.
+    """
+    source = Path(output_dir).joinpath(freeze_file)
+    if source.exists():
+        body = source.read_text(encoding="utf-8").strip()
+        return write_gh_step_summary(
+            render_content(body, title=title, lang=code_lang)
+        )
+    return False
+
+
+def summarize_wheels(path: Path, title: str = "Wheels", max_depth: int = 3):
+    """
+    List the wheels found under `path` in the GitHub step summary.
+
+    Returns False when no wheel is found; otherwise the result of
+    write_gh_step_summary.
+    """
+    wheels = get_wheels(path, max_depth=max_depth)
+    if wheels:
+        return write_gh_step_summary(render_list(wheels, title=title))
+    return False
+
+
+def md_kv_table(rows: Iterable[Mapping[str, str | int | float]]) -> str:
+    """
+    Render an iterable of mappings as a Markdown table using a Jinja template.
+
+    The columns are the union of the keys of all rows, in the order in which
+    each key is first seen. A row that lacks a column gets an empty cell.
+
+    Returns:
+        The table followed by a newline, or the "_(no data)_" placeholder
+        when `rows` is empty.
+    """
+    rows = list(rows)
+    # dict.fromkeys de-duplicates while keeping first-seen order. A set would
+    # make the column order arbitrary and, because str hashing is randomized,
+    # different from one run to the next.
+    cols = list(dict.fromkeys(k for r in rows for k in r))
+    md = _TPL_TABLE.render(cols=cols, rows=rows).strip() + "\n"
+    return md
+
+
+def render_list(
+    items: Iterable[Mapping[str, str]],
+    *,
+    title: str = "List",
+) -> str:
+    """
+    Render `items` as a titled Markdown bullet list.
+
+    Args:
+        items: Entries to list. The template reads `pkg` and `relpath` from
+            each entry, so entries must be mappings (or objects) that provide
+            both, not plain strings.
+        title: Heading rendered above the list.
+
+    Returns:
+        The rendered Markdown; an empty `items` renders a placeholder line
+        instead of bullets.
+    """
+    return _TPL_LIST_ITEMS.render(title=title, items=items)
+
+
+def render_content(
+    content: str,
+    *,
+    title: str = "Content",
+    lang: str = "text",
+) -> str:
+    """Render `content` as a fenced code block tagged `lang` under a `title` heading."""
+    return _TPL_CONTENT.render(title=title, content=content, lang=lang)
--- a/.ci/lumen_cli/cli/lib/common/git_helper.py
+++ b/.ci/lumen_cli/cli/lib/common/git_helper.py
@ -45,7 +45,7 @@ def clone_external_repo(target: str, repo: str, dst: str = "", update_submodules

        # Checkout pinned commit
        commit = get_post_build_pinned_commit(target)
-        logger.info("Checking out pinned commit %s", commit)
+        logger.info("Checking out pinned %s commit %s", target, commit)
        r.git.checkout(commit)

        # Update submodules if requested
@ -55,7 +55,7 @@ def clone_external_repo(target: str, repo: str, dst: str = "", update_submodules
                sm.update(init=True, recursive=True, progress=PrintProgress())

        logger.info("Successfully cloned %s", target)
-        return r
+        return r, commit

    except GitCommandError as e:
        logger.error("Git operation failed: %s", e)
--- a/.ci/lumen_cli/cli/lib/common/pip_helper.py
+++ b/.ci/lumen_cli/cli/lib/common/pip_helper.py
@ -4,7 +4,7 @@ import shlex
 import shutil
 import sys
 from collections.abc import Iterable
-from importlib.metadata import PackageNotFoundError, version
+from importlib.metadata import PackageNotFoundError, version  # noqa: UP035
 from typing import Optional, Union

 from cli.lib.common.utils import run_command
--- a/.ci/lumen_cli/cli/lib/common/utils.py
+++ b/.ci/lumen_cli/cli/lib/common/utils.py
@ -8,6 +8,7 @@ import shlex
 import subprocess
 import sys
 from contextlib import contextmanager
+from pathlib import Path
 from typing import Optional


@ -115,3 +116,24 @@ def working_directory(path: str):
        yield
    finally:
        os.chdir(prev_cwd)
+
+
+def get_wheels(
+    output_dir: Path,
+    max_depth: Optional[int] = None,
+) -> list[dict[str, str]]:
+    """
+    Collect the wheel files found under the given output directory.
+
+    Args:
+        output_dir: Directory to search; a missing directory yields an empty
+            list.
+        max_depth: Deepest directory level to search, where 0 is `output_dir`
+            itself. None (default) searches the whole tree.
+
+    Returns:
+        One dict per ``*.whl`` file with the keys ``pkg`` (the file name up to
+        its first ``-``) and ``relpath`` (the path relative to `output_dir`).
+        Directories are visited in sorted order and the files of each
+        directory are sorted by name.
+    """
+    root = Path(output_dir)
+    if not root.exists():
+        return []
+    items: list[dict[str, str]] = []
+    for dirpath, dirnames, filenames in os.walk(root):
+        current = Path(dirpath)
+        depth = len(current.relative_to(root).parts)
+        if max_depth is not None and depth >= max_depth:
+            # Nothing below this level can qualify, so stop os.walk from
+            # descending any further instead of walking and discarding it.
+            dirnames.clear()
+            if depth > max_depth:
+                continue
+        else:
+            # os.walk yields sub-directories in arbitrary order; sort them in
+            # place so the result is the same on every run.
+            dirnames.sort()
+        for fname in sorted(filenames):
+            if fname.endswith(".whl"):
+                pkg = fname.split("-")[0]
+                relpath = str((current / fname).relative_to(root))
+                items.append({"pkg": pkg, "relpath": relpath})
+    return items
--- a/.ci/lumen_cli/cli/lib/core/vllm/lib.py
+++ b/.ci/lumen_cli/cli/lib/core/vllm/lib.py
@ -1,13 +1,27 @@
 import logging
+import os
+import textwrap
 from typing import Any

+from cli.lib.common.gh_summary import write_gh_step_summary
 from cli.lib.common.git_helper import clone_external_repo
 from cli.lib.common.pip_helper import pip_install_packages
 from cli.lib.common.utils import run_command, temp_environ, working_directory
+from jinja2 import Template


 logger = logging.getLogger(__name__)

+# Header of the vLLM build summary: links the vLLM commit and, when `torch_sha`
+# is set, the PyTorch commit as well.
+_TPL_VLLM_INFO = Template(
+    textwrap.dedent("""\
+    ##  Vllm against Pytorch CI Test Summary
+    **Vllm Commit**: [{{ vllm_commit }}](https://github.com/vllm-project/vllm/commit/{{ vllm_commit }})
+    {%- if torch_sha %}
+    **Pytorch Commit**: [{{ torch_sha }}](https://github.com/pytorch/pytorch/commit/{{ torch_sha }})
+    {%- endif %}
+""")
+)
+

 def sample_vllm_test_library():
    """
@ -214,12 +228,13 @@ def run_test_plan(


 def clone_vllm(dst: str = "vllm"):
-    clone_external_repo(
+    _, commit = clone_external_repo(
        target="vllm",
        repo="https://github.com/vllm-project/vllm.git",
        dst=dst,
        update_submodules=True,
    )
+    return commit


 def replace_buildkite_placeholders(step: str, shard_id: int, num_shards: int) -> str:
@ -230,3 +245,12 @@ def replace_buildkite_placeholders(step: str, shard_id: int, num_shards: int) ->
    for k in sorted(mapping, key=len, reverse=True):
        step = step.replace(k, mapping[k])
    return step
+
+
+def summarize_build_info(vllm_commit: str) -> bool:
+    """
+    Write the vLLM build header to the GitHub step summary.
+
+    The header links `vllm_commit` and, when the GITHUB_SHA environment
+    variable is set, the PyTorch commit it names.
+
+    Returns:
+        The result of write_gh_step_summary.
+    """
+    rendered = _TPL_VLLM_INFO.render(
+        vllm_commit=vllm_commit,
+        torch_sha=os.getenv("GITHUB_SHA"),
+    )
+    return write_gh_step_summary(f"{rendered.strip()}\n")
--- a/.ci/lumen_cli/cli/lib/core/vllm/vllm_build.py
+++ b/.ci/lumen_cli/cli/lib/core/vllm/vllm_build.py
@ -13,6 +13,11 @@ from cli.lib.common.envs_helper import (
    env_str_field,
    with_params_help,
 )
+from cli.lib.common.gh_summary import (
+    gh_summary_path,
+    summarize_content_from_file,
+    summarize_wheels,
+)
 from cli.lib.common.path_helper import (
    copy,
    ensure_dir_exists,
@ -21,7 +26,7 @@ from cli.lib.common.path_helper import (
    is_path_exist,
 )
 from cli.lib.common.utils import run_command
-from cli.lib.core.vllm.lib import clone_vllm
+from cli.lib.core.vllm.lib import clone_vllm, summarize_build_info


 logger = logging.getLogger(__name__)
@ -153,18 +158,43 @@ class VllmBuildRunner(BaseRunner):
        """
        inputs = VllmBuildParameters()
        logger.info("Running vllm build with inputs: %s", inputs)
-        clone_vllm()
+        vllm_commit = clone_vllm()

        self.cp_dockerfile_if_exist(inputs)
-
        # cp torch wheels from root direct to vllm workspace if exist
        self.cp_torch_whls_if_exist(inputs)

-        ensure_dir_exists(inputs.output_dir)
+        # make sure the output dir that stores the build artifacts exists
+        ensure_dir_exists(Path(inputs.output_dir))

        cmd = self._generate_docker_build_cmd(inputs)
        logger.info("Running docker build: \n %s", cmd)
-        run_command(cmd, cwd="vllm", env=os.environ.copy())
+
+        try:
+            run_command(cmd, cwd="vllm", env=os.environ.copy())
+        finally:
+            self.genearte_vllm_build_summary(vllm_commit, inputs)
+
+    def genearte_vllm_build_summary(
+        self, vllm_commit: str, inputs: VllmBuildParameters
+    ):
+        """
+        Publish the vLLM build report to the GitHub step summary.
+
+        Only logs and returns when no step summary file is configured.
+        Otherwise it writes, in order: the build info for `vllm_commit`, the
+        contents of build_summary.txt from the "wheels" directory under
+        `inputs.output_dir`, the wheels found under `inputs.torch_whls_path`,
+        and the wheels found under that "wheels" directory.
+        """
+        if not gh_summary_path():
+            logger.info("Skipping, not detect GH Summary env var....")
+            return
+        logger.info("Generate GH Summary ...")
+
+        # build header first: vllm commit (and torch commit when available)
+        summarize_build_info(vllm_commit)
+
+        # then the artifacts exported by the docker build
+        artifact_dir = inputs.output_dir / "wheels"
+        summarize_content_from_file(
+            artifact_dir,
+            "build_summary.txt",
+            title="Vllm build env pip package summary",
+        )
+        for wheel_dir, heading in (
+            (inputs.torch_whls_path, "Torch Wheels Artifacts"),
+            (artifact_dir, "Vllm Wheels Artifacts"),
+        ):
+            summarize_wheels(wheel_dir, max_depth=3, title=heading)

    def cp_torch_whls_if_exist(self, inputs: VllmBuildParameters) -> str:
        if not inputs.use_torch_whl:
--- a/.github/actions/build-external-packages/action.yml
+++ b/.github/actions/build-external-packages/action.yml
@ -48,6 +48,7 @@ runs:
        BASE_IMAGE: ${{ inputs.docker-image }}
        BUILD_TARGETS: ${{ inputs.build-targets }}
        PARENT_OUTPUT_DIR: ${{ inputs.output-dir}}
+
      shell: bash
      run: |
        set -euo pipefail
--- a/.github/ci_configs/vllm/Dockerfile.tmp_vllm
+++ b/.github/ci_configs/vllm/Dockerfile.tmp_vllm
@ -176,6 +176,7 @@ RUN --mount=type=cache,target=/root/.cache/uv \
 # Build can take a long time, and the torch nightly version fetched from url can be different in next docker stage.
 # track the nightly torch version used in the build, when we set up runtime environment we can make sure the version is the same
 RUN uv pip freeze | grep -i '^torch\|^torchvision\|^torchaudio' > torch_build_versions.txt
+
 RUN cat torch_build_versions.txt
 RUN pip freeze | grep -E 'torch|xformers|torchvision|torchaudio'

@ -376,6 +377,7 @@ RUN --mount=type=cache,target=/root/.cache/uv \

 # Logging to confirm the torch versions
 RUN pip freeze | grep -E 'torch|xformers|vllm|flashinfer'
+RUN uv pip freeze | grep -i '^torch\|^torchvision\|^torchaudio\|^xformers\|^vllm\|^flashinfer' > build_summary.txt
 ################### VLLM INSTALLED IMAGE ####################


@ -433,4 +435,5 @@ FROM scratch as export-wheels
 # Just copy the wheels we prepared in previous stages
 COPY --from=base /workspace/xformers-dist /wheels/xformers
 COPY --from=build /workspace/vllm-dist /wheels/vllm
+COPY --from=vllm-base /workspace/build_summary.txt /wheels/build_summary.txt
 COPY --from=vllm-base /workspace/wheels/flashinfer /wheels/flashinfer-python