Compare commits

...

2 Commits

Author SHA1 Message Date
a59fd407a2 tc 2025-05-13 15:19:12 -07:00
0bcf83cfc2 tc 2025-05-13 15:17:48 -07:00
5 changed files with 203 additions and 176 deletions

View File

@ -0,0 +1,31 @@
name: Reuse old wheel if possible
description: Reuse old wheel if possible

inputs:
  build-environment:
    description: Build environment
    required: true
  workflow-id:
    description: Workflow ID
    required: true

outputs:
  changes:
    # "true" means the old wheel could NOT be reused and a full build is needed
    description: Whether the wheel is reused or not
    value: ${{ steps.check-file-changes.outputs.changes }}

runs:
  using: composite
  steps:
    # Check out pytorch with fetch depth 0
    - name: Check file changes
      id: check-file-changes
      # Composite-action `run` steps must declare a shell explicitly.
      shell: bash
      run: |
        set -x
        # GitHub runs bash with -e, so a failing command would abort the
        # script before `$?` could be inspected — test the command directly.
        if ! python .github/actions/check-old-whl/check_old_whl.py --build-environment "${{ inputs.build-environment }}" --workflow-id "${{ inputs.workflow-id }}"; then
          echo "changes=true" >> "$GITHUB_OUTPUT"
        fi

View File

@ -0,0 +1,145 @@
import argparse
import os
import shutil
import subprocess
import sys
import zipfile
from functools import lru_cache
from pathlib import Path
from typing import Optional

import requests
@lru_cache
def get_merge_base() -> str:
    """Return the merge-base commit SHA of HEAD and origin/main.

    Cached: the repository state does not change during the run, so the
    git call only happens once no matter how often this is queried.
    """
    # git prints the SHA with a trailing newline; strip before returning.
    raw = subprocess.check_output(
        ["git", "merge-base", "HEAD", "origin/main"],
        text=True,
        stderr=subprocess.DEVNULL,
    )
    return raw.strip()
def ok_changed_file(file: str) -> bool:
    """Return True when *file* is safe to change while reusing an old wheel.

    Safe files are pure-Python sources under torch/ (excluding torch/csrc/)
    or under test/ — changes there don't require rebuilding native code.
    """
    if not file.endswith(".py"):
        return False
    in_torch = file.startswith("torch/") and not file.startswith("torch/csrc/")
    in_tests = file.startswith("test/")
    return in_torch or in_tests
def check_changed_files() -> bool:
    """Return True when every file changed since the merge-base is "safe".

    A change set is safe when each file passes ok_changed_file (pure-Python
    sources an old wheel can absorb).  Prints a verdict per file.
    """
    merge_base = get_merge_base()
    # splitlines(), not split(): split() breaks on any whitespace, so a
    # filename containing a space would be reported as two bogus entries.
    changed_files = subprocess.check_output(
        ["git", "diff", "--name-only", merge_base, "HEAD"],
        text=True,
        stderr=subprocess.DEVNULL,
    ).strip().splitlines()
    for file in changed_files:
        if not ok_changed_file(file):
            print(f"File {file} is not allowed to be changed.")
            return False
        else:
            print(f"File {file} is allowed to be changed.")
    return True
def query_github_api(url: str) -> dict:
    """GET *url* from the GitHub REST API and return the decoded JSON body.

    Reads the bearer token from the GITHUB_TOKEN environment variable
    (raises KeyError if unset).  HTTP error statuses are NOT raised here:
    callers probe the returned payload with .get() and treat missing keys
    as "nothing found".
    """
    headers = {
        "Accept": "application/vnd.github.v3+json",
        "Authorization": f"Bearer {os.environ['GITHUB_TOKEN']}",
    }
    # Bound the request so a wedged API endpoint cannot hang the CI job.
    response = requests.get(url, headers=headers, timeout=30)
    return response.json()
def find_old_whl(workflow_id: str, build_environment: Optional[str]) -> bool:
    """Download a previous build's artifacts.zip for the merge-base commit.

    Looks up completed runs of *workflow_id* on main at the merge-base SHA,
    then probes S3 for each run's artifact for *build_environment*.  On the
    first hit, saves it to /tmp/artifacts.zip and returns True; returns
    False when no artifact is found or build_environment is unset.
    """
    if build_environment is None:
        print("BUILD_ENVIRONMENT is not set.")
        return False
    merge_base = get_merge_base()
    workflow_runs = query_github_api(
        f"https://api.github.com/repos/pytorch/pytorch/actions/workflows/{workflow_id}/runs?head_sha={merge_base}&branch=main&status=completed&per_page=100"
    )
    if workflow_runs.get("total_count", 0) == 0:
        print("No workflow runs found.")
        return False
    for run in workflow_runs.get("workflow_runs", []):
        # Look in s3 for the old whl
        run_id = run["id"]
        try:
            url = f"https://gha-artifacts.s3.amazonaws.com/pytorch/pytorch/{run_id}/{build_environment}/artifacts.zip"
            # timeout keeps a stalled S3 connection from hanging the job;
            # the except below treats it like any other fetch failure.
            response = requests.get(url, timeout=60)
            if response.status_code == 200:
                os.makedirs("/tmp", exist_ok=True)
                with open("/tmp/artifacts.zip", "wb") as f:
                    f.write(response.content)
                print(f"Found old whl file from s3: {url}")
                return True
        except requests.RequestException as e:
            print(f"Error checking for old whl: {e}")
            continue
    return False
def unzip_artifact_and_replace_files():
    """Refresh the Python sources inside the downloaded wheel artifact.

    Expects /tmp/artifacts.zip (placed there by find_old_whl).  For every
    wheel in the archive's dist/ directory, replaces its torch/ Python tree
    with the current checkout via rsync, repacks the wheel, then rezips the
    whole artifact into ./artifacts.zip in the working directory.
    """
    # Unzip the artifact and replace files
    with zipfile.ZipFile("/tmp/artifacts.zip", "r") as zip_ref:
        zip_ref.extractall("/tmp/artifacts")

    # A wheel is just a zip archive; rename so the .zip tooling and the
    # stem-named extraction directory below line up.
    wheel_paths = list(Path("/tmp/artifacts/dist").glob("*.whl"))
    # Materialize the glob so this prints the actual paths, not a generator.
    print(wheel_paths)
    for path in wheel_paths:
        new_path = path.with_suffix(".zip")
        os.rename(path, new_path)
        print(f"Renamed {path} to {new_path}")

        # Unzip the wheel
        with zipfile.ZipFile(new_path, "r") as zip_ref:
            print(f"Extracting {new_path} to /tmp/artifacts/dist/{new_path.stem}")
            zip_ref.extractall(f"/tmp/artifacts/dist/{new_path.stem}")

        # Copy python files into the artifact; rsync leaves the wheel's
        # compiled/native files untouched.
        subprocess.check_output(
            ["rsync", "-avz", "torch", f"/tmp/artifacts/dist/{new_path.stem}/torch"],
        )

        # Zip the wheel back
        with zipfile.ZipFile(new_path, "w") as zip_ref:
            for root, _, files in os.walk(f"/tmp/artifacts/dist/{new_path.stem}"):
                for file in files:
                    file_path = os.path.join(root, file)
                    zip_ref.write(file_path, os.path.relpath(file_path, f"/tmp/artifacts/dist/{new_path.stem}"))

        # Rename back to whl
        os.rename(new_path, path)

        # Remove the extracted folder (stdlib instead of shelling out to rm)
        shutil.rmtree(f"/tmp/artifacts/dist/{new_path.stem}")

    # Rezip the artifact
    with zipfile.ZipFile("/tmp/artifacts.zip", "w") as zip_ref:
        for root, _, files in os.walk("/tmp/artifacts"):
            for file in files:
                file_path = os.path.join(root, file)
                zip_ref.write(file_path, os.path.relpath(file_path, "/tmp/artifacts"))

    # Move the artifact to the current directory.  shutil.move copies across
    # filesystems; os.rename would raise EXDEV when /tmp is a separate
    # device (e.g. tmpfs), which it commonly is on CI runners.
    shutil.move("/tmp/artifacts.zip", "artifacts.zip")
    return None
def parse_args():
    """Parse the command-line flags for the old-wheel check."""
    parser = argparse.ArgumentParser(description="Check for old whl files.")
    # Both flags are mandatory; the composite action always supplies them.
    for flag, help_text in (
        ("--workflow-id", "Workflow ID"),
        ("--build-environment", "Build environment"),
    ):
        parser.add_argument(flag, type=str, required=True, help=help_text)
    return parser.parse_args()
if __name__ == "__main__":
    args = parse_args()
    # Both gates must pass before the old wheel is reused:
    #  1. every changed file is a pure-Python file the wheel can absorb;
    #  2. a merge-base artifacts.zip actually exists on S3.
    # The original computed check_changed_files() but never consulted it,
    # so an old wheel could be reused even when C++ sources changed.
    # A non-zero exit tells the calling action to do a full build instead.
    can_use_old_whl = check_changed_files()
    if not can_use_old_whl or not find_old_whl(args.workflow_id, args.build_environment):
        sys.exit(1)
    unzip_artifact_and_replace_files()

View File

@ -132,16 +132,20 @@ jobs:
role-session-name: gha-linux-build
aws-region: us-east-1
- name: Check if can use old whl build
id: use-old-whl
uses: pytorch/pytorch/.github/actions/check-old-whl@main
- name: Calculate docker image
id: calculate-docker-image
uses: pytorch/test-infra/.github/actions/calculate-docker-image@main
if: inputs.build-environment != 'linux-s390x-binary-manywheel'
if: inputs.build-environment != 'linux-s390x-binary-manywheel' && steps.use-old-whl.outputs.changes != 'true'
with:
docker-image-name: ${{ inputs.docker-image-name }}
- name: Use following to pull public copy of the image
id: print-ghcr-mirror
if: inputs.build-environment != 'linux-s390x-binary-manywheel'
if: inputs.build-environment != 'linux-s390x-binary-manywheel' && steps.use-old-whl.outputs.changes != 'true'
env:
ECR_DOCKER_IMAGE: ${{ steps.calculate-docker-image.outputs.docker-image }}
shell: bash
@ -151,7 +155,7 @@ jobs:
- name: Pull docker image
uses: pytorch/test-infra/.github/actions/pull-docker-image@main
if: inputs.build-environment != 'linux-s390x-binary-manywheel'
if: inputs.build-environment != 'linux-s390x-binary-manywheel' && steps.use-old-whl.outputs.changes != 'true'
with:
docker-image: ${{ steps.calculate-docker-image.outputs.docker-image }}
@ -179,14 +183,14 @@ jobs:
- name: Download pytest cache
uses: ./.github/actions/pytest-cache-download
continue-on-error: true
if: inputs.build-environment != 'linux-s390x-binary-manywheel'
if: inputs.build-environment != 'linux-s390x-binary-manywheel' && steps.use-old-whl.outputs.changes != 'true'
with:
cache_dir: .pytest_cache
job_identifier: ${{ github.workflow }}_${{ inputs.build-environment }}
s3_bucket: ${{ inputs.s3-bucket }}
- name: Build
if: steps.filter.outputs.is-test-matrix-empty == 'False' || inputs.test-matrix == ''
if: (steps.filter.outputs.is-test-matrix-empty == 'False' || inputs.test-matrix == '') && steps.use-old-whl.outputs.changes != 'true'
id: build
env:
BUILD_ENVIRONMENT: ${{ inputs.build-environment }}
@ -281,7 +285,7 @@ jobs:
echo "build_time=$((END_TIME - START_TIME))" >> "$GITHUB_OUTPUT"
- name: Archive artifacts into zip
if: inputs.build-generates-artifacts && steps.build.outcome != 'skipped'
if: inputs.build-generates-artifacts && steps.build.outcome != 'skipped' && steps.use-old-whl.outputs.changes != 'true'
run: |
zip -1 -r artifacts.zip dist/ build/custom_test_artifacts build/lib build/bin .additional_ci_files
@ -305,7 +309,7 @@ jobs:
path: artifacts.zip
- name: Upload sccache stats
if: steps.build.outcome != 'skipped' && inputs.build-environment != 'linux-s390x-binary-manywheel'
if: steps.build.outcome != 'skipped' && inputs.build-environment != 'linux-s390x-binary-manywheel' && steps.use-old-whl.outputs.changes != 'true'
uses: ./.github/actions/upload-sccache-stats
with:
github-token: ${{ secrets.GITHUB_TOKEN }}

View File

@ -1,9 +1,9 @@
name: pull
on:
pull_request:
branches-ignore:
- nightly
# pull_request:
# branches-ignore:
# - nightly
push:
branches:
- main

View File

@ -45,159 +45,8 @@ jobs:
curr_branch: ${{ github.head_ref || github.ref_name }}
curr_ref_type: ${{ github.ref_type }}
libtorch-linux-focal-cuda12_6-py3_10-gcc11-debug-build:
name: libtorch-linux-focal-cuda12.6-py3.10-gcc11-debug
uses: ./.github/workflows/_linux-build.yml
needs: get-label-type
with:
build-environment: libtorch-linux-focal-cuda12.6-py3.10-gcc11
docker-image-name: ci-image:pytorch-linux-focal-cuda12.6-cudnn9-py3-gcc11
build-generates-artifacts: false
runner_prefix: "${{ needs.get-label-type.outputs.label-type }}"
runner: "linux.4xlarge"
test-matrix: |
{ include: [
{ config: "default", shard: 1, num_shards: 1 },
]}
secrets: inherit
# no-ops builds test USE_PER_OPERATOR_HEADERS=0 where ATen/ops is not generated
linux-focal-cuda12_6-py3_10-gcc11-no-ops-build:
name: linux-focal-cuda12.6-py3.10-gcc11-no-ops
uses: ./.github/workflows/_linux-build.yml
needs: get-label-type
with:
runner_prefix: "${{ needs.get-label-type.outputs.label-type }}"
build-environment: linux-focal-cuda12.6-py3.10-gcc11-no-ops
docker-image-name: ci-image:pytorch-linux-focal-cuda12.6-cudnn9-py3-gcc11
test-matrix: |
{ include: [
{ config: "default", shard: 1, num_shards: 1 },
]}
secrets: inherit
macos-py3-arm64-build:
if: github.repository_owner == 'pytorch'
name: macos-py3-arm64
uses: ./.github/workflows/_mac-build.yml
with:
sync-tag: macos-py3-arm64-build
build-environment: macos-py3-arm64
runner-type: macos-m1-stable
build-generates-artifacts: true
# To match the one pre-installed in the m1 runners
python-version: 3.9.12
test-matrix: |
{ include: [
{ config: "default", shard: 1, num_shards: 3, runner: "macos-m1-stable" },
{ config: "default", shard: 2, num_shards: 3, runner: "macos-m1-stable" },
{ config: "default", shard: 3, num_shards: 3, runner: "macos-m1-stable" },
{ config: "mps", shard: 1, num_shards: 1, runner: "macos-m1-13" },
{ config: "mps", shard: 1, num_shards: 1, runner: "macos-m1-14" },
{ config: "mps", shard: 1, num_shards: 1, runner: "macos-m2-15" },
]}
secrets: inherit
macos-py3-arm64-test:
name: macos-py3-arm64
uses: ./.github/workflows/_mac-test.yml
needs:
- macos-py3-arm64-build
- target-determination
with:
build-environment: macos-py3-arm64
# Same as the build job
python-version: 3.9.12
test-matrix: ${{ needs.macos-py3-arm64-build.outputs.test-matrix }}
secrets: inherit
win-vs2022-cpu-py3-build:
name: win-vs2022-cpu-py3
uses: ./.github/workflows/_win-build.yml
needs: get-label-type
with:
build-environment: win-vs2022-cpu-py3
cuda-version: cpu
sync-tag: win-cpu-build
runner: "${{ needs.get-label-type.outputs.label-type }}windows.4xlarge.nonephemeral"
test-matrix: |
{ include: [
{ config: "default", shard: 1, num_shards: 3, runner: "${{ needs.get-label-type.outputs.label-type }}windows.4xlarge.nonephemeral" },
{ config: "default", shard: 2, num_shards: 3, runner: "${{ needs.get-label-type.outputs.label-type }}windows.4xlarge.nonephemeral" },
{ config: "default", shard: 3, num_shards: 3, runner: "${{ needs.get-label-type.outputs.label-type }}windows.4xlarge.nonephemeral" },
]}
secrets: inherit
win-vs2022-cpu-py3-test:
name: win-vs2022-cpu-py3
uses: ./.github/workflows/_win-test.yml
needs:
- win-vs2022-cpu-py3-build
- target-determination
with:
build-environment: win-vs2022-cpu-py3
cuda-version: cpu
test-matrix: ${{ needs.win-vs2022-cpu-py3-build.outputs.test-matrix }}
secrets: inherit
win-vs2022-cuda12_6-py3-build:
name: win-vs2022-cuda12.6-py3
uses: ./.github/workflows/_win-build.yml
needs: get-label-type
with:
build-environment: win-vs2022-cuda12.6-py3
cuda-version: "12.6"
runner: "${{ needs.get-label-type.outputs.label-type }}windows.4xlarge.nonephemeral"
secrets: inherit
linux-jammy-rocm-py3_10-build:
if: ${{ startsWith(github.event.ref, 'refs/tags/ciflow/trunk') }}
name: linux-jammy-rocm-py3.10
uses: ./.github/workflows/_linux-build.yml
needs: get-label-type
with:
runner_prefix: "${{ needs.get-label-type.outputs.label-type }}"
build-environment: linux-jammy-rocm-py3.10
docker-image-name: ci-image:pytorch-linux-jammy-rocm-n-py3
sync-tag: rocm-build
test-matrix: |
{ include: [
{ config: "default", shard: 1, num_shards: 2, runner: "linux.rocm.gpu.2" },
{ config: "default", shard: 2, num_shards: 2, runner: "linux.rocm.gpu.2" },
{ config: "distributed", shard: 1, num_shards: 1, runner: "linux.rocm.gpu.4" },
]}
secrets: inherit
linux-jammy-rocm-py3_10-test:
if: ${{ startsWith(github.event.ref, 'refs/tags/ciflow/trunk') }}
permissions:
id-token: write
contents: read
name: linux-jammy-rocm-py3.10
uses: ./.github/workflows/_rocm-test.yml
needs:
- linux-jammy-rocm-py3_10-build
- target-determination
with:
build-environment: linux-jammy-rocm-py3.10
docker-image: ${{ needs.linux-jammy-rocm-py3_10-build.outputs.docker-image }}
test-matrix: ${{ needs.linux-jammy-rocm-py3_10-build.outputs.test-matrix }}
tests-to-include: "test_nn test_torch test_cuda test_ops test_unary_ufuncs test_binary_ufuncs test_autograd inductor/test_torchinductor distributed/test_c10d_common distributed/test_c10d_nccl"
secrets: inherit
# NB: Keep this in sync with inductor-perf-test-nightly.yml
linux-focal-cuda12_4-py3_10-gcc9-inductor-build:
name: cuda12.4-py3.10-gcc9-sm80
uses: ./.github/workflows/_linux-build.yml
needs: get-label-type
with:
build-environment: linux-focal-cuda12.4-py3.10-gcc9-sm80
docker-image-name: ci-image:pytorch-linux-focal-cuda12.4-cudnn9-py3-gcc9-inductor-benchmarks
cuda-arch-list: '8.0'
secrets: inherit
verify-cachebench-cpu-build:
name: verify-cachebench-cpu-build
linux-jammy-py3_9-gcc11-build:
name: linux-jammy-py3.9-gcc11
uses: ./.github/workflows/_linux-build.yml
needs: get-label-type
with:
@ -206,18 +55,16 @@ jobs:
docker-image-name: ci-image:pytorch-linux-jammy-py3.9-gcc11
test-matrix: |
{ include: [
{ config: "verify_cachebench", shard: 1, num_shards: 1, runner: "${{ needs.get-label-type.outputs.label-type }}linux.2xlarge" },
{ config: "default", shard: 1, num_shards: 5, runner: "${{ needs.get-label-type.outputs.label-type }}linux.2xlarge" },
{ config: "default", shard: 2, num_shards: 5, runner: "${{ needs.get-label-type.outputs.label-type }}linux.2xlarge" },
{ config: "default", shard: 3, num_shards: 5, runner: "${{ needs.get-label-type.outputs.label-type }}linux.2xlarge" },
{ config: "default", shard: 4, num_shards: 5, runner: "${{ needs.get-label-type.outputs.label-type }}linux.2xlarge" },
{ config: "default", shard: 5, num_shards: 5, runner: "${{ needs.get-label-type.outputs.label-type }}linux.2xlarge" },
{ config: "docs_test", shard: 1, num_shards: 1, runner: "${{ needs.get-label-type.outputs.label-type }}linux.2xlarge" },
{ config: "jit_legacy", shard: 1, num_shards: 1, runner: "${{ needs.get-label-type.outputs.label-type }}linux.2xlarge" },
{ config: "backwards_compat", shard: 1, num_shards: 1, runner: "${{ needs.get-label-type.outputs.label-type }}linux.2xlarge" },
{ config: "distributed", shard: 1, num_shards: 2, runner: "${{ needs.get-label-type.outputs.label-type }}linux.2xlarge" },
{ config: "distributed", shard: 2, num_shards: 2, runner: "${{ needs.get-label-type.outputs.label-type }}linux.2xlarge" },
{ config: "numpy_2_x", shard: 1, num_shards: 1, runner: "${{ needs.get-label-type.outputs.label-type }}linux.2xlarge" },
]}
secrets: inherit
verify-cachebench-cpu-test:
name: verify-cachebench-cpu-test
uses: ./.github/workflows/_linux-test.yml
needs:
- verify-cachebench-cpu-build
- target-determination
with:
build-environment: linux-jammy-py3.9-gcc11
docker-image: ${{ needs.verify-cachebench-cpu-build.outputs.docker-image }}
test-matrix: ${{ needs.verify-cachebench-cpu-build.outputs.test-matrix }}
secrets: inherit