From 3eb3f4ed5580010a7961d996ccc6ee19c7ccbb5e Mon Sep 17 00:00:00 2001
From: Catherine Lee
Date: Fri, 3 Jan 2025 00:32:05 +0000
Subject: [PATCH] Upload METADATA file with whl binaries (#143677)

Upload the METADATA file for wheels, per PEP 658
(https://peps.python.org/pep-0658/).

Using a Python script, though bash might have been easier...

Testing:

Example run (dry run only, no actual upload):
https://github.com/pytorch/pytorch/actions/runs/12550595201/job/34994883276

Lightly tested the script to make sure it uploads to S3, but the
integration with the bash script + workflow is untested.

Pull Request resolved: https://github.com/pytorch/pytorch/pull/143677
Approved by: https://github.com/seemethere
---
 .circleci/scripts/binary_upload.sh      | 20 ++++--
 .lintrunner.toml                        |  1 +
 scripts/release/upload_metadata_file.py | 89 +++++++++++++++++++++++++
 3 files changed, 106 insertions(+), 4 deletions(-)
 create mode 100644 scripts/release/upload_metadata_file.py

diff --git a/.circleci/scripts/binary_upload.sh b/.circleci/scripts/binary_upload.sh
index 36461a1b810a..31503226454d 100755
--- a/.circleci/scripts/binary_upload.sh
+++ b/.circleci/scripts/binary_upload.sh
@@ -68,17 +68,29 @@ s3_upload() {
   local pkg_type
   extension="$1"
   pkg_type="$2"
-  s3_root_dir="${UPLOAD_BUCKET}/${pkg_type}/${UPLOAD_CHANNEL}"
+  s3_key_prefix="${pkg_type}/${UPLOAD_CHANNEL}"
   if [[ -z ${UPLOAD_SUBFOLDER:-} ]]; then
-    s3_upload_dir="${s3_root_dir}/"
+    s3_upload_dir="${UPLOAD_BUCKET}/${s3_key_prefix}/"
   else
-    s3_upload_dir="${s3_root_dir}/${UPLOAD_SUBFOLDER}/"
+    s3_key_prefix="${s3_key_prefix}/${UPLOAD_SUBFOLDER}"
+    s3_upload_dir="${UPLOAD_BUCKET}/${s3_key_prefix}/"
   fi
   (
     for pkg in ${PKG_DIR}/*.${extension}; do
       (
         set -x
         ${AWS_S3_CP} --no-progress --acl public-read "${pkg}" "${s3_upload_dir}"
+        if [[ ${pkg_type} == "whl" ]]; then
+          dry_run_arg="--dry-run"
+          if [[ "${DRY_RUN}" = "disabled" ]]; then
+            dry_run_arg=""
+          fi
+          uv run scripts/release/upload_metadata_file.py \
+            --package "${pkg}" \
+            --bucket "${UPLOAD_BUCKET}" \
+            --key-prefix "${s3_key_prefix}" \
+            ${dry_run_arg}
+        fi
       )
     done
   )
@@ -86,7 +98,7 @@
 
 # Install dependencies (should be a no-op if previously installed)
 conda install -yq anaconda-client
-pip install -q awscli
+pip install -q awscli uv
 
 case "${PACKAGE_TYPE}" in
   conda)
diff --git a/.lintrunner.toml b/.lintrunner.toml
index b73073a37beb..05ed0717a659 100644
--- a/.lintrunner.toml
+++ b/.lintrunner.toml
@@ -129,6 +129,7 @@ include_patterns = [
     'test/test_type_hints.py',
     'test/test_type_info.py',
     'test/test_utils.py',
+    'scripts/release/upload_metadata_file.py',
 ]
 exclude_patterns = [
     '**/fb/**',
diff --git a/scripts/release/upload_metadata_file.py b/scripts/release/upload_metadata_file.py
new file mode 100644
index 000000000000..f8c62e07f4d3
--- /dev/null
+++ b/scripts/release/upload_metadata_file.py
@@ -0,0 +1,89 @@
+# /// script
+# requires-python = ">=3.9"
+# dependencies = [
+#     "boto3",
+# ]
+# ///
+import argparse
+import os
+import shutil
+import zipfile
+from functools import cache
+from pathlib import Path
+from typing import Any
+
+import boto3
+
+
+def parse_args() -> argparse.Namespace:
+    parser = argparse.ArgumentParser(description="Upload metadata file to S3")
+    parser.add_argument(
+        "--package", type=str, required=True, help="Path to the package"
+    )
+    parser.add_argument(
+        "--bucket", type=str, required=True, help="S3 bucket to upload metadata file to"
+    )
+    parser.add_argument(
+        "--key-prefix",
+        type=str,
+        required=True,
+        help="S3 key prefix to upload the metadata file under",
+    )
parser.add_argument("--dry-run", action="store_true", help="Dry run") + args = parser.parse_args() + # Sanitize the input a bit by removing s3:// prefix + trailing/leading + # slashes + if args.bucket.startswith("s3://"): + args.bucket = args.bucket[5:] + if args.bucket.endswith("/"): + args.bucket = args.bucket[:-1] + if args.key_prefix.startswith("/"): + args.key_prefix = args.key_prefix[1:] + if args.key_prefix.endswith("/"): + args.key_prefix = args.key_prefix[:-1] + return args + + +@cache +def get_s3_client() -> Any: + return boto3.client("s3") + + +def s3_upload(s3_bucket: str, s3_key: str, file: str, dry_run: bool) -> None: + s3 = get_s3_client() + if dry_run: + print(f"Dry run uploading {file} to s3://{s3_bucket}/{s3_key}") + return + s3.upload_file(file, s3_bucket, s3_key, ExtraArgs={"ChecksumAlgorithm": "sha256"}) + + +def extract_metadata(file: str) -> str: + # Copy the file to a temp location to extract the METADATA file + file_name = Path(file).name + tmp = "/tmp" + shutil.copy(file, tmp) + zip_file = f"{tmp}/{file_name.replace('.whl', '.zip')}" + shutil.move(f"{tmp}/{file_name}", zip_file) + + with zipfile.ZipFile(zip_file, "r") as zip_ref: + for filename in zip_ref.infolist(): + if filename.filename.endswith(".dist-info/METADATA"): + filename.filename = "METADATA" + if os.path.exists(f"{tmp}/METADATA"): + os.remove(f"{tmp}/METADATA") + zip_ref.extract(filename, tmp) + return tmp + + +if __name__ == "__main__": + # https://peps.python.org/pep-0658/ + # Upload the METADATA file to S3 + args = parse_args() + location = extract_metadata(args.package) + metadata_file = f"{location}/METADATA" + s3_upload( + args.bucket, + f"{args.key_prefix}/{Path(args.package).name}.metadata", + metadata_file, + args.dry_run, + )