[CI] upload_metrics function to upload to s3 instead of dynamo (#136799)

* Changes the upload_metrics function to upload to the ossci-raw-job-status S3 bucket instead of dynamo
* Moves all added metrics into a field called "info" so that ingesting them into a database table with a strict schema is easier
* Removes the dynamo_key field since it is no longer needed
* Removes the concept of reserved metrics, since they can no longer be overwritten by user-added metrics
* Moves s3 resource initialization behind a function so import is faster
---
Tested by emitting a metric during run_test and seeing that documents got added to s3
Pull Request resolved: https://github.com/pytorch/pytorch/pull/136799
Approved by: https://github.com/ZainRizvi
This commit is contained in:
Catherine Lee
2024-10-02 23:19:28 +00:00
committed by PyTorch MergeBot
parent 2c9e194e23
commit 235f7e06f4
3 changed files with 64 additions and 137 deletions

View File

@ -7,6 +7,7 @@ import math
import os
import time
import zipfile
from functools import lru_cache
from pathlib import Path
from typing import Any, Callable, Dict, List, Optional
@ -16,7 +17,12 @@ import rockset # type: ignore[import]
PYTORCH_REPO = "https://api.github.com/repos/pytorch/pytorch"
S3_RESOURCE = boto3.resource("s3")
@lru_cache
def get_s3_resource() -> Any:
    """Return the boto3 S3 resource, creating it on first call.

    The resource is built inside this function rather than at module level,
    so nothing is constructed until a caller actually asks for it.
    ``lru_cache`` memoizes the result, so later calls get the same object
    back instead of building a new one.
    """
    s3 = boto3.resource("s3")
    return s3
# NB: In CI, a flaky test is usually retried 3 times, then the test file would be rerun
# 2 more times
@ -83,7 +89,7 @@ def _download_artifact(
def download_s3_artifacts(
prefix: str, workflow_run_id: int, workflow_run_attempt: int
) -> list[Path]:
bucket = S3_RESOURCE.Bucket("gha-artifacts")
bucket = get_s3_resource().Bucket("gha-artifacts")
objs = bucket.objects.filter(
Prefix=f"pytorch/pytorch/{workflow_run_id}/{workflow_run_attempt}/artifact/{prefix}"
)
@ -172,7 +178,7 @@ def upload_to_s3(
json.dump(doc, body)
body.write("\n")
S3_RESOURCE.Object(
get_s3_resource().Object(
f"{bucket_name}",
f"{key}",
).put(
@ -189,7 +195,8 @@ def read_from_s3(
) -> list[dict[str, Any]]:
print(f"Reading from s3://{bucket_name}/{key}")
body = (
S3_RESOURCE.Object(
get_s3_resource()
.Object(
f"{bucket_name}",
f"{key}",
)