mirror of
https://github.com/pytorch/pytorch.git
synced 2025-10-20 21:14:14 +08:00
[utilization] pipeline to create clean db records (#145327)
Add upload_utilization_script, which generates DB-ready insert records and uploads them to S3: - generate two files, metadata and timeseries, in the ossci-utilization buckets - convert log records into DB-format records - add a unit test job for tools/stats/ Related PRs: setup composite action for data pipeline: https://github.com/pytorch/pytorch/pull/145310 add permission for composite action to access S3 bucket: https://github.com/pytorch-labs/pytorch-gha-infra/pull/595 add insert logic in s3 replicator: https://github.com/pytorch/test-infra/pull/6217 Pull Request resolved: https://github.com/pytorch/pytorch/pull/145327 Approved by: https://github.com/huydhn Co-authored-by: Huy Do <huydhn@gmail.com>
This commit is contained in:
committed by
PyTorch MergeBot
parent
18a7a04c4a
commit
a9ed7bd78e
@ -23,6 +23,9 @@ def get_s3_resource() -> Any:
|
||||
return boto3.resource("s3")
|
||||
|
||||
|
||||
GHA_ARTIFACTS_BUCKET = "gha-artifacts"
|
||||
|
||||
|
||||
# NB: In CI, a flaky test is usually retried 3 times, then the test file would be rerun
|
||||
# 2 more times
|
||||
MAX_RETRY_IN_NON_DISABLED_MODE = 3 * 3
|
||||
@ -84,16 +87,22 @@ def _download_artifact(
|
||||
|
||||
|
||||
def download_s3_artifacts(
|
||||
prefix: str, workflow_run_id: int, workflow_run_attempt: int
|
||||
prefix: str,
|
||||
workflow_run_id: int,
|
||||
workflow_run_attempt: int,
|
||||
job_id: Optional[int] = None,
|
||||
) -> list[Path]:
|
||||
bucket = get_s3_resource().Bucket("gha-artifacts")
|
||||
bucket = get_s3_resource().Bucket(GHA_ARTIFACTS_BUCKET)
|
||||
objs = bucket.objects.filter(
|
||||
Prefix=f"pytorch/pytorch/{workflow_run_id}/{workflow_run_attempt}/artifact/{prefix}"
|
||||
)
|
||||
|
||||
found_one = False
|
||||
paths = []
|
||||
for obj in objs:
|
||||
object_name = Path(obj.key).name
|
||||
# If job_id is provided, only download artifacts whose file name contains it; skip every other object.
|
||||
if job_id is not None and str(job_id) not in object_name:
|
||||
continue
|
||||
found_one = True
|
||||
p = Path(Path(obj.key).name)
|
||||
print(f"Downloading {p}")
|
||||
|
Reference in New Issue
Block a user