mirror of
https://github.com/pytorch/pytorch.git
synced 2025-10-20 21:14:14 +08:00
Remove upload_test_stat_aggregates script (#139915)
Instead of moving these queries to ClickHouse, we're just going to remove the script, since it's not really used. We do want something for test aggregates, but we can write a new script for that instead. Pull Request resolved: https://github.com/pytorch/pytorch/pull/139915 Approved by: https://github.com/huydhn
This commit is contained in:
committed by
PyTorch MergeBot
parent
83fa1014f1
commit
df136df8d5
@ -1,4 +1,4 @@
|
||||
name: Nightly Upload to rockset
|
||||
name: Nightly Upload to s3
|
||||
|
||||
on:
|
||||
schedule:
|
||||
@ -7,8 +7,7 @@ on:
|
||||
pull_request:
|
||||
paths:
|
||||
- 'tools/stats/upload_external_contrib_stats.py'
|
||||
- 'tools/stats/upload_test_stat_aggregates.py'
|
||||
- '.github/workflows/nightly-rockset-uploads.yml'
|
||||
- '.github/workflows/nightly-s3-uploads.yml'
|
||||
|
||||
concurrency:
|
||||
group: ${{ github.workflow }}-${{ github.event.pull_request.number || github.sha }}-${{ github.event_name == 'workflow_dispatch' }}
|
||||
@ -16,7 +15,7 @@ concurrency:
|
||||
|
||||
jobs:
|
||||
|
||||
upload-stats-to-rockset:
|
||||
upload-stats-to-s3:
|
||||
if: github.repository_owner == 'pytorch'
|
||||
runs-on: ubuntu-22.04
|
||||
environment: upload-stats
|
||||
@ -33,16 +32,14 @@ jobs:
|
||||
cache: pip
|
||||
|
||||
- run: |
|
||||
pip3 install requests==2.32.2 rockset==1.0.3 boto3==1.35.42
|
||||
pip3 install requests==2.32.2 boto3==1.35.42
|
||||
|
||||
- name: Upload external contribution stats
|
||||
uses: nick-fields/retry@v3.0.0
|
||||
env:
|
||||
ROCKSET_API_KEY: ${{ secrets.ROCKSET_API_KEY }}
|
||||
GITHUB_TOKEN: ${{ secrets.GITHUB_TOKEN }}
|
||||
AWS_ACCESS_KEY_ID: ${{ secrets.AWS_ACCESS_KEY_ID }}
|
||||
AWS_SECRET_ACCESS_KEY: ${{ secrets.AWS_SECRET_ACCESS_KEY }}
|
||||
if: ${{ env.ROCKSET_API_KEY != '' }}
|
||||
with:
|
||||
timeout_minutes: 10
|
||||
max_attempts: 10
|
||||
@ -50,5 +47,3 @@ jobs:
|
||||
command: |
|
||||
echo "Uploading external contribution stats for 10 days starting on" "$(date -d '10 days ago' '+%Y-%m-%d')"
|
||||
python3 -m tools.stats.upload_external_contrib_stats --startDate "$(date -d '10 days ago' '+%Y-%m-%d')" --length 10
|
||||
echo "Uploading testing aggregate data" "$(date -d yesterday '+%Y-%m-%d')"
|
||||
python3 -m tools.stats.upload_test_stat_aggregates --date "$(date -d yesterday '+%Y-%m-%d')"
|
@ -112,7 +112,7 @@ def get_external_pr_data(
|
||||
|
||||
if __name__ == "__main__":
|
||||
parser = argparse.ArgumentParser(
|
||||
description="Upload external contribution stats to Rockset"
|
||||
description="Upload external contribution stats to s3"
|
||||
)
|
||||
parser.add_argument(
|
||||
"--startDate",
|
||||
|
@ -13,7 +13,6 @@ from typing import Any, Callable, Dict, List, Optional
|
||||
|
||||
import boto3 # type: ignore[import]
|
||||
import requests
|
||||
import rockset # type: ignore[import]
|
||||
|
||||
|
||||
PYTORCH_REPO = "https://api.github.com/repos/pytorch/pytorch"
|
||||
@ -128,6 +127,8 @@ def upload_to_rockset(
|
||||
workspace: str = "commons",
|
||||
client: Any = None,
|
||||
) -> None:
|
||||
import rockset # type: ignore[import]
|
||||
|
||||
if not client:
|
||||
client = rockset.RocksetClient(
|
||||
host="api.usw2a1.rockset.com", api_key=os.environ["ROCKSET_API_KEY"]
|
||||
|
@ -1,86 +0,0 @@
|
||||
from __future__ import annotations
|
||||
|
||||
import argparse
|
||||
import ast
|
||||
import datetime
|
||||
import json
|
||||
import os
|
||||
import re
|
||||
from typing import Any
|
||||
|
||||
import rockset # type: ignore[import]
|
||||
|
||||
from tools.stats.upload_stats_lib import upload_to_s3
|
||||
|
||||
|
||||
def get_oncall_from_testfile(testfile: str) -> list[str] | None:
|
||||
path = f"test/{testfile}"
|
||||
if not path.endswith(".py"):
|
||||
path += ".py"
|
||||
# get oncall on test file
|
||||
try:
|
||||
with open(path) as f:
|
||||
for line in f:
|
||||
if line.startswith("# Owner(s): "):
|
||||
possible_lists = re.findall(r"\[.*\]", line)
|
||||
if len(possible_lists) > 1:
|
||||
raise Exception("More than one list found") # noqa: TRY002
|
||||
elif len(possible_lists) == 0:
|
||||
raise Exception( # noqa: TRY002
|
||||
"No oncalls found or file is badly formatted"
|
||||
) # noqa: TRY002
|
||||
oncalls = ast.literal_eval(possible_lists[0])
|
||||
return list(oncalls)
|
||||
except Exception as e:
|
||||
if "." in testfile:
|
||||
return [f"module: {testfile.split('.')[0]}"]
|
||||
else:
|
||||
return ["module: unmarked"]
|
||||
return None
|
||||
|
||||
|
||||
def get_test_stat_aggregates(date: datetime.date) -> Any:
    """Fetch the test aggregates for ``date`` and tag each row with its oncalls.

    Executes the pinned version of the ``test_insights_per_daily_upload``
    query lambda with ``startTime`` set to the ISO form of ``date``, then
    stores the result of ``get_oncall_from_testfile`` for each row's
    ``test_file`` under that row's ``oncalls`` key.

    Args:
        date: Day passed to the query lambda as its ``startTime`` parameter.

    Returns:
        The result rows after a JSON round trip with ``default=str``, so any
        value that is not JSON-serializable comes back as its string form.

    Raises:
        KeyError: If the ``ROCKSET_API_KEY`` environment variable is unset.
    """
    rockset_api_key = os.environ["ROCKSET_API_KEY"]
    iso_date = date.isoformat()
    rs = rockset.RocksetClient(host="api.usw2a1.rockset.com", api_key=rockset_api_key)

    # NOTE(review): earlier revisions defined an unused "commons" collection
    # name and an unused alternate API host here; neither was ever passed to
    # the client or the query, so both were dropped. Presumably the query
    # relies on the client's default workspace -- confirm.
    query_parameters = [
        rockset.models.QueryParameter(name="startTime", type="string", value=iso_date)
    ]
    api_response = rs.QueryLambdas.execute_query_lambda(
        query_lambda="test_insights_per_daily_upload",
        version="692684fa5b37177f",
        parameters=query_parameters,
    )

    results = api_response["results"]
    for row in results:
        row["oncalls"] = get_oncall_from_testfile(row["test_file"])

    # Round-trip through JSON to coerce non-serializable values to strings
    # before the rows are handed to the uploader.
    return json.loads(json.dumps(results, indent=4, sort_keys=True, default=str))
|
||||
|
||||
|
||||
if __name__ == "__main__":
    # Command-line entry point: fetch the aggregates for one day and store
    # them in the "torchci-aggregated-stats" S3 bucket. The description
    # previously claimed the data was uploaded to Rockset, which is only the
    # source of the query, not the upload destination.
    parser = argparse.ArgumentParser(description="Upload test stat aggregates to s3.")
    parser.add_argument(
        "--date",
        type=datetime.date.fromisoformat,
        help="Date to upload test stat aggregates for (YYYY-MM-DD). Must be in the last 30 days",
        required=True,
    )
    args = parser.parse_args()

    # Reject dates older than the 30-day window stated in the --date help.
    oldest_allowed = datetime.date.today() - datetime.timedelta(days=30)
    if args.date < oldest_allowed:
        raise ValueError("date must be in the last 30 days")

    data = get_test_stat_aggregates(date=args.date)
    upload_to_s3(
        bucket_name="torchci-aggregated-stats",
        key=f"test_data_aggregates/{str(args.date)}",
        docs=data,
    )
|
Reference in New Issue
Block a user