Remove upload_test_stat_aggregates script (#139915)

Instead of moving these queries to ClickHouse, we're just going to remove the script, since it's not really used. We do want something for test aggregates, but we can write a new script for that instead.
Pull Request resolved: https://github.com/pytorch/pytorch/pull/139915
Approved by: https://github.com/huydhn
Author: Catherine Lee
Date: 2024-11-07 20:14:12 +00:00
Committed by: PyTorch MergeBot
Parent: 83fa1014f1
Commit: df136df8d5
4 changed files with 7 additions and 97 deletions
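The message above alludes to a future replacement for test aggregates. As a hedged sketch only, and not part of this commit: a ClickHouse-backed successor could look roughly like the following. The `clickhouse-connect` client, host, table, and column names are all assumptions; `upload_to_s3` and the bucket/key prefix come from the removed script.

# Hypothetical replacement sketch (illustrative only): pull per-day test
# aggregates from ClickHouse and push them to S3 like the removed script did.
import datetime

import clickhouse_connect  # assumed client: pip install clickhouse-connect

from tools.stats.upload_stats_lib import upload_to_s3


def get_test_stat_aggregates(date: datetime.date) -> list[dict]:
    client = clickhouse_connect.get_client(
        host="clickhouse.example.com",  # placeholder host
        username="default",
        password="",
    )
    # Placeholder table/columns; uses server-side {name:Type} parameter binding.
    result = client.query(
        "SELECT test_file, count() AS runs, avg(time) AS avg_time "
        "FROM default.test_run_summary "
        "WHERE toDate(started_at) = {d:Date} "
        "GROUP BY test_file",
        parameters={"d": date.isoformat()},
    )
    return [dict(zip(result.column_names, row)) for row in result.result_rows]


if __name__ == "__main__":
    day = datetime.date.today() - datetime.timedelta(days=1)
    upload_to_s3(
        bucket_name="torchci-aggregated-stats",  # bucket used by the removed script
        key=f"test_data_aggregates/{day}",
        docs=get_test_stat_aggregates(day),
    )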

.github/workflows/{nightly-rockset-uploads.yml → nightly-s3-uploads.yml}

@@ -1,4 +1,4 @@
-name: Nightly Upload to rockset
+name: Nightly Upload to s3

 on:
   schedule:
@@ -7,8 +7,7 @@ on:
   pull_request:
     paths:
       - 'tools/stats/upload_external_contrib_stats.py'
-      - 'tools/stats/upload_test_stat_aggregates.py'
-      - '.github/workflows/nightly-rockset-uploads.yml'
+      - '.github/workflows/nightly-s3-uploads.yml'

 concurrency:
   group: ${{ github.workflow }}-${{ github.event.pull_request.number || github.sha }}-${{ github.event_name == 'workflow_dispatch' }}
@@ -16,7 +15,7 @@ concurrency:
 jobs:
-  upload-stats-to-rockset:
+  upload-stats-to-s3:
     if: github.repository_owner == 'pytorch'
     runs-on: ubuntu-22.04
     environment: upload-stats
@@ -33,16 +32,14 @@ jobs:
          cache: pip
      - run: |
-          pip3 install requests==2.32.2 rockset==1.0.3 boto3==1.35.42
+          pip3 install requests==2.32.2 boto3==1.35.42
      - name: Upload external contribution stats
        uses: nick-fields/retry@v3.0.0
        env:
-          ROCKSET_API_KEY: ${{ secrets.ROCKSET_API_KEY }}
           GITHUB_TOKEN: ${{ secrets.GITHUB_TOKEN }}
           AWS_ACCESS_KEY_ID: ${{ secrets.AWS_ACCESS_KEY_ID }}
           AWS_SECRET_ACCESS_KEY: ${{ secrets.AWS_SECRET_ACCESS_KEY }}
-        if: ${{ env.ROCKSET_API_KEY != '' }}
        with:
          timeout_minutes: 10
          max_attempts: 10
@@ -50,5 +47,3 @@ jobs:
          command: |
            echo "Uploading external contribution stats for 10 days starting on" "$(date -d '10 days ago' '+%Y-%m-%d')"
            python3 -m tools.stats.upload_external_contrib_stats --startDate "$(date -d '10 days ago' '+%Y-%m-%d')" --length 10
-            echo "Uploading testing aggregate data" "$(date -d yesterday '+%Y-%m-%d')"
-            python3 -m tools.stats.upload_test_stat_aggregates --date "$(date -d yesterday '+%Y-%m-%d')"
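The workflow now runs only the external-contribution upload. For a local dry run of that remaining step, a small Python equivalent of the shell command above (same module and flags, with the `date -d '10 days ago'` arithmetic done in Python; GitHub/AWS credentials are assumed to already be in the environment):

# Illustrative local invocation mirroring the workflow command above.
import datetime
import subprocess

start = (datetime.date.today() - datetime.timedelta(days=10)).isoformat()
subprocess.run(
    [
        "python3", "-m", "tools.stats.upload_external_contrib_stats",
        "--startDate", start,
        "--length", "10",
    ],
    check=True,  # fail loudly, as the retry action would
)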

tools/stats/upload_external_contrib_stats.py

@@ -112,7 +112,7 @@ def get_external_pr_data(
 if __name__ == "__main__":
     parser = argparse.ArgumentParser(
-        description="Upload external contribution stats to Rockset"
+        description="Upload external contribution stats to s3"
     )
     parser.add_argument(
         "--startDate",

tools/stats/upload_stats_lib.py

@@ -13,7 +13,6 @@ from typing import Any, Callable, Dict, List, Optional
 import boto3  # type: ignore[import]
 import requests
-import rockset  # type: ignore[import]

 PYTORCH_REPO = "https://api.github.com/repos/pytorch/pytorch"
@@ -128,6 +127,8 @@ def upload_to_rockset(
     workspace: str = "commons",
     client: Any = None,
 ) -> None:
+    import rockset  # type: ignore[import]
+
     if not client:
         client = rockset.RocksetClient(
             host="api.usw2a1.rockset.com", api_key=os.environ["ROCKSET_API_KEY"]

tools/stats/upload_test_stat_aggregates.py (deleted)

@@ -1,86 +0,0 @@
-from __future__ import annotations
-
-import argparse
-import ast
-import datetime
-import json
-import os
-import re
-from typing import Any
-
-import rockset  # type: ignore[import]
-
-from tools.stats.upload_stats_lib import upload_to_s3
-
-
-def get_oncall_from_testfile(testfile: str) -> list[str] | None:
-    path = f"test/{testfile}"
-    if not path.endswith(".py"):
-        path += ".py"
-    # get oncall on test file
-    try:
-        with open(path) as f:
-            for line in f:
-                if line.startswith("# Owner(s): "):
-                    possible_lists = re.findall(r"\[.*\]", line)
-                    if len(possible_lists) > 1:
-                        raise Exception("More than one list found")  # noqa: TRY002
-                    elif len(possible_lists) == 0:
-                        raise Exception(  # noqa: TRY002
-                            "No oncalls found or file is badly formatted"
-                        )
-                    oncalls = ast.literal_eval(possible_lists[0])
-                    return list(oncalls)
-    except Exception as e:
-        if "." in testfile:
-            return [f"module: {testfile.split('.')[0]}"]
-        else:
-            return ["module: unmarked"]
-    return None
-
-
-def get_test_stat_aggregates(date: datetime.date) -> Any:
-    # Initialize the Rockset client with your API key
-    rockset_api_key = os.environ["ROCKSET_API_KEY"]
-    rockset_api_server = "api.rs2.usw2.rockset.com"
-    iso_date = date.isoformat()
-    rs = rockset.RocksetClient(host="api.usw2a1.rockset.com", api_key=rockset_api_key)
-
-    # Define the name of the Rockset collection and lambda function
-    collection_name = "commons"
-    lambda_function_name = "test_insights_per_daily_upload"
-    query_parameters = [
-        rockset.models.QueryParameter(name="startTime", type="string", value=iso_date)
-    ]
-    api_response = rs.QueryLambdas.execute_query_lambda(
-        query_lambda=lambda_function_name,
-        version="692684fa5b37177f",
-        parameters=query_parameters,
-    )
-    for i in range(len(api_response["results"])):
-        oncalls = get_oncall_from_testfile(api_response["results"][i]["test_file"])
-        api_response["results"][i]["oncalls"] = oncalls
-    return json.loads(
-        json.dumps(api_response["results"], indent=4, sort_keys=True, default=str)
-    )
-
-
-if __name__ == "__main__":
-    parser = argparse.ArgumentParser(
-        description="Upload test stat aggregates to Rockset."
-    )
-    parser.add_argument(
-        "--date",
-        type=datetime.date.fromisoformat,
-        help="Date to upload test stat aggregates for (YYYY-MM-DD). Must be in the last 30 days",
-        required=True,
-    )
-    args = parser.parse_args()
-    if args.date < datetime.datetime.now().date() - datetime.timedelta(days=30):
-        raise ValueError("date must be in the last 30 days")
-    data = get_test_stat_aggregates(date=args.date)
-    upload_to_s3(
-        bucket_name="torchci-aggregated-stats",
-        key=f"test_data_aggregates/{str(args.date)}",
-        docs=data,
-    )
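For reference, the core of the removed `get_oncall_from_testfile` helper was parsing the `# Owner(s):` annotation that PyTorch test files carry in their header. A standalone replication of that parsing against an in-memory line (the annotation value here is made up):

import ast
import re

# Same regex + literal_eval combination the deleted helper used on file headers.
line = '# Owner(s): ["module: dynamo", "module: ci"]'
if line.startswith("# Owner(s): "):
    (owners,) = re.findall(r"\[.*\]", line)
    print(list(ast.literal_eval(owners)))  # ['module: dynamo', 'module: ci']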