mirror of https://github.com/pytorch/pytorch.git (synced 2025-10-20 21:14:14 +08:00)
Revert "Upload external contribution data to s3 (#95747)"
This reverts commit f418e1f8b63c0c15f52b373a57bfd9d65d02b172.
Reverted https://github.com/pytorch/pytorch/pull/95747 on behalf of https://github.com/clee2000 because it broke lint on master (the merge base is too old); see https://github.com/pytorch/pytorch/actions/runs/4315881630/jobs/7531170401 (f418e1f8b6)
.github/workflows/upload-contrib-stats.yml (27 lines deleted)

@@ -1,27 +0,0 @@
-name: Upload contribution stats
-
-on:
-  schedule:
-    # Choose a random time near midnight PST because it may be delayed if there are high loads
-    - cron: 37 7 * * *
-
-jobs:
-
-  upload-contribution-stats:
-    runs-on: ubuntu-latest
-    steps:
-      - name: Checkout PyTorch
-        uses: pytorch/pytorch/.github/actions/checkout-pytorch@master
-
-      - run: |
-          pip3 install requests==2.26
-          pip3 install rockset==1.0.3
-          pip3 install boto3==1.19.12
-
-      - name: Upload external contribution stats
-        env:
-          ROCKSET_API_KEY: ${{ secrets.ROCKSET_API_KEY }}
-          GITHUB_TOKEN: ${{ secrets.GITHUB_TOKEN }}
-        run: |
-          echo "Uploading external contribution stats for $(date -v-1d +%F)"
-          python3 -m tools.stats.upload_external_contrib_stats --startDate "$(date -v-1d +%F)"
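Note (not part of the diff): the deleted workflow step amounts to running the uploader module for the previous day. A minimal local sketch of that step, assuming the pinned dependencies above are installed, the repository root is the working directory, and GITHUB_TOKEN/ROCKSET_API_KEY are exported as in the workflow env:

import datetime
import subprocess

# Yesterday's date in ISO format, mirroring "$(date -v-1d +%F)" in the deleted step.
yesterday = (datetime.date.today() - datetime.timedelta(days=1)).isoformat()
subprocess.run(
    ["python3", "-m", "tools.stats.upload_external_contrib_stats", "--startDate", yesterday],
    check=True,
)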
@@ -11,7 +11,7 @@ from tools.stats.upload_stats_lib import (
     download_s3_artifacts,
     is_rerun_disabled_tests,
     unzip,
-    upload_workflow_stats_to_s3,
+    upload_to_s3,
 )
 from tools.stats.upload_test_stats import process_xml_element
 
@@ -218,7 +218,7 @@ def save_results(
                 f" {disabled_test_name} from {filename}, failing {num_red}/{num_red + num_green}"
             )
 
-    upload_workflow_stats_to_s3(
+    upload_to_s3(
         workflow_id,
         workflow_run_attempt,
         "rerun_disabled_tests",
tools/stats/upload_external_contrib_stats.py (142 lines deleted)

@@ -1,142 +0,0 @@
-import argparse
-import datetime
-import json
-import os
-import urllib.parse
-from typing import Any, Callable, cast, Dict, List, Optional, Set
-from urllib.error import HTTPError
-from urllib.request import Request, urlopen
-
-# import time
-from tools.stats.upload_stats_lib import upload_to_s3
-
-FILTER_OUT_USERS = set(["pytorchmergebot", "facebook-github-bot", "pytorch-bot[bot]"])
-
-
-def _fetch_url(
-    url: str,
-    headers: Dict[str, str],
-    data: Optional[Dict[str, Any]] = None,
-    method: Optional[str] = None,
-    reader: Callable[[Any], Any] = lambda x: x.read(),
-) -> Any:
-    token = os.environ.get("GITHUB_TOKEN")
-    if token is not None and url.startswith("https://api.github.com/"):
-        headers["Authorization"] = f"token {token}"
-    data_ = json.dumps(data).encode() if data is not None else None
-    try:
-        with urlopen(Request(url, headers=headers, data=data_, method=method)) as conn:
-            return reader(conn)
-    except HTTPError as err:
-        print(err.reason)
-        print(err.headers)
-        if err.code == 403 and all(
-            key in err.headers for key in ["X-RateLimit-Limit", "X-RateLimit-Used"]
-        ):
-            print(
-                f"Rate limit exceeded: {err.headers['X-RateLimit-Used']}/{err.headers['X-RateLimit-Limit']}"
-            )
-        raise
-
-
-def fetch_json(
-    url: str,
-    params: Optional[Dict[str, Any]] = None,
-    data: Optional[Dict[str, Any]] = None,
-) -> List[Dict[str, Any]]:
-    headers = {"Accept": "application/vnd.github.v3+json"}
-    if params is not None and len(params) > 0:
-        url += "?" + "&".join(
-            f"{name}={urllib.parse.quote(str(val))}" for name, val in params.items()
-        )
-    return cast(
-        List[Dict[str, Any]],
-        _fetch_url(url, headers=headers, data=data, reader=json.load),
-    )
-
-
-def get_external_pr_data(
-    start_date: datetime.date, end_date: datetime.date, period_length: int = 1
-) -> List[Dict[str, Any]]:
-    pr_info = []
-    period_begin_date = start_date
-
-    pr_count = 0
-    users: Set[str] = set()
-    while period_begin_date < end_date:
-        period_end_date = period_begin_date + datetime.timedelta(days=period_length - 1)
-        page = 1
-        responses: List[Dict[str, Any]] = []
-        while len(responses) > 0 or page == 1:
-            response = cast(
-                Dict[str, Any],
-                fetch_json(
-                    "https://api.github.com/search/issues",
-                    params={
-                        "q": f'repo:pytorch/pytorch is:pr is:closed \
-                            label:"open source" label:Merged -label:Reverted closed:{period_begin_date}..{period_end_date}',
-                        "per_page": "100",
-                        "page": str(page),
-                    },
-                ),
-            )
-            items = response["items"]
-            for item in items:
-                u = item["user"]["login"]
-                if u not in FILTER_OUT_USERS:
-                    pr_count += 1
-                    users.add(u)
-            page += 1
-
-        pr_info.append(
-            {
-                "date": str(period_begin_date),
-                "pr_count": pr_count,
-                "user_count": len(users),
-            }
-        )
-        period_begin_date = period_end_date + datetime.timedelta(days=1)
-    return pr_info
-
-
-if __name__ == "__main__":
-    parser = argparse.ArgumentParser(
-        description="Upload external contribution stats to Rockset"
-    )
-    parser.add_argument(
-        "--startDate",
-        type=datetime.date.fromisoformat,
-        required=True,
-        help="the first date to upload data for in any valid ISO 8601 format format (eg. YYYY-MM-DD).",
-    )
-    parser.add_argument(
-        "--length",
-        type=int,
-        required=False,
-        help="the number of days to upload data for. Default is 1.",
-        default=1,
-    )
-    parser.add_argument(
-        "--period-length",
-        type=int,
-        required=False,
-        help="the number of days to group data for. Default is 1.",
-        default=1,
-    )
-    args = parser.parse_args()
-    for i in range(args.length):
-        startdate = args.startDate + datetime.timedelta(days=i)
-        data = get_external_pr_data(
-            startdate,
-            startdate + datetime.timedelta(days=args.period_length),
-            period_length=args.period_length,
-        )
-        upload_to_s3(
-            bucket_name="torchci-contribution-data",
-            key=f"external_contribution_counts/{str(startdate)}",
-            docs=data,
-        )
-        # uncomment when running large queries locally to avoid github's rate limiting
-        #
-        # import time
-        # time.sleep(20)
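For context (not part of the diff): against the pre-revert tree, the deleted script's core helper can be exercised on its own. A rough sketch, assuming it is run from the repository root with a GITHUB_TOKEN exported to avoid search-API rate limits, and using a hypothetical date:

import datetime
import json

from tools.stats.upload_external_contrib_stats import get_external_pr_data

# One-day window, matching the script's default --period-length of 1.
start = datetime.date(2023, 3, 1)  # hypothetical date, for illustration only
records = get_external_pr_data(start, start + datetime.timedelta(days=1))

# Each record has the shape {"date": ..., "pr_count": ..., "user_count": ...},
# which is what the script would have uploaded under
# external_contribution_counts/<date> in the torchci-contribution-data bucket.
print(json.dumps(records, indent=2))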
tools/stats/upload_stats_lib.py

@@ -116,8 +116,9 @@ def upload_to_rockset(collection: str, docs: List[Any]) -> None:
 
 
 def upload_to_s3(
-    bucket_name: str,
-    key: str,
+    workflow_run_id: int,
+    workflow_run_attempt: int,
+    collection: str,
     docs: List[Dict[str, Any]],
 ) -> None:
     print(f"Writing {len(docs)} documents to S3")
@@ -126,7 +127,10 @@ def upload_to_s3(
         json.dump(doc, body)
         body.write("\n")
 
-    S3_RESOURCE.Object(f"{bucket_name}", f"{key}",).put(
+    S3_RESOURCE.Object(
+        "ossci-raw-job-status",
+        f"{collection}/{workflow_run_id}/{workflow_run_attempt}",
+    ).put(
         Body=gzip.compress(body.getvalue().encode()),
         ContentEncoding="gzip",
         ContentType="application/json",
@@ -134,17 +138,6 @@ def upload_to_s3(
     print("Done!")
 
 
-def upload_workflow_stats_to_s3(
-    workflow_run_id: int,
-    workflow_run_attempt: int,
-    collection: str,
-    docs: List[Dict[str, Any]],
-) -> None:
-    bucket_name = "ossci-raw-job-status"
-    key = f"{collection}/{workflow_run_id}/{workflow_run_attempt}"
-    upload_to_s3(bucket_name, key, docs)
-
-
 def upload_file_to_s3(
     file_name: str,
     bucket: str,
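For context (not part of the diff): this hunk is why the caller files above and below also change. Before the revert, upload_to_s3 took an explicit bucket_name and key, and upload_workflow_stats_to_s3 was a thin wrapper that filled in the ossci-raw-job-status bucket; after the revert, upload_to_s3 itself hard-codes that bucket and key layout. A sketch of the post-revert call shape, with hypothetical run coordinates (actually running it would need AWS credentials with write access to the bucket):

from typing import Any, Dict, List

from tools.stats.upload_stats_lib import upload_to_s3

# Hypothetical values, for illustration only.
workflow_run_id = 1234567890
workflow_run_attempt = 1
docs: List[Dict[str, Any]] = [{"example": "doc"}]

# Post-revert: writes gzipped JSON lines to
# s3://ossci-raw-job-status/test_run_summary/<run id>/<attempt>
upload_to_s3(workflow_run_id, workflow_run_attempt, "test_run_summary", docs)

# Pre-revert (#95747) equivalent, shown only for contrast:
# upload_workflow_stats_to_s3(workflow_run_id, workflow_run_attempt, "test_run_summary", docs)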
@@ -11,7 +11,7 @@ from tools.stats.upload_stats_lib import (
     download_s3_artifacts,
     is_rerun_disabled_tests,
     unzip,
-    upload_workflow_stats_to_s3,
+    upload_to_s3,
 )
 
 
@@ -340,14 +340,14 @@ if __name__ == "__main__":
         test_case_summary, pytest_parallel_times
     )
 
-    upload_workflow_stats_to_s3(
+    upload_to_s3(
         args.workflow_run_id,
         args.workflow_run_attempt,
         "test_run_summary",
         test_case_summary,
     )
 
-    upload_workflow_stats_to_s3(
+    upload_to_s3(
         args.workflow_run_id,
         args.workflow_run_attempt,
         "invoking_file_times",
@@ -356,6 +356,6 @@ if __name__ == "__main__":
 
     if args.head_branch == "master":
         # For master jobs, upload everytihng.
-        upload_workflow_stats_to_s3(
+        upload_to_s3(
             args.workflow_run_id, args.workflow_run_attempt, "test_run", test_cases
         )