Remove references to Rockset in trymerge (#137207)

This is part of the migration from Rockset to ClickHouse.

In addition, Rockset is no longer used in trymerge.
Pull Request resolved: https://github.com/pytorch/pytorch/pull/137207
Approved by: https://github.com/huydhn, https://github.com/ZainRizvi
This commit is contained in:
Catherine Lee
2024-10-05 12:53:20 +00:00
committed by PyTorch MergeBot
parent 40d8260745
commit f54e142c58
3 changed files with 16 additions and 73 deletions

View File

@ -12,7 +12,7 @@ import json
import os
import warnings
from hashlib import sha256
from typing import Any, Dict, List, Optional
from typing import Any, List, Optional
from unittest import main, mock, skip, TestCase
from urllib.error import HTTPError
@ -24,7 +24,6 @@ from trymerge import (
find_matching_merge_rule,
get_classifications,
get_drci_classifications,
get_rockset_results,
gh_get_team_members,
GitHubPR,
JobCheckState,
@ -42,7 +41,6 @@ if "GIT_REMOTE_URL" not in os.environ:
os.environ["GIT_REMOTE_URL"] = "https://github.com/pytorch/pytorch"
GQL_MOCKS = "gql_mocks.json.gz"
ROCKSET_MOCKS = "rockset_mocks.json.gz"
DRCI_MOCKS = "drci_mocks.json.gz"
@ -77,16 +75,11 @@ def mock_query(
if err.code == 401 or err.code == 403:
err_msg = f"If you are seeing this message during workflow run, please make sure to update {file_name}"
err_msg += f" locally, by deleting it and running {os.path.basename(__file__)} with"
err_msg += " GitHub Personal Access Token passed via GITHUB_TOKEN,"
err_msg += " the rockset api key passed via ROCKSET_API_KEY,"
err_msg += " GitHub Personal Access Token passed via GITHUB_TOKEN"
err_msg += " and drci api key passed via DRCI_BOT_KEY environment variables"
if (
os.getenv("GITHUB_TOKEN") is None
or os.getenv("ROCKSET_API_KEY") is None
or os.getenv("DRCI_BOT_KEY") is None
):
if os.getenv("GITHUB_TOKEN") is None or os.getenv("DRCI_BOT_KEY") is None:
err_msg = (
"Failed to update cached queries as GITHUB_TOKEN or ROCKSET_API_KEY or DRCI_BOT_KEY "
"Failed to update cached queries as GITHUB_TOKEN or DRCI_BOT_KEY "
+ "is not defined. "
+ err_msg
)
@ -110,16 +103,6 @@ def mocked_gh_graphql(query: str, **kwargs: Any) -> Any:
return mock_query(gh_graphql_wrapper, GQL_MOCKS, key_function, query, kwargs)
def mocked_rockset_results(head_sha: str, merge_base: str, num_retries: int = 3) -> Any:
return mock_query(
get_rockset_results,
ROCKSET_MOCKS,
lambda x, y: f"{x} {y}",
head_sha,
merge_base,
)
def mocked_drci_classifications(pr_num: int, project: str, num_retries: int = 3) -> Any:
return mock_query(
get_drci_classifications,
@ -273,10 +256,6 @@ def xla_merge_rules(repo: Any, org: str, project: str) -> List[MergeRule]:
]
def empty_rockset_results(head_sha: str, merge_base: str) -> List[Dict[str, Any]]:
return []
class DummyGitRepo(GitRepo):
def __init__(self) -> None:
super().__init__(get_git_repo_dir(), get_git_remote_name())
@ -288,7 +267,6 @@ class DummyGitRepo(GitRepo):
return "super awsome commit message"
@mock.patch("trymerge.get_rockset_results", side_effect=empty_rockset_results)
@mock.patch("trymerge.gh_graphql", side_effect=mocked_gh_graphql)
@mock.patch(
"trymerge.get_drci_classifications", side_effect=mocked_drci_classifications
@ -604,7 +582,6 @@ class TestTryMerge(TestCase):
mocked_gh_fetch_merge_base.assert_called_once()
@mock.patch("trymerge.get_rockset_results", side_effect=mocked_rockset_results)
@mock.patch("trymerge.gh_graphql", side_effect=mocked_gh_graphql)
@mock.patch("trymerge.gh_fetch_merge_base", return_value="")
@mock.patch(
@ -843,7 +820,7 @@ class TestBypassFailures(TestCase):
checks = pr.get_checkrun_conclusions()
# Known flaky failure takes precedence over ignore current (need to set the
# merge base here to get the results from Rockset, and that categorize the
# merge base here to get the results from Dr. CI, and that categorize the
# broken trunk failure too
checks = get_classifications(
pr.pr_num,
@ -929,7 +906,6 @@ class TestBypassFailures(TestCase):
)
@mock.patch("trymerge.get_rockset_results", side_effect=mocked_rockset_results)
@mock.patch("trymerge.gh_graphql", side_effect=mocked_gh_graphql)
@mock.patch("trymerge.gh_fetch_merge_base", return_value="")
@mock.patch("trymerge.get_drci_classifications", return_value={})
@ -1008,7 +984,6 @@ class TestBypassFailuresOnSandCastle(TestCase):
self.assertTrue(len(failed) == 2)
@mock.patch("trymerge.get_rockset_results", side_effect=mocked_rockset_results)
@mock.patch("trymerge.gh_graphql", side_effect=mocked_gh_graphql)
@mock.patch("trymerge.gh_fetch_merge_base", return_value="")
@mock.patch(

View File

@ -452,8 +452,6 @@ RE_DIFF_REV = re.compile(r"^Differential Revision:.+?(D[0-9]+)", re.MULTILINE)
CIFLOW_LABEL = re.compile(r"^ciflow/.+")
CIFLOW_TRUNK_LABEL = re.compile(r"^ciflow/trunk")
MERGE_RULE_PATH = Path(".github") / "merge_rules.yaml"
ROCKSET_MERGES_COLLECTION = "merges"
ROCKSET_MERGES_WORKSPACE = "commons"
REMOTE_MAIN_BRANCH = "origin/main"
DRCI_CHECKRUN_NAME = "Dr.CI"
INTERNAL_CHANGES_CHECKRUN_NAME = "Meta Internal-Only Changes Check"
@ -1180,7 +1178,7 @@ class GitHubPR:
merge_commit_sha = repo.rev_parse(name=self.default_branch())
if comment_id and self.pr_num:
# Finally, upload the record to Rockset. The list of pending and failed
# Finally, upload the record to s3. The list of pending and failed
# checks are at the time of the merge
save_merge_record(
comment_id=comment_id,
@ -1202,7 +1200,7 @@ class GitHubPR:
ignore_current=bool(ignore_current_checks),
)
else:
print("Missing comment ID or PR number, couldn't upload to Rockset")
print("Missing comment ID or PR number, couldn't upload to s3")
# Usually Github will see that the commit has "resolves <pr_num>" in the
# commit message and close the PR, but sometimes it doesn't, leading to
@ -1481,7 +1479,7 @@ def find_matching_merge_rule(
# Categorize all checks when skip_mandatory_checks (force merge) is set. Do it here
# where the list of checks is readily available. These records will be saved into
# Rockset merge records
# s3 merge records
(
pending_mandatory_checks,
failed_mandatory_checks,
@ -1568,7 +1566,7 @@ def save_merge_record(
This saves the merge records as a json, which can later be uploaded to s3
"""
# Prepare the record to be written into Rockset
# Prepare the record to be written into s3
data = [
{
"comment_id": comment_id,
@ -1590,7 +1588,8 @@ def save_merge_record(
"ignore_current": ignore_current,
"error": error,
# This is a unique identifier for the record for deduping purposes
# in rockset. Any unique string would work
# in Rockset. Any unique string would work. This will not be used
# after we migrate off Rockset
"_id": f"{project}-{pr_num}-{comment_id}-{os.environ.get('GITHUB_RUN_ID')}",
}
]
@ -1600,36 +1599,6 @@ def save_merge_record(
json.dump(data, f)
@retries_decorator(rc=[])
def get_rockset_results(head_sha: str, merge_base: str) -> List[Dict[str, Any]]:
query = f"""
SELECT
w.name as workflow_name,
j.id,
j.name,
j.conclusion,
j.completed_at,
j.html_url,
j.head_sha,
j.torchci_classification.captures as failure_captures,
LENGTH(j.steps) as steps,
FROM
commons.workflow_job j join commons.workflow_run w on w.id = j.run_id
where
j.head_sha in ('{head_sha}','{merge_base}')
"""
try:
import rockset # type: ignore[import]
res = rockset.RocksetClient(
host="api.usw2a1.rockset.com", api_key=os.environ["ROCKSET_API_KEY"]
).sql(query)
return cast(List[Dict[str, Any]], res.results)
except ModuleNotFoundError:
print("Could not use RockSet as rocket dependency is missing")
return []
@retries_decorator()
def get_drci_classifications(pr_num: int, project: str = "pytorch") -> Any:
"""
@ -2067,7 +2036,7 @@ def categorize_checks(
pending_checks: List[Tuple[str, Optional[str], Optional[int]]] = []
failed_checks: List[Tuple[str, Optional[str], Optional[int]]] = []
# failed_checks_categorization is used to keep track of all ignorable failures when saving the merge record on Rockset
# failed_checks_categorization is used to keep track of all ignorable failures when saving the merge record on s3
failed_checks_categorization: Dict[str, List[Any]] = defaultdict(list)
# If required_checks is not set or empty, consider all names are relevant
@ -2126,7 +2095,7 @@ def categorize_checks(
):
failed_checks = failed_checks + flaky_or_broken_trunk
# The list of failed_checks_categorization is returned so that it can be saved into the Rockset merge record
# The list of failed_checks_categorization is returned so that it can be saved into the s3 merge record
return (pending_checks, failed_checks, failed_checks_categorization)
@ -2410,7 +2379,7 @@ def main() -> None:
handle_exception(e)
if args.comment_id and args.pr_num:
# Finally, upload the record to Rockset, we don't have access to the
# Finally, upload the record to s3, we don't have access to the
# list of pending and failed checks here, but they are not really
# needed at the moment
save_merge_record(
@ -2433,7 +2402,7 @@ def main() -> None:
error=str(e),
)
else:
print("Missing comment ID or PR number, couldn't upload to Rockset")
print("Missing comment ID or PR number, couldn't upload to s3")
finally:
if not args.check_mergeability:
gh_remove_label(

View File

@ -28,7 +28,7 @@ jobs:
check-latest: false
cache: pip
architecture: x64
- run: pip install pyyaml==6.0 rockset==1.0.3
- run: pip install pyyaml==6.0
- name: Setup committer id
run: |
@ -43,7 +43,6 @@ jobs:
COMMENT_ID: ${{ github.event.client_payload.comment_id }}
REBASE: ${{ github.event.client_payload.rebase }}
IGNORE_CURRENT: ${{ github.event.client_payload.ignore_current }}
ROCKSET_API_KEY: ${{ secrets.ROCKSET_API_KEY }}
DRCI_BOT_KEY: ${{ secrets.DRCI_BOT_KEY }}
GITHUB_RUN_ID: ${{ github.run_id }}
run: |