Remove references to Rockset in trymerge (#137207)

For the migration to ClickHouse.

Also, Rockset is not used in trymerge anymore.
Pull Request resolved: https://github.com/pytorch/pytorch/pull/137207
Approved by: https://github.com/huydhn, https://github.com/ZainRizvi
Catherine Lee
2024-10-05 12:53:20 +00:00
committed by PyTorch MergeBot
parent 40d8260745
commit f54e142c58
3 changed files with 16 additions and 73 deletions
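For context, the record-saving path that remains after this change is just a local JSON dump; the actual upload to s3 is assumed to happen in a later workflow step rather than via a Rockset client. A minimal sketch of that flow, using hypothetical names rather than the real trymerge.py signature:

import json
import os
from typing import Any, Dict, List

def save_merge_record_sketch(record: Dict[str, Any], out_dir: str = ".") -> str:
    # Hypothetical helper: trymerge now only writes the merge record to disk as
    # a JSON list; a separate upload step is assumed to push the file to s3.
    data: List[Dict[str, Any]] = [record]
    path = os.path.join(out_dir, "merge_record.json")
    with open(path, "w") as f:
        json.dump(data, f)
    return path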


@@ -452,8 +452,6 @@ RE_DIFF_REV = re.compile(r"^Differential Revision:.+?(D[0-9]+)", re.MULTILINE)
CIFLOW_LABEL = re.compile(r"^ciflow/.+")
CIFLOW_TRUNK_LABEL = re.compile(r"^ciflow/trunk")
MERGE_RULE_PATH = Path(".github") / "merge_rules.yaml"
-ROCKSET_MERGES_COLLECTION = "merges"
-ROCKSET_MERGES_WORKSPACE = "commons"
REMOTE_MAIN_BRANCH = "origin/main"
DRCI_CHECKRUN_NAME = "Dr.CI"
INTERNAL_CHANGES_CHECKRUN_NAME = "Meta Internal-Only Changes Check"
@@ -1180,7 +1178,7 @@ class GitHubPR:
merge_commit_sha = repo.rev_parse(name=self.default_branch())
if comment_id and self.pr_num:
-# Finally, upload the record to Rockset. The list of pending and failed
+# Finally, upload the record to s3. The list of pending and failed
# checks are at the time of the merge
save_merge_record(
comment_id=comment_id,
@@ -1202,7 +1200,7 @@ class GitHubPR:
ignore_current=bool(ignore_current_checks),
)
else:
print("Missing comment ID or PR number, couldn't upload to Rockset")
print("Missing comment ID or PR number, couldn't upload to s3")
# Usually Github will see that the commit has "resolves <pr_num>" in the
# commit message and close the PR, but sometimes it doesn't, leading to
@@ -1481,7 +1479,7 @@ def find_matching_merge_rule(
# Categorize all checks when skip_mandatory_checks (force merge) is set. Do it here
# where the list of checks is readily available. These records will be saved into
-# Rockset merge records
+# s3 merge records
(
pending_mandatory_checks,
failed_mandatory_checks,
@@ -1568,7 +1566,7 @@ def save_merge_record(
This saves the merge records as a json, which can later be uploaded to s3
"""
-# Prepare the record to be written into Rockset
+# Prepare the record to be written into s3
data = [
{
"comment_id": comment_id,
@@ -1590,7 +1588,8 @@ def save_merge_record(
"ignore_current": ignore_current,
"error": error,
# This is a unique identifier for the record for deduping purposes
-# in rockset. Any unique string would work
+# in Rockset. Any unique string would work. This will not be used
+# after we migrate off Rockset
"_id": f"{project}-{pr_num}-{comment_id}-{os.environ.get('GITHUB_RUN_ID')}",
}
]
@@ -1600,36 +1599,6 @@
json.dump(data, f)
-@retries_decorator(rc=[])
-def get_rockset_results(head_sha: str, merge_base: str) -> List[Dict[str, Any]]:
-    query = f"""
-SELECT
-    w.name as workflow_name,
-    j.id,
-    j.name,
-    j.conclusion,
-    j.completed_at,
-    j.html_url,
-    j.head_sha,
-    j.torchci_classification.captures as failure_captures,
-    LENGTH(j.steps) as steps,
-FROM
-    commons.workflow_job j join commons.workflow_run w on w.id = j.run_id
-where
-    j.head_sha in ('{head_sha}','{merge_base}')
-"""
-    try:
-        import rockset  # type: ignore[import]
-        res = rockset.RocksetClient(
-            host="api.usw2a1.rockset.com", api_key=os.environ["ROCKSET_API_KEY"]
-        ).sql(query)
-        return cast(List[Dict[str, Any]], res.results)
-    except ModuleNotFoundError:
-        print("Could not use RockSet as rocket dependency is missing")
-        return []
@retries_decorator()
def get_drci_classifications(pr_num: int, project: str = "pytorch") -> Any:
"""
@@ -2067,7 +2036,7 @@ def categorize_checks(
pending_checks: List[Tuple[str, Optional[str], Optional[int]]] = []
failed_checks: List[Tuple[str, Optional[str], Optional[int]]] = []
-# failed_checks_categorization is used to keep track of all ignorable failures when saving the merge record on Rockset
+# failed_checks_categorization is used to keep track of all ignorable failures when saving the merge record on s3
failed_checks_categorization: Dict[str, List[Any]] = defaultdict(list)
# If required_checks is not set or empty, consider all names are relevant
@@ -2126,7 +2095,7 @@ def categorize_checks(
):
failed_checks = failed_checks + flaky_or_broken_trunk
-# The list of failed_checks_categorization is returned so that it can be saved into the Rockset merge record
+# The list of failed_checks_categorization is returned so that it can be saved into the s3 merge record
return (pending_checks, failed_checks, failed_checks_categorization)
@@ -2410,7 +2379,7 @@ def main() -> None:
handle_exception(e)
if args.comment_id and args.pr_num:
-# Finally, upload the record to Rockset, we don't have access to the
+# Finally, upload the record to s3, we don't have access to the
# list of pending and failed checks here, but they are not really
# needed at the moment
save_merge_record(
@@ -2433,7 +2402,7 @@ def main() -> None:
error=str(e),
)
else:
print("Missing comment ID or PR number, couldn't upload to Rockset")
print("Missing comment ID or PR number, couldn't upload to s3")
finally:
if not args.check_mergeability:
gh_remove_label(