Enable opting out of experiments even when they're being rolled out (#140433)

Enables opting out of specific experiments in the runner determinator To opt out: 1. Go to the tracking issue: https://github.com/pytorch/test-infra/issues/5132 2. In the entry by your name, enter the experiment name, prefixed with a `-`. For example, to opt out of the LF fleet you could enter `@ZainRIzvi,-lf` This lets you simultaneously be opted into some experiments and opted out of others. While the `disable-runner-experiments` label offers an option to disable all experiments on a given PR, this one lets you disable a selected set of experiments across all your PRs. Fixes https://github.com/pytorch/pytorch/issues/138099 Pull Request resolved: https://github.com/pytorch/pytorch/pull/140433 Approved by: https://github.com/zxiiro, https://github.com/jeanschmidt
2025-10-20 21:14:14 +08:00 · 2024-11-14 19:18:20 +00:00
parent b11ff3cf60
commit b69282c98c
3 changed files with 175 additions and 2 deletions
--- a/.github/scripts/runner_determinator.py
+++ b/.github/scripts/runner_determinator.py
@ -46,9 +46,10 @@ Example config:
    # Opt-ins:
    # Users can opt into the LF fleet by adding their GitHub username to this list
    # and specifying experiments to enable in a comma-separated list.
+    # To always opt out of an experiment, prefix it with a "-".
    # Experiments should be from the above list.

-    @User1,lf,split_build
+    @User1,-lf,split_build
    @User2,lf
    @User3,split_build
 """
@ -57,6 +58,7 @@ import json
 import logging
 import os
 import random
+import re
 import sys
 from argparse import ArgumentParser
 from functools import lru_cache
@ -307,6 +309,27 @@ def parse_user_opt_in_from_text(user_optin_text: str) -> UserOptins:
    return optins


+def is_valid_experiment_name(experiment_name: str) -> bool:
+    """
+    Check if the experiment name is valid.
+    A valid name:
+        - Contains only alphanumeric characters and the special characters "_" & "-"
+        - The special characters "_" & "-" shouldn't be the first or last characters
+        - Cannot contain spaces
+    """
+
+    valid_char_regex = r"^[a-zA-Z0-9]([\w-]*[a-zA-Z0-9])?$"
+    valid = bool(re.match(valid_char_regex, experiment_name))
+
+    if valid:
+        return True
+
+    log.error(
+        f"Invalid experiment name: {experiment_name}. Experiment names should only contain alphanumeric characters, '_', and '-'. They cannot contain spaces, and the special characters '_' and '-' cannot be the first or last characters."
+    )
+    return False
+
+
 def parse_settings_from_text(settings_text: str) -> Settings:
    """
    Parse the experiments from the issue body into a list of ExperimentSettings
@ -325,6 +348,10 @@ def parse_settings_from_text(settings_text: str) -> Settings:
            experiments = {}

            for exp_name, exp_settings in settings.get(SETTING_EXPERIMENTS).items():
+                if not is_valid_experiment_name(exp_name):
+                    # Exclude invalid experiments from the list. We log an error, but don't raise an exception so that other experiments can still be processed.
+                    continue
+
                valid_settings = {}
                for setting in exp_settings:
                    if setting not in Experiment._fields:
@ -372,6 +399,23 @@ def is_user_opted_in(user: str, user_optins: UserOptins, experiment_name: str) -
    return experiment_name in user_optins.get(user, [])


+def is_user_opted_out(user: str, user_optins: UserOptins, experiment_name: str) -> bool:
+    """
+    Check if a user explicitly opted out of an experiment
+    """
+    # if the experiment is prefixed with a "-", then it's an opt-out
+    experiment_optout = "-" + experiment_name
+    if experiment_optout not in user_optins.get(user, []):
+        return False
+
+    if is_user_opted_in(user, user_optins, experiment_name):
+        log.warning(
+            f"User {user} is opted into experiment {experiment_name}, but also opted out of it. Defaulting to opting out"
+        )
+
+    return True
+
+
 def get_runner_prefix(
    rollout_state: str,
    workflow_requestors: Iterable[str],
@ -404,6 +448,19 @@ def get_runner_prefix(
            )
            continue

+        # Is any workflow_requestor opted out to this experiment?
+        opted_out_users = [
+            requestor
+            for requestor in workflow_requestors
+            if is_user_opted_out(requestor, user_optins, experiment_name)
+        ]
+
+        if opted_out_users:
+            log.info(
+                f"{', '.join(opted_out_users)} have opted out of experiment {experiment_name}."
+            )
+            continue
+
        # Is any workflow_requestor opted in to this experiment?
        opted_in_users = [
            requestor
--- a/.github/scripts/test_runner_determinator.py
+++ b/.github/scripts/test_runner_determinator.py
@ -38,6 +38,31 @@ class TestRunnerDeterminatorIssueParser(TestCase):
            "otherExp settings not parsed correctly",
        )

+    def test_parse_settings_with_invalid_experiment_name_skips_experiment(self) -> None:
+        settings_text = """
+        experiments:
+            lf:
+                rollout_perc: 25
+            -badExp:
+                rollout_perc: 0
+                default: false
+        ---
+
+        Users:
+        @User1,lf
+        @User2,lf,-badExp
+
+        """
+
+        settings = rd.parse_settings(settings_text)
+
+        self.assertTupleEqual(
+            rd.Experiment(rollout_perc=25),
+            settings.experiments["lf"],
+            "lf settings not parsed correctly",
+        )
+        self.assertNotIn("-badExp", settings.experiments)
+
    def test_parse_settings_in_code_block(self) -> None:
        settings_text = """

@ -161,6 +186,40 @@ class TestRunnerDeterminatorGetRunnerPrefix(TestCase):
        prefix = rd.get_runner_prefix(settings_text, ["User1"], USER_BRANCH)
        self.assertEqual("lf.", prefix, "Runner prefix not correct for User1")

+    def test_explicitly_opted_out_user(self) -> None:
+        settings_text = """
+        experiments:
+            lf:
+                rollout_perc: 100
+            otherExp:
+                rollout_perc: 0
+        ---
+
+        Users:
+        @User1,-lf
+        @User2,lf,otherExp
+
+        """
+        prefix = rd.get_runner_prefix(settings_text, ["User1"], USER_BRANCH)
+        self.assertEqual("", prefix, "Runner prefix not correct for User1")
+
+    def test_explicitly_opted_in_and_out_user_should_opt_out(self) -> None:
+        settings_text = """
+        experiments:
+            lf:
+                rollout_perc: 100
+            otherExp:
+                rollout_perc: 0
+        ---
+
+        Users:
+        @User1,-lf,lf
+        @User2,lf,otherExp
+
+        """
+        prefix = rd.get_runner_prefix(settings_text, ["User1"], USER_BRANCH)
+        self.assertEqual("", prefix, "Runner prefix not correct for User1")
+
    def test_opted_in_user_two_experiments(self) -> None:
        settings_text = """
        experiments:
--- a/.github/workflows/_runner-determinator.yml
+++ b/.github/workflows/_runner-determinator.yml
@ -114,9 +114,10 @@ jobs:
              # Opt-ins:
              # Users can opt into the LF fleet by adding their GitHub username to this list
              # and specifying experiments to enable in a comma-separated list.
+              # To always opt out of an experiment, prefix it with a "-".
              # Experiments should be from the above list.

-              @User1,lf,split_build
+              @User1,-lf,split_build
              @User2,lf
              @User3,split_build
          """
@ -125,6 +126,7 @@ jobs:
          import logging
          import os
          import random
+          import re
          import sys
          from argparse import ArgumentParser
          from functools import lru_cache
@ -375,6 +377,27 @@ jobs:
              return optins


+          def is_valid_experiment_name(experiment_name: str) -> bool:
+              """
+              Check if the experiment name is valid.
+              A valid name:
+                  - Contains only alphanumeric characters and the special characters "_" & "-"
+                  - The special characters "_" & "-" shouldn't be the first or last characters
+                  - Cannot contain spaces
+              """
+
+              valid_char_regex = r"^[a-zA-Z0-9]([\w-]*[a-zA-Z0-9])?$"
+              valid = bool(re.match(valid_char_regex, experiment_name))
+
+              if valid:
+                  return True
+
+              log.error(
+                  f"Invalid experiment name: {experiment_name}. Experiment names should only contain alphanumeric characters, '_', and '-'. They cannot contain spaces, and the special characters '_' and '-' cannot be the first or last characters."
+              )
+              return False
+
+
          def parse_settings_from_text(settings_text: str) -> Settings:
              """
              Parse the experiments from the issue body into a list of ExperimentSettings
@ -393,6 +416,10 @@ jobs:
                      experiments = {}

                      for exp_name, exp_settings in settings.get(SETTING_EXPERIMENTS).items():
+                          if not is_valid_experiment_name(exp_name):
+                              # Exclude invalid experiments from the list. We log an error, but don't raise an exception so that other experiments can still be processed.
+                              continue
+
                          valid_settings = {}
                          for setting in exp_settings:
                              if setting not in Experiment._fields:
@ -440,6 +467,23 @@ jobs:
              return experiment_name in user_optins.get(user, [])


+          def is_user_opted_out(user: str, user_optins: UserOptins, experiment_name: str) -> bool:
+              """
+              Check if a user explicitly opted out of an experiment
+              """
+              # if the experiment is prefixed with a "-", then it's an opt-out
+              experiment_optout = "-" + experiment_name
+              if experiment_optout not in user_optins.get(user, []):
+                  return False
+
+              if is_user_opted_in(user, user_optins, experiment_name):
+                  log.warning(
+                      f"User {user} is opted into experiment {experiment_name}, but also opted out of it. Defaulting to opting out"
+                  )
+
+              return True
+
+
          def get_runner_prefix(
              rollout_state: str,
              workflow_requestors: Iterable[str],
@ -472,6 +516,19 @@ jobs:
                      )
                      continue

+                  # Is any workflow_requestor opted out to this experiment?
+                  opted_out_users = [
+                      requestor
+                      for requestor in workflow_requestors
+                      if is_user_opted_out(requestor, user_optins, experiment_name)
+                  ]
+
+                  if opted_out_users:
+                      log.info(
+                          f"{', '.join(opted_out_users)} have opted out of experiment {experiment_name}."
+                      )
+                      continue
+
                  # Is any workflow_requestor opted in to this experiment?
                  opted_in_users = [
                      requestor