From b69282c98c6d16f2e7eca4e91f1e8587597af105 Mon Sep 17 00:00:00 2001
From: Zain Rizvi <ZainR@meta.com>
Date: Thu, 14 Nov 2024 19:18:20 +0000
Subject: [PATCH] Enable opting out of experiments even when they're being
 rolled out (#140433)

Enables opting out of specific experiments in the runner determinator

To opt out:
1. Go to the tracking issue: https://github.com/pytorch/test-infra/issues/5132
2. In the entry by your name, enter the experiment name, prefixed with a `-`.  For example, to opt out of the LF fleet you could enter `@ZainRIzvi,-lf`

This lets you simultaneously be opted into some experiments and opted out of others.

While the `disable-runner-experiments` label offers an option to disable all experiments on a given PR, this one lets you disable a selected set of experiments across all your PRs.

Fixes https://github.com/pytorch/pytorch/issues/138099

Pull Request resolved: https://github.com/pytorch/pytorch/pull/140433
Approved by: https://github.com/zxiiro, https://github.com/jeanschmidt
---
 .github/scripts/runner_determinator.py      | 59 ++++++++++++++++++++-
 .github/scripts/test_runner_determinator.py | 59 +++++++++++++++++++++
 .github/workflows/_runner-determinator.yml  | 59 ++++++++++++++++++++-
 3 files changed, 175 insertions(+), 2 deletions(-)

diff --git a/.github/scripts/runner_determinator.py b/.github/scripts/runner_determinator.py
index 8a8b3880b759..4e208114b642 100644
--- a/.github/scripts/runner_determinator.py
+++ b/.github/scripts/runner_determinator.py
@@ -46,9 +46,10 @@ Example config:
     # Opt-ins:
     # Users can opt into the LF fleet by adding their GitHub username to this list
     # and specifying experiments to enable in a comma-separated list.
+    # To always opt out of an experiment, prefix it with a "-".
     # Experiments should be from the above list.
 
-    @User1,lf,split_build
+    @User1,-lf,split_build
     @User2,lf
     @User3,split_build
 """
@@ -57,6 +58,7 @@ import json
 import logging
 import os
 import random
+import re
 import sys
 from argparse import ArgumentParser
 from functools import lru_cache
@@ -307,6 +309,27 @@ def parse_user_opt_in_from_text(user_optin_text: str) -> UserOptins:
     return optins
 
 
+def is_valid_experiment_name(experiment_name: str) -> bool:
+    """
+    Check if the experiment name is valid.
+    A valid name:
+        - Contains only alphanumeric characters and the special characters "_" & "-"
+        - The special characters "_" & "-" shouldn't be the first or last characters
+        - Cannot contain spaces
+    """
+
+    valid_char_regex = r"^[a-zA-Z0-9]([\w-]*[a-zA-Z0-9])?$"
+    valid = bool(re.match(valid_char_regex, experiment_name))
+
+    if valid:
+        return True
+
+    log.error(
+        f"Invalid experiment name: {experiment_name}. Experiment names should only contain alphanumeric characters, '_', and '-'. They cannot contain spaces, and the special characters '_' and '-' cannot be the first or last characters."
+    )
+    return False
+
+
 def parse_settings_from_text(settings_text: str) -> Settings:
     """
     Parse the experiments from the issue body into a list of ExperimentSettings
@@ -325,6 +348,10 @@ def parse_settings_from_text(settings_text: str) -> Settings:
             experiments = {}
 
             for exp_name, exp_settings in settings.get(SETTING_EXPERIMENTS).items():
+                if not is_valid_experiment_name(exp_name):
+                    # Exclude invalid experiments from the list. We log an error, but don't raise an exception so that other experiments can still be processed.
+                    continue
+
                 valid_settings = {}
                 for setting in exp_settings:
                     if setting not in Experiment._fields:
@@ -372,6 +399,23 @@ def is_user_opted_in(user: str, user_optins: UserOptins, experiment_name: str) -
     return experiment_name in user_optins.get(user, [])
 
 
+def is_user_opted_out(user: str, user_optins: UserOptins, experiment_name: str) -> bool:
+    """
+    Check if a user explicitly opted out of an experiment
+    """
+    # if the experiment is prefixed with a "-", then it's an opt-out
+    experiment_optout = "-" + experiment_name
+    if experiment_optout not in user_optins.get(user, []):
+        return False
+
+    if is_user_opted_in(user, user_optins, experiment_name):
+        log.warning(
+            f"User {user} is opted into experiment {experiment_name}, but also opted out of it. Defaulting to opting out"
+        )
+
+    return True
+
+
 def get_runner_prefix(
     rollout_state: str,
     workflow_requestors: Iterable[str],
@@ -404,6 +448,19 @@ def get_runner_prefix(
             )
             continue
 
+        # Is any workflow_requestor opted out to this experiment?
+        opted_out_users = [
+            requestor
+            for requestor in workflow_requestors
+            if is_user_opted_out(requestor, user_optins, experiment_name)
+        ]
+
+        if opted_out_users:
+            log.info(
+                f"{', '.join(opted_out_users)} have opted out of experiment {experiment_name}."
+            )
+            continue
+
         # Is any workflow_requestor opted in to this experiment?
         opted_in_users = [
             requestor
diff --git a/.github/scripts/test_runner_determinator.py b/.github/scripts/test_runner_determinator.py
index b3e3ec55c348..e8f9f1b8b4aa 100644
--- a/.github/scripts/test_runner_determinator.py
+++ b/.github/scripts/test_runner_determinator.py
@@ -38,6 +38,31 @@ class TestRunnerDeterminatorIssueParser(TestCase):
             "otherExp settings not parsed correctly",
         )
 
+    def test_parse_settings_with_invalid_experiment_name_skips_experiment(self) -> None:
+        settings_text = """
+        experiments:
+            lf:
+                rollout_perc: 25
+            -badExp:
+                rollout_perc: 0
+                default: false
+        ---
+
+        Users:
+        @User1,lf
+        @User2,lf,-badExp
+
+        """
+
+        settings = rd.parse_settings(settings_text)
+
+        self.assertTupleEqual(
+            rd.Experiment(rollout_perc=25),
+            settings.experiments["lf"],
+            "lf settings not parsed correctly",
+        )
+        self.assertNotIn("-badExp", settings.experiments)
+
     def test_parse_settings_in_code_block(self) -> None:
         settings_text = """
 
@@ -161,6 +186,40 @@ class TestRunnerDeterminatorGetRunnerPrefix(TestCase):
         prefix = rd.get_runner_prefix(settings_text, ["User1"], USER_BRANCH)
         self.assertEqual("lf.", prefix, "Runner prefix not correct for User1")
 
+    def test_explicitly_opted_out_user(self) -> None:
+        settings_text = """
+        experiments:
+            lf:
+                rollout_perc: 100
+            otherExp:
+                rollout_perc: 0
+        ---
+
+        Users:
+        @User1,-lf
+        @User2,lf,otherExp
+
+        """
+        prefix = rd.get_runner_prefix(settings_text, ["User1"], USER_BRANCH)
+        self.assertEqual("", prefix, "Runner prefix not correct for User1")
+
+    def test_explicitly_opted_in_and_out_user_should_opt_out(self) -> None:
+        settings_text = """
+        experiments:
+            lf:
+                rollout_perc: 100
+            otherExp:
+                rollout_perc: 0
+        ---
+
+        Users:
+        @User1,-lf,lf
+        @User2,lf,otherExp
+
+        """
+        prefix = rd.get_runner_prefix(settings_text, ["User1"], USER_BRANCH)
+        self.assertEqual("", prefix, "Runner prefix not correct for User1")
+
     def test_opted_in_user_two_experiments(self) -> None:
         settings_text = """
         experiments:
diff --git a/.github/workflows/_runner-determinator.yml b/.github/workflows/_runner-determinator.yml
index a1b1bb0f8093..ca24c7c4a7c2 100644
--- a/.github/workflows/_runner-determinator.yml
+++ b/.github/workflows/_runner-determinator.yml
@@ -114,9 +114,10 @@ jobs:
               # Opt-ins:
               # Users can opt into the LF fleet by adding their GitHub username to this list
               # and specifying experiments to enable in a comma-separated list.
+              # To always opt out of an experiment, prefix it with a "-".
               # Experiments should be from the above list.
 
-              @User1,lf,split_build
+              @User1,-lf,split_build
               @User2,lf
               @User3,split_build
           """
@@ -125,6 +126,7 @@ jobs:
           import logging
           import os
           import random
+          import re
           import sys
           from argparse import ArgumentParser
           from functools import lru_cache
@@ -375,6 +377,27 @@ jobs:
               return optins
 
 
+          def is_valid_experiment_name(experiment_name: str) -> bool:
+              """
+              Check if the experiment name is valid.
+              A valid name:
+                  - Contains only alphanumeric characters and the special characters "_" & "-"
+                  - The special characters "_" & "-" shouldn't be the first or last characters
+                  - Cannot contain spaces
+              """
+
+              valid_char_regex = r"^[a-zA-Z0-9]([\w-]*[a-zA-Z0-9])?$"
+              valid = bool(re.match(valid_char_regex, experiment_name))
+
+              if valid:
+                  return True
+
+              log.error(
+                  f"Invalid experiment name: {experiment_name}. Experiment names should only contain alphanumeric characters, '_', and '-'. They cannot contain spaces, and the special characters '_' and '-' cannot be the first or last characters."
+              )
+              return False
+
+
           def parse_settings_from_text(settings_text: str) -> Settings:
               """
               Parse the experiments from the issue body into a list of ExperimentSettings
@@ -393,6 +416,10 @@ jobs:
                       experiments = {}
 
                       for exp_name, exp_settings in settings.get(SETTING_EXPERIMENTS).items():
+                          if not is_valid_experiment_name(exp_name):
+                              # Exclude invalid experiments from the list. We log an error, but don't raise an exception so that other experiments can still be processed.
+                              continue
+
                           valid_settings = {}
                           for setting in exp_settings:
                               if setting not in Experiment._fields:
@@ -440,6 +467,23 @@ jobs:
               return experiment_name in user_optins.get(user, [])
 
 
+          def is_user_opted_out(user: str, user_optins: UserOptins, experiment_name: str) -> bool:
+              """
+              Check if a user explicitly opted out of an experiment
+              """
+              # if the experiment is prefixed with a "-", then it's an opt-out
+              experiment_optout = "-" + experiment_name
+              if experiment_optout not in user_optins.get(user, []):
+                  return False
+
+              if is_user_opted_in(user, user_optins, experiment_name):
+                  log.warning(
+                      f"User {user} is opted into experiment {experiment_name}, but also opted out of it. Defaulting to opting out"
+                  )
+
+              return True
+
+
           def get_runner_prefix(
               rollout_state: str,
               workflow_requestors: Iterable[str],
@@ -472,6 +516,19 @@ jobs:
                       )
                       continue
 
+                  # Is any workflow_requestor opted out to this experiment?
+                  opted_out_users = [
+                      requestor
+                      for requestor in workflow_requestors
+                      if is_user_opted_out(requestor, user_optins, experiment_name)
+                  ]
+
+                  if opted_out_users:
+                      log.info(
+                          f"{', '.join(opted_out_users)} have opted out of experiment {experiment_name}."
+                      )
+                      continue
+
                   # Is any workflow_requestor opted in to this experiment?
                   opted_in_users = [
                       requestor