From b69282c98c6d16f2e7eca4e91f1e8587597af105 Mon Sep 17 00:00:00 2001 From: Zain Rizvi Date: Thu, 14 Nov 2024 19:18:20 +0000 Subject: [PATCH] Enable opting out of experiments even when they're being rolled out (#140433) Enables opting out of specific experiments in the runner determinator To opt out: 1. Go to the tracking issue: https://github.com/pytorch/test-infra/issues/5132 2. In the entry by your name, enter the experiment name, prefixed with a `-`. For example, to opt out of the LF fleet you could enter `@ZainRIzvi,-lf` This lets you simultaneously be opted into some experiments and opted out of others. While the `disable-runner-experiments` label offers an option to disable all experiments on a given PR, this one lets you disable a selected set of experiments across all your PRs. Fixes https://github.com/pytorch/pytorch/issues/138099 Pull Request resolved: https://github.com/pytorch/pytorch/pull/140433 Approved by: https://github.com/zxiiro, https://github.com/jeanschmidt --- .github/scripts/runner_determinator.py | 59 ++++++++++++++++++++- .github/scripts/test_runner_determinator.py | 59 +++++++++++++++++++++ .github/workflows/_runner-determinator.yml | 59 ++++++++++++++++++++- 3 files changed, 175 insertions(+), 2 deletions(-) diff --git a/.github/scripts/runner_determinator.py b/.github/scripts/runner_determinator.py index 8a8b3880b759..4e208114b642 100644 --- a/.github/scripts/runner_determinator.py +++ b/.github/scripts/runner_determinator.py @@ -46,9 +46,10 @@ Example config: # Opt-ins: # Users can opt into the LF fleet by adding their GitHub username to this list # and specifying experiments to enable in a comma-separated list. + # To always opt out of an experiment, prefix it with a "-". # Experiments should be from the above list. - @User1,lf,split_build + @User1,-lf,split_build @User2,lf @User3,split_build """ @@ -57,6 +58,7 @@ import json import logging import os import random +import re import sys from argparse import ArgumentParser from functools import lru_cache @@ -307,6 +309,27 @@ def parse_user_opt_in_from_text(user_optin_text: str) -> UserOptins: return optins +def is_valid_experiment_name(experiment_name: str) -> bool: + """ + Check if the experiment name is valid. + A valid name: + - Contains only alphanumeric characters and the special characters "_" & "-" + - The special characters "_" & "-" shouldn't be the first or last characters + - Cannot contain spaces + """ + + valid_char_regex = r"^[a-zA-Z0-9]([\w-]*[a-zA-Z0-9])?$" + valid = bool(re.match(valid_char_regex, experiment_name)) + + if valid: + return True + + log.error( + f"Invalid experiment name: {experiment_name}. Experiment names should only contain alphanumeric characters, '_', and '-'. They cannot contain spaces, and the special characters '_' and '-' cannot be the first or last characters." + ) + return False + + def parse_settings_from_text(settings_text: str) -> Settings: """ Parse the experiments from the issue body into a list of ExperimentSettings @@ -325,6 +348,10 @@ def parse_settings_from_text(settings_text: str) -> Settings: experiments = {} for exp_name, exp_settings in settings.get(SETTING_EXPERIMENTS).items(): + if not is_valid_experiment_name(exp_name): + # Exclude invalid experiments from the list. We log an error, but don't raise an exception so that other experiments can still be processed. + continue + valid_settings = {} for setting in exp_settings: if setting not in Experiment._fields: @@ -372,6 +399,23 @@ def is_user_opted_in(user: str, user_optins: UserOptins, experiment_name: str) - return experiment_name in user_optins.get(user, []) +def is_user_opted_out(user: str, user_optins: UserOptins, experiment_name: str) -> bool: + """ + Check if a user explicitly opted out of an experiment + """ + # if the experiment is prefixed with a "-", then it's an opt-out + experiment_optout = "-" + experiment_name + if experiment_optout not in user_optins.get(user, []): + return False + + if is_user_opted_in(user, user_optins, experiment_name): + log.warning( + f"User {user} is opted into experiment {experiment_name}, but also opted out of it. Defaulting to opting out" + ) + + return True + + def get_runner_prefix( rollout_state: str, workflow_requestors: Iterable[str], @@ -404,6 +448,19 @@ def get_runner_prefix( ) continue + # Is any workflow_requestor opted out to this experiment? + opted_out_users = [ + requestor + for requestor in workflow_requestors + if is_user_opted_out(requestor, user_optins, experiment_name) + ] + + if opted_out_users: + log.info( + f"{', '.join(opted_out_users)} have opted out of experiment {experiment_name}." + ) + continue + # Is any workflow_requestor opted in to this experiment? opted_in_users = [ requestor diff --git a/.github/scripts/test_runner_determinator.py b/.github/scripts/test_runner_determinator.py index b3e3ec55c348..e8f9f1b8b4aa 100644 --- a/.github/scripts/test_runner_determinator.py +++ b/.github/scripts/test_runner_determinator.py @@ -38,6 +38,31 @@ class TestRunnerDeterminatorIssueParser(TestCase): "otherExp settings not parsed correctly", ) + def test_parse_settings_with_invalid_experiment_name_skips_experiment(self) -> None: + settings_text = """ + experiments: + lf: + rollout_perc: 25 + -badExp: + rollout_perc: 0 + default: false + --- + + Users: + @User1,lf + @User2,lf,-badExp + + """ + + settings = rd.parse_settings(settings_text) + + self.assertTupleEqual( + rd.Experiment(rollout_perc=25), + settings.experiments["lf"], + "lf settings not parsed correctly", + ) + self.assertNotIn("-badExp", settings.experiments) + def test_parse_settings_in_code_block(self) -> None: settings_text = """ @@ -161,6 +186,40 @@ class TestRunnerDeterminatorGetRunnerPrefix(TestCase): prefix = rd.get_runner_prefix(settings_text, ["User1"], USER_BRANCH) self.assertEqual("lf.", prefix, "Runner prefix not correct for User1") + def test_explicitly_opted_out_user(self) -> None: + settings_text = """ + experiments: + lf: + rollout_perc: 100 + otherExp: + rollout_perc: 0 + --- + + Users: + @User1,-lf + @User2,lf,otherExp + + """ + prefix = rd.get_runner_prefix(settings_text, ["User1"], USER_BRANCH) + self.assertEqual("", prefix, "Runner prefix not correct for User1") + + def test_explicitly_opted_in_and_out_user_should_opt_out(self) -> None: + settings_text = """ + experiments: + lf: + rollout_perc: 100 + otherExp: + rollout_perc: 0 + --- + + Users: + @User1,-lf,lf + @User2,lf,otherExp + + """ + prefix = rd.get_runner_prefix(settings_text, ["User1"], USER_BRANCH) + self.assertEqual("", prefix, "Runner prefix not correct for User1") + def test_opted_in_user_two_experiments(self) -> None: settings_text = """ experiments: diff --git a/.github/workflows/_runner-determinator.yml b/.github/workflows/_runner-determinator.yml index a1b1bb0f8093..ca24c7c4a7c2 100644 --- a/.github/workflows/_runner-determinator.yml +++ b/.github/workflows/_runner-determinator.yml @@ -114,9 +114,10 @@ jobs: # Opt-ins: # Users can opt into the LF fleet by adding their GitHub username to this list # and specifying experiments to enable in a comma-separated list. + # To always opt out of an experiment, prefix it with a "-". # Experiments should be from the above list. - @User1,lf,split_build + @User1,-lf,split_build @User2,lf @User3,split_build """ @@ -125,6 +126,7 @@ jobs: import logging import os import random + import re import sys from argparse import ArgumentParser from functools import lru_cache @@ -375,6 +377,27 @@ jobs: return optins + def is_valid_experiment_name(experiment_name: str) -> bool: + """ + Check if the experiment name is valid. + A valid name: + - Contains only alphanumeric characters and the special characters "_" & "-" + - The special characters "_" & "-" shouldn't be the first or last characters + - Cannot contain spaces + """ + + valid_char_regex = r"^[a-zA-Z0-9]([\w-]*[a-zA-Z0-9])?$" + valid = bool(re.match(valid_char_regex, experiment_name)) + + if valid: + return True + + log.error( + f"Invalid experiment name: {experiment_name}. Experiment names should only contain alphanumeric characters, '_', and '-'. They cannot contain spaces, and the special characters '_' and '-' cannot be the first or last characters." + ) + return False + + def parse_settings_from_text(settings_text: str) -> Settings: """ Parse the experiments from the issue body into a list of ExperimentSettings @@ -393,6 +416,10 @@ jobs: experiments = {} for exp_name, exp_settings in settings.get(SETTING_EXPERIMENTS).items(): + if not is_valid_experiment_name(exp_name): + # Exclude invalid experiments from the list. We log an error, but don't raise an exception so that other experiments can still be processed. + continue + valid_settings = {} for setting in exp_settings: if setting not in Experiment._fields: @@ -440,6 +467,23 @@ jobs: return experiment_name in user_optins.get(user, []) + def is_user_opted_out(user: str, user_optins: UserOptins, experiment_name: str) -> bool: + """ + Check if a user explicitly opted out of an experiment + """ + # if the experiment is prefixed with a "-", then it's an opt-out + experiment_optout = "-" + experiment_name + if experiment_optout not in user_optins.get(user, []): + return False + + if is_user_opted_in(user, user_optins, experiment_name): + log.warning( + f"User {user} is opted into experiment {experiment_name}, but also opted out of it. Defaulting to opting out" + ) + + return True + + def get_runner_prefix( rollout_state: str, workflow_requestors: Iterable[str], @@ -472,6 +516,19 @@ jobs: ) continue + # Is any workflow_requestor opted out to this experiment? + opted_out_users = [ + requestor + for requestor in workflow_requestors + if is_user_opted_out(requestor, user_optins, experiment_name) + ] + + if opted_out_users: + log.info( + f"{', '.join(opted_out_users)} have opted out of experiment {experiment_name}." + ) + continue + # Is any workflow_requestor opted in to this experiment? opted_in_users = [ requestor