Enable opting out of experiments even when they're being rolled out (#140433)

Enables opting out of specific experiments in the runner determinator

To opt out:
1. Go to the tracking issue: https://github.com/pytorch/test-infra/issues/5132
2. In the entry by your name, enter the experiment name, prefixed with a `-`.  For example, to opt out of the LF fleet you could enter `@ZainRIzvi,-lf`

This lets you simultaneously be opted into some experiments and opted out of others.

While the `disable-runner-experiments` label offers an option to disable all experiments on a given PR, this one lets you disable a selected set of experiments across all your PRs.

Fixes https://github.com/pytorch/pytorch/issues/138099

Pull Request resolved: https://github.com/pytorch/pytorch/pull/140433
Approved by: https://github.com/zxiiro, https://github.com/jeanschmidt
This commit is contained in:
Zain Rizvi
2024-11-14 19:18:20 +00:00
committed by PyTorch MergeBot
parent b11ff3cf60
commit b69282c98c
3 changed files with 175 additions and 2 deletions

View File

@ -46,9 +46,10 @@ Example config:
# Opt-ins:
# Users can opt into the LF fleet by adding their GitHub username to this list
# and specifying experiments to enable in a comma-separated list.
# To always opt out of an experiment, prefix it with a "-".
# Experiments should be from the above list.
@User1,lf,split_build
@User1,-lf,split_build
@User2,lf
@User3,split_build
"""
@ -57,6 +58,7 @@ import json
import logging
import os
import random
import re
import sys
from argparse import ArgumentParser
from functools import lru_cache
@ -307,6 +309,27 @@ def parse_user_opt_in_from_text(user_optin_text: str) -> UserOptins:
return optins
def is_valid_experiment_name(experiment_name: str) -> bool:
"""
Check if the experiment name is valid.
A valid name:
- Contains only alphanumeric characters and the special characters "_" & "-"
- The special characters "_" & "-" shouldn't be the first or last characters
- Cannot contain spaces
"""
valid_char_regex = r"^[a-zA-Z0-9]([\w-]*[a-zA-Z0-9])?$"
valid = bool(re.match(valid_char_regex, experiment_name))
if valid:
return True
log.error(
f"Invalid experiment name: {experiment_name}. Experiment names should only contain alphanumeric characters, '_', and '-'. They cannot contain spaces, and the special characters '_' and '-' cannot be the first or last characters."
)
return False
def parse_settings_from_text(settings_text: str) -> Settings:
"""
Parse the experiments from the issue body into a list of ExperimentSettings
@ -325,6 +348,10 @@ def parse_settings_from_text(settings_text: str) -> Settings:
experiments = {}
for exp_name, exp_settings in settings.get(SETTING_EXPERIMENTS).items():
if not is_valid_experiment_name(exp_name):
# Exclude invalid experiments from the list. We log an error, but don't raise an exception so that other experiments can still be processed.
continue
valid_settings = {}
for setting in exp_settings:
if setting not in Experiment._fields:
@ -372,6 +399,23 @@ def is_user_opted_in(user: str, user_optins: UserOptins, experiment_name: str) -
return experiment_name in user_optins.get(user, [])
def is_user_opted_out(user: str, user_optins: UserOptins, experiment_name: str) -> bool:
"""
Check if a user explicitly opted out of an experiment
"""
# if the experiment is prefixed with a "-", then it's an opt-out
experiment_optout = "-" + experiment_name
if experiment_optout not in user_optins.get(user, []):
return False
if is_user_opted_in(user, user_optins, experiment_name):
log.warning(
f"User {user} is opted into experiment {experiment_name}, but also opted out of it. Defaulting to opting out"
)
return True
def get_runner_prefix(
rollout_state: str,
workflow_requestors: Iterable[str],
@ -404,6 +448,19 @@ def get_runner_prefix(
)
continue
# Is any workflow_requestor opted out to this experiment?
opted_out_users = [
requestor
for requestor in workflow_requestors
if is_user_opted_out(requestor, user_optins, experiment_name)
]
if opted_out_users:
log.info(
f"{', '.join(opted_out_users)} have opted out of experiment {experiment_name}."
)
continue
# Is any workflow_requestor opted in to this experiment?
opted_in_users = [
requestor

View File

@ -38,6 +38,31 @@ class TestRunnerDeterminatorIssueParser(TestCase):
"otherExp settings not parsed correctly",
)
def test_parse_settings_with_invalid_experiment_name_skips_experiment(self) -> None:
settings_text = """
experiments:
lf:
rollout_perc: 25
-badExp:
rollout_perc: 0
default: false
---
Users:
@User1,lf
@User2,lf,-badExp
"""
settings = rd.parse_settings(settings_text)
self.assertTupleEqual(
rd.Experiment(rollout_perc=25),
settings.experiments["lf"],
"lf settings not parsed correctly",
)
self.assertNotIn("-badExp", settings.experiments)
def test_parse_settings_in_code_block(self) -> None:
settings_text = """
@ -161,6 +186,40 @@ class TestRunnerDeterminatorGetRunnerPrefix(TestCase):
prefix = rd.get_runner_prefix(settings_text, ["User1"], USER_BRANCH)
self.assertEqual("lf.", prefix, "Runner prefix not correct for User1")
def test_explicitly_opted_out_user(self) -> None:
settings_text = """
experiments:
lf:
rollout_perc: 100
otherExp:
rollout_perc: 0
---
Users:
@User1,-lf
@User2,lf,otherExp
"""
prefix = rd.get_runner_prefix(settings_text, ["User1"], USER_BRANCH)
self.assertEqual("", prefix, "Runner prefix not correct for User1")
def test_explicitly_opted_in_and_out_user_should_opt_out(self) -> None:
settings_text = """
experiments:
lf:
rollout_perc: 100
otherExp:
rollout_perc: 0
---
Users:
@User1,-lf,lf
@User2,lf,otherExp
"""
prefix = rd.get_runner_prefix(settings_text, ["User1"], USER_BRANCH)
self.assertEqual("", prefix, "Runner prefix not correct for User1")
def test_opted_in_user_two_experiments(self) -> None:
settings_text = """
experiments:

View File

@ -114,9 +114,10 @@ jobs:
# Opt-ins:
# Users can opt into the LF fleet by adding their GitHub username to this list
# and specifying experiments to enable in a comma-separated list.
# To always opt out of an experiment, prefix it with a "-".
# Experiments should be from the above list.
@User1,lf,split_build
@User1,-lf,split_build
@User2,lf
@User3,split_build
"""
@ -125,6 +126,7 @@ jobs:
import logging
import os
import random
import re
import sys
from argparse import ArgumentParser
from functools import lru_cache
@ -375,6 +377,27 @@ jobs:
return optins
def is_valid_experiment_name(experiment_name: str) -> bool:
"""
Check if the experiment name is valid.
A valid name:
- Contains only alphanumeric characters and the special characters "_" & "-"
- The special characters "_" & "-" shouldn't be the first or last characters
- Cannot contain spaces
"""
valid_char_regex = r"^[a-zA-Z0-9]([\w-]*[a-zA-Z0-9])?$"
valid = bool(re.match(valid_char_regex, experiment_name))
if valid:
return True
log.error(
f"Invalid experiment name: {experiment_name}. Experiment names should only contain alphanumeric characters, '_', and '-'. They cannot contain spaces, and the special characters '_' and '-' cannot be the first or last characters."
)
return False
def parse_settings_from_text(settings_text: str) -> Settings:
"""
Parse the experiments from the issue body into a list of ExperimentSettings
@ -393,6 +416,10 @@ jobs:
experiments = {}
for exp_name, exp_settings in settings.get(SETTING_EXPERIMENTS).items():
if not is_valid_experiment_name(exp_name):
# Exclude invalid experiments from the list. We log an error, but don't raise an exception so that other experiments can still be processed.
continue
valid_settings = {}
for setting in exp_settings:
if setting not in Experiment._fields:
@ -440,6 +467,23 @@ jobs:
return experiment_name in user_optins.get(user, [])
def is_user_opted_out(user: str, user_optins: UserOptins, experiment_name: str) -> bool:
"""
Check if a user explicitly opted out of an experiment
"""
# if the experiment is prefixed with a "-", then it's an opt-out
experiment_optout = "-" + experiment_name
if experiment_optout not in user_optins.get(user, []):
return False
if is_user_opted_in(user, user_optins, experiment_name):
log.warning(
f"User {user} is opted into experiment {experiment_name}, but also opted out of it. Defaulting to opting out"
)
return True
def get_runner_prefix(
rollout_state: str,
workflow_requestors: Iterable[str],
@ -472,6 +516,19 @@ jobs:
)
continue
# Is any workflow_requestor opted out to this experiment?
opted_out_users = [
requestor
for requestor in workflow_requestors
if is_user_opted_out(requestor, user_optins, experiment_name)
]
if opted_out_users:
log.info(
f"{', '.join(opted_out_users)} have opted out of experiment {experiment_name}."
)
continue
# Is any workflow_requestor opted in to this experiment?
opted_in_users = [
requestor