mirror of
https://github.com/pytorch/pytorch.git
synced 2025-10-21 05:34:18 +08:00
[experiment][TD] Rating number system (#112676)
Emits an excessive amount of heuristic info, but that just means I can do more with it later. Pull Request resolved: https://github.com/pytorch/pytorch/pull/112676 Approved by: https://github.com/ZainRizvi
This commit is contained in:
committed by
PyTorch MergeBot
parent
82875e69fe
commit
0c448526a4
@ -47,6 +47,7 @@ from tools.stats.import_test_stats import (
|
||||
from tools.stats.upload_metrics import add_global_metric, emit_metric
|
||||
from tools.testing.target_determination.determinator import (
|
||||
AggregatedHeuristics,
|
||||
get_prediction_confidences,
|
||||
get_test_prioritizations,
|
||||
)
|
||||
|
||||
@ -1806,7 +1807,17 @@ def main():
|
||||
test_stats["num_total_tests"] = num_tests
|
||||
|
||||
print_to_stderr("Emiting td_test_failure_stats")
|
||||
emit_metric("td_test_failure_stats", test_stats)
|
||||
emit_metric(
|
||||
"td_test_failure_stats",
|
||||
{
|
||||
**test_stats,
|
||||
"confidence_ratings": get_prediction_confidences(
|
||||
selected_tests
|
||||
),
|
||||
"failure": str(test),
|
||||
"tests": selected_tests,
|
||||
},
|
||||
)
|
||||
|
||||
if len(all_failures):
|
||||
for _, err in all_failures:
|
||||
|
@ -167,4 +167,14 @@ def emit_metric(
|
||||
|
||||
|
||||
def _convert_float_values_to_decimals(data: Dict[str, Any]) -> Dict[str, Any]:
    """
    Return a copy of ``data`` in which every float is replaced by a Decimal.

    The conversion recurses through nested lists and dicts (keys of nested
    dicts included); any other value is passed through untouched. The
    top-level keys of ``data`` are kept as they are.

    Floats are converted via ``str`` so the Decimal carries the float's short
    repr (e.g. ``0.1``) rather than its exact binary expansion.
    """

    # Attempt to recurse
    def _helper(o: Any) -> Any:
        if isinstance(o, float):
            return Decimal(str(o))
        if isinstance(o, list):
            return [_helper(v) for v in o]
        if isinstance(o, dict):
            return {_helper(k): _helper(v) for k, v in o.items()}
        return o

    # No early one-level return here: it would leave floats nested inside
    # lists/dicts unconverted and make the helper above dead code.
    return {k: _helper(v) for k, v in data.items()}
|
||||
|
@ -1,4 +1,4 @@
|
||||
from typing import List
|
||||
from typing import Dict, List
|
||||
|
||||
from tools.testing.target_determination.heuristics import (
|
||||
AggregatedHeuristics as AggregatedHeuristics,
|
||||
@ -27,3 +27,11 @@ def get_test_prioritizations(tests: List[str]) -> AggregatedHeuristics:
|
||||
new_rankings.print_info()
|
||||
|
||||
return aggregated_results
|
||||
|
||||
|
||||
def get_prediction_confidences(tests: List[str]) -> Dict[str, Dict[str, float]]:
    """
    Ask every heuristic in HEURISTICS for its confidence ratings on ``tests``.

    Returns a mapping of heuristic name -> test -> rating/confidence.
    """
    return {
        heuristic.name: heuristic.get_prediction_confidence(tests)
        for heuristic in HEURISTICS
    }
|
||||
|
@ -10,7 +10,11 @@ from tools.testing.target_determination.heuristics.interface import (
|
||||
TestPrioritizations,
|
||||
)
|
||||
|
||||
from tools.testing.target_determination.heuristics.utils import get_correlated_tests
|
||||
from tools.testing.target_determination.heuristics.utils import (
|
||||
get_correlated_tests,
|
||||
get_ratings_for_tests,
|
||||
normalize_ratings,
|
||||
)
|
||||
|
||||
|
||||
class CorrelatedWithHistoricalFailures(HeuristicInterface):
|
||||
@ -27,3 +31,10 @@ class CorrelatedWithHistoricalFailures(HeuristicInterface):
|
||||
)
|
||||
|
||||
return test_rankings
|
||||
|
||||
def get_prediction_confidence(self, tests: List[str]) -> Dict[str, float]:
    """
    Rate the given tests using the ratings file TEST_FILE_RATINGS_FILE.

    Only tests present in ``tests`` are kept, and the surviving ratings are
    normalized so that the highest one is 1.
    """
    test_ratings = get_ratings_for_tests(
        ADDITIONAL_CI_FILES_FOLDER / TEST_FILE_RATINGS_FILE
    )
    # Build the lookup once; membership in a list is a linear scan per rating.
    requested = set(tests)
    test_ratings = {k: v for (k, v) in test_ratings.items() if k in requested}
    return normalize_ratings(test_ratings, 1)
|
||||
|
@ -25,6 +25,10 @@ class EditedByPR(HeuristicInterface):
|
||||
|
||||
return test_rankings
|
||||
|
||||
def get_prediction_confidence(self, tests: List[str]) -> Dict[str, float]:
    """
    Give a confidence of 1 to every test in ``tests`` that is also returned
    by _get_modified_tests(); all other tests are left out of the result.
    """
    critical_tests = _get_modified_tests()
    # Walk the requested tests and probe the set (O(1) per lookup) instead of
    # scanning the ``tests`` list once for every modified test.
    return {test: 1 for test in tests if test in critical_tests}
|
||||
|
||||
|
||||
def _get_modified_tests() -> Set[str]:
|
||||
try:
|
||||
|
@ -1,4 +1,4 @@
|
||||
from typing import Any, List
|
||||
from typing import Any, Dict, List
|
||||
|
||||
from tools.stats.import_test_stats import (
|
||||
ADDITIONAL_CI_FILES_FOLDER,
|
||||
@ -10,7 +10,11 @@ from tools.testing.target_determination.heuristics.interface import (
|
||||
TestPrioritizations,
|
||||
)
|
||||
|
||||
from tools.testing.target_determination.heuristics.utils import get_correlated_tests
|
||||
from tools.testing.target_determination.heuristics.utils import (
|
||||
get_correlated_tests,
|
||||
get_ratings_for_tests,
|
||||
normalize_ratings,
|
||||
)
|
||||
|
||||
|
||||
# This heuristic assumes that changed files in previous commits are good sources
|
||||
@ -32,3 +36,10 @@ class HistorialEditedFiles(HeuristicInterface):
|
||||
)
|
||||
|
||||
return test_rankings
|
||||
|
||||
def get_prediction_confidence(self, tests: List[str]) -> Dict[str, float]:
    """
    Rate the given tests using the ratings file
    TD_HEURISTIC_HISTORICAL_EDITED_FILES.

    Only tests present in ``tests`` are kept, and the surviving ratings are
    normalized so that the highest one is 1.
    """
    test_ratings = get_ratings_for_tests(
        ADDITIONAL_CI_FILES_FOLDER / TD_HEURISTIC_HISTORICAL_EDITED_FILES
    )
    # Build the lookup once; membership in a list is a linear scan per rating.
    requested = set(tests)
    test_ratings = {k: v for (k, v) in test_ratings.items() if k in requested}
    return normalize_ratings(test_ratings, 1)
|
||||
|
@ -458,3 +458,13 @@ class HeuristicInterface:
|
||||
|
||||
def __str__(self) -> str:
|
||||
return self.name
|
||||
|
||||
@abstractmethod
def get_prediction_confidence(self, tests: List[str]) -> Dict[str, float]:
    """
    Counterpart of get_test_priorities that returns a float rating per test
    instead, each ranging from -1 to 1.

    The sign carries the recommendation (negative means skip, positive means
    run, 0 means no idea) and the magnitude is how confident the heuristic
    is. Used by AggregatedHeuristicsRankings.
    """
    ...
|
||||
|
@ -26,6 +26,10 @@ class PreviouslyFailedInPR(HeuristicInterface):
|
||||
|
||||
return test_rankings
|
||||
|
||||
def get_prediction_confidence(self, tests: List[str]) -> Dict[str, float]:
    """
    Give a confidence of 1 to every test in ``tests`` that is also returned
    by _get_previously_failing_tests(); all other tests are left out of the
    result.
    """
    critical_tests = _get_previously_failing_tests()
    # Walk the requested tests and probe the set (O(1) per lookup) instead of
    # scanning the ``tests`` list once for every previously failing test.
    return {test: 1 for test in tests if test in critical_tests}
|
||||
|
||||
|
||||
def _get_previously_failing_tests() -> Set[str]:
|
||||
PYTEST_FAILED_TESTS_CACHE_FILE_PATH = Path(".pytest_cache/v/cache/lastfailed")
|
||||
|
@ -1,4 +1,4 @@
|
||||
from typing import Any, List
|
||||
from typing import Any, Dict, List
|
||||
|
||||
from tools.stats.import_test_stats import (
|
||||
ADDITIONAL_CI_FILES_FOLDER,
|
||||
@ -10,7 +10,11 @@ from tools.testing.target_determination.heuristics.interface import (
|
||||
TestPrioritizations,
|
||||
)
|
||||
|
||||
from tools.testing.target_determination.heuristics.utils import get_correlated_tests
|
||||
from tools.testing.target_determination.heuristics.utils import (
|
||||
get_correlated_tests,
|
||||
get_ratings_for_tests,
|
||||
normalize_ratings,
|
||||
)
|
||||
|
||||
|
||||
# Profilers were used to gather simple python code coverage information for each
|
||||
@ -30,3 +34,10 @@ class Profiling(HeuristicInterface):
|
||||
)
|
||||
|
||||
return test_rankings
|
||||
|
||||
def get_prediction_confidence(self, tests: List[str]) -> Dict[str, float]:
    """
    Rate the given tests using the ratings file TD_HEURISTIC_PROFILING_FILE.

    Only tests present in ``tests`` are kept, and the surviving ratings are
    normalized so that the highest one is 1.
    """
    test_ratings = get_ratings_for_tests(
        ADDITIONAL_CI_FILES_FOLDER / TD_HEURISTIC_PROFILING_FILE
    )
    # Build the lookup once; membership in a list is a linear scan per rating.
    requested = set(tests)
    test_ratings = {k: v for (k, v) in test_ratings.items() if k in requested}
    return normalize_ratings(test_ratings, 1)
|
||||
|
@ -44,21 +44,44 @@ def query_changed_files() -> List[str]:
|
||||
return lines
|
||||
|
||||
|
||||
def get_correlated_tests(file: Union[str, Path]) -> List[str]:
|
||||
def normalize_ratings(ratings: Dict[str, float], max_value: float) -> Dict[str, float]:
    """
    Take the ratings, make the largest one equal to ``max_value``, and scale
    the rest proportionally.

    Ex: [1, 2, 3, 4] with max_value 8 gets converted to [2, 4, 6, 8].

    Assumes all ratings are >= 0 (an AssertionError is raised otherwise).
    Never modifies ``ratings`` in place: a new dict is always returned, also
    for empty input. If every rating is 0 there is nothing to scale against,
    so every test keeps a rating of 0.
    """
    if not ratings:
        return {}
    assert min(ratings.values()) >= 0, "ratings must be >= 0"
    max_rating = max(ratings.values())
    if max_rating == 0:
        # All ratings are zero; dividing by max_rating would raise.
        return {tf: 0.0 for tf in ratings}
    return {tf: rank / max_rating * max_value for tf, rank in ratings.items()}
|
||||
|
||||
|
||||
def get_ratings_for_tests(file: Union[str, Path]) -> Dict[str, float]:
    """
    Sum, per test file, the ratings attached to the currently changed files.

    ``file`` is resolved relative to REPO_ROOT and is read as JSON of the
    form {changed file -> {test file -> rating}}. For every file reported by
    query_changed_files(), the ratings of its test files are added up.

    Returns a mapping of test file -> summed rating. An empty dict is
    returned when the ratings file does not exist or when the changed files
    cannot be queried.
    """
    path = REPO_ROOT / file
    if not os.path.exists(path):
        print(f"could not find path {path}")
        return {}
    with open(path) as f:
        test_file_ratings = cast(Dict[str, Dict[str, float]], json.load(f))
    try:
        changed_files = query_changed_files()
    except Exception as e:
        # Deliberately best effort: without the list of changed files there
        # is nothing to rate, so warn and report no ratings.
        warn(f"Can't query changed test files due to {e}")
        return {}
    ratings: Dict[str, float] = defaultdict(float)
    # Named changed_file so the ``file`` parameter is not shadowed.
    for changed_file in changed_files:
        for test_file, score in test_file_ratings.get(changed_file, {}).items():
            ratings[test_file] += score
    return ratings
|
||||
|
||||
|
||||
def get_correlated_tests(file: Union[str, Path]) -> List[str]:
    """
    List the test files rated by get_ratings_for_tests(file), ordered from
    the highest rating to the lowest.
    """
    scores = get_ratings_for_tests(file)
    # reverse=True keeps equally rated tests in their original relative order,
    # exactly like sorting ascending on the negated rating.
    return sorted(scores, key=scores.__getitem__, reverse=True)
|
||||
|
Reference in New Issue
Block a user