Add PR number to metrics when available (#109406)

<!--
copilot:summary
-->
### <samp>🤖 Generated by Copilot at 780bfa6</samp>

Add a new metric for pull request number in `tools/stats/upload_metrics.py`. This allows tracking the CI performance of pull requests.
Pull Request resolved: https://github.com/pytorch/pytorch/pull/109406
Approved by: https://github.com/kit1980, https://github.com/malfet, https://github.com/clee2000
This commit is contained in:
Zain Rizvi
2023-09-25 19:57:34 +00:00
committed by PyTorch MergeBot
parent 3de0857503
commit d6cc3ac8b2
2 changed files with 122 additions and 8 deletions

View File

@ -42,11 +42,18 @@ class EnvVarMetric:
def value(self) -> Any:
value = os.environ.get(self.env_var)
if value is None and self.required:
# Github CI will set some env vars to an empty string
DEFAULT_ENVVAR_VALUES = [None, ""]
if value in DEFAULT_ENVVAR_VALUES:
if not self.required:
return None
raise ValueError(
f"Missing {self.name}. Please set the {self.env_var} "
"environment variable to pass in this value."
)
if self.type_conversion_fn:
return self.type_conversion_fn(value)
return value
@ -85,6 +92,7 @@ def emit_metric(
EnvVarMetric("build_environment", "BUILD_ENVIRONMENT"),
EnvVarMetric("job", "GITHUB_JOB"),
EnvVarMetric("test_config", "TEST_CONFIG", required=False),
EnvVarMetric("pr_number", "PR_NUMBER", required=False, type_conversion_fn=int),
EnvVarMetric("run_id", "GITHUB_RUN_ID", type_conversion_fn=int),
EnvVarMetric("run_number", "GITHUB_RUN_NUMBER", type_conversion_fn=int),
EnvVarMetric("run_attempt", "GITHUB_RUN_ATTEMPT", type_conversion_fn=int),
@ -104,7 +112,7 @@ def emit_metric(
"calling_module": calling_module,
"calling_function": calling_function,
"timestamp": datetime.datetime.utcnow().strftime("%Y-%m-%d %H:%M:%S.%f"),
**{m.name: m.value() for m in env_var_metrics},
**{m.name: m.value() for m in env_var_metrics if m.value()},
}
except ValueError as e:
warn(f"Not emitting metrics for {metric_name}. {e}")

View File

@ -18,6 +18,7 @@ JOB = "some-job"
RUN_ID = 56
RUN_NUMBER = 123
RUN_ATTEMPT = 3
PR_NUMBER = 6789
class TestUploadStats(unittest.TestCase):
@ -36,10 +37,11 @@ class TestUploadStats(unittest.TestCase):
"GITHUB_RUN_NUMBER": str(RUN_NUMBER),
"GITHUB_RUN_ATTEMPT": str(RUN_ATTEMPT),
},
clear=True, # Don't read any preset env vars
).start()
@mock.patch("boto3.Session.resource")
def test_emit_metric(self, mock_resource: Any) -> None:
def test_emits_default_and_given_metrics(self, mock_resource: Any) -> None:
metric = {
"some_number": 123,
"float_number": 32.34,
@ -54,7 +56,7 @@ class TestUploadStats(unittest.TestCase):
"metric_name": "metric_name",
"calling_file": "test_upload_stats_lib.py",
"calling_module": current_module,
"calling_function": "test_emit_metric",
"calling_function": "test_emits_default_and_given_metrics",
"repo": REPO,
"workflow": WORKFLOW,
"build_environment": BUILD_ENV,
@ -78,17 +80,96 @@ class TestUploadStats(unittest.TestCase):
emit_metric("metric_name", metric)
self.assertDictContainsSubset(emit_should_include, emitted_metric)
self.assertEqual(
emitted_metric,
{**emit_should_include, **emitted_metric},
)
@mock.patch("boto3.resource")
@mock.patch("boto3.Session.resource")
def test_when_optional_envvar_set_to_actual_value_then_emit_vars_emits_it(
self, mock_resource: Any
) -> None:
metric = {
"some_number": 123,
}
emit_should_include = {
**metric,
"pr_number": PR_NUMBER,
}
mock.patch.dict(
"os.environ",
{
"PR_NUMBER": str(PR_NUMBER),
},
).start()
# Preserve the metric emitted
emitted_metric: Dict[str, Any] = {}
def mock_put_item(Item: Dict[str, Any]) -> None:
nonlocal emitted_metric
emitted_metric = Item
mock_resource.return_value.Table.return_value.put_item = mock_put_item
emit_metric("metric_name", metric)
self.assertEqual(
emitted_metric,
{**emit_should_include, **emitted_metric},
)
@mock.patch("boto3.Session.resource")
def test_when_optional_envvar_set_to_a_empty_str_then_emit_vars_ignores_it(
self, mock_resource: Any
) -> None:
metric = {"some_number": 123}
emit_should_include: Dict[str, Any] = metric.copy()
# Github Actions defaults some env vars to an empty string
default_val = ""
mock.patch.dict(
"os.environ",
{
"PR_NUMBER": default_val,
},
).start()
# Preserve the metric emitted
emitted_metric: Dict[str, Any] = {}
def mock_put_item(Item: Dict[str, Any]) -> None:
nonlocal emitted_metric
emitted_metric = Item
mock_resource.return_value.Table.return_value.put_item = mock_put_item
emit_metric("metric_name", metric)
self.assertEqual(
emitted_metric,
{**emit_should_include, **emitted_metric},
f"Metrics should be emitted when an option parameter is set to '{default_val}'",
)
self.assertFalse(
emitted_metric.get("pr_number"),
f"Metrics should not include optional item 'pr_number' when it's envvar is set to '{default_val}'",
)
@mock.patch("boto3.Session.resource")
def test_blocks_emission_if_reserved_keyword_used(self, mock_resource: Any) -> None:
metric = {"repo": "awesome/repo"}
with self.assertRaises(ValueError):
emit_metric("metric_name", metric)
@mock.patch("boto3.resource")
def test_no_metrics_emitted_if_env_var_not_set(self, mock_resource: Any) -> None:
@mock.patch("boto3.Session.resource")
def test_no_metrics_emitted_if_required_env_var_not_set(
self, mock_resource: Any
) -> None:
metric = {"some_number": 123}
mock.patch.dict(
@ -112,6 +193,31 @@ class TestUploadStats(unittest.TestCase):
self.assertFalse(put_item_invoked)
@mock.patch("boto3.Session.resource")
def test_no_metrics_emitted_if_required_env_var_set_to_empty_string(
self, mock_resource: Any
) -> None:
metric = {"some_number": 123}
mock.patch.dict(
"os.environ",
{
"BUILD_ENVIRONMENT": "",
},
).start()
put_item_invoked = False
def mock_put_item(Item: Dict[str, Any]) -> None:
nonlocal put_item_invoked
put_item_invoked = True
mock_resource.return_value.Table.return_value.put_item = mock_put_item
emit_metric("metric_name", metric)
self.assertFalse(put_item_invoked)
def test_upload_to_rockset_batch_size(self) -> None:
cases = [
{