[ez] Remove unused code in upload_test_stats (#111504)

Remove code related to pytest parallelism and invoking-file test times that is no longer used.

Tested by running locally with `python3 -m tools.stats.upload_test_stats --workflow-run-id 6551035874 --workflow-run-attempt 1 --head-branch main --head-repository "pytorch/pytorch"` and commenting out the parts that upload to S3.
Pull Request resolved: https://github.com/pytorch/pytorch/pull/111504
Approved by: https://github.com/huydhn
Catherine Lee
2023-10-19 16:09:15 +00:00
committed by PyTorch MergeBot
parent 4e310fd875
commit 0617f7fa75
4 changed files with 17 additions and 84 deletions


@@ -0,0 +1 @@
+{"test_autocast (__main__.TestPartitioning)": 90.782, "test_comprehensive_nn_functional_conv_transpose3d_cuda_complex128 (__main__.TestDecompCUDA)": 67.44933333333334, "test_comprehensive_nn_functional_conv_transpose3d_cuda_complex64 (__main__.TestDecompCUDA)": 89.746, "test_comprehensive_nn_functional_unfold_cuda_complex128 (__main__.TestDecompCUDA)": 108.58233333333332, "test_comprehensive_ormqr_cuda_complex128 (__main__.TestDecompCUDA)": 75.33933333333334, "test_comprehensive_svd_cuda_complex128 (__main__.TestDecompCUDA)": 61.149, "test_memory_format_operators_cuda (__main__.TestTorchDeviceTypeCUDA)": 69.38900000000001, "test_quick_core_backward_split_cuda_float64 (__main__.TestDecompCUDA)": 218.42066666666665, "test_quick_core_backward_unsafe_split_cuda_float64 (__main__.TestDecompCUDA)": 217.23433333333332, "test_retracibility_dynamic_shapes (__main__.DynamicShapesExportTests)": 367.1793333333333, "test_rnn_decomp_module_nn_LSTM_train_mode_cuda_float32 (__main__.TestDecompCUDA)": 128.60033333333334, "test_rnn_decomp_module_nn_LSTM_train_mode_cuda_float64 (__main__.TestDecompCUDA)": 126.46566666666666, "test_transpose_copy (__main__.CPUReproTests)": 60.71444444444445, "test_variant_consistency_jit_nn_functional_max_pool1d_cuda_float32 (__main__.TestJitCUDA)": 72.77266666666667, "test_variant_consistency_jit_svd_cuda_complex64 (__main__.TestJitCUDA)": 87.36566666666666, "test_views1_dynamic_shapes_cuda (__main__.DynamicShapesCudaTests)": 62.47400000000001, "test_vmapjvpvjp_nn_functional_poisson_nll_loss_cuda_float32 (__main__.TestOperatorsCUDA)": 66.62266666666666, "test_vmapvjpvjp_broadcast_tensors_cuda_float32 (__main__.TestOperatorsCUDA)": 93.915, "test_vmapvjpvjp_linalg_solve_triangular_cuda_float32 (__main__.TestOperatorsCUDA)": 80.409, "test_vmapvjpvjp_max_pool2d_with_indices_backward_cuda_float32 (__main__.TestOperatorsCUDA)": 70.152, "test_vmapvjpvjp_nn_functional_conv2d_cuda_float32 (__main__.TestOperatorsCUDA)": 67.51066666666667, "test_vmapvjpvjp_svd_cuda_float32 (__main__.TestOperatorsCUDA)": 60.07633333333333, "test_warp_softmax_64bit_indexing_cuda_float16 (__main__.TestNNDeviceTypeCUDA)": 70.2475, "test_warp_softmax_64bit_indexing_cuda_float32 (__main__.TestNNDeviceTypeCUDA)": 77.0975}

File diff suppressed because one or more lines are too long


@@ -4,7 +4,7 @@ import sys
 import xml.etree.ElementTree as ET
 from pathlib import Path
 from tempfile import TemporaryDirectory
-from typing import Any, Dict, List, Tuple
+from typing import Any, Dict, List, Optional

 from tools.stats.upload_stats_lib import (
     download_gha_artifacts,
@@ -14,13 +14,16 @@ from tools.stats.upload_stats_lib import (
 )


-def get_job_id(report: Path) -> int:
+def get_job_id(report: Path) -> Optional[int]:
     # [Job id in artifacts]
     # Retrieve the job id from the report path. In our GHA workflows, we append
     # the job id to the end of the report name, so `report` looks like:
     # unzipped-test-reports-foo_5596745227/test/test-reports/foo/TEST-foo.xml
     # and we want to get `5596745227` out of it.
-    return int(report.parts[0].rpartition("_")[2])
+    try:
+        return int(report.parts[0].rpartition("_")[2])
+    except ValueError:
+        return None


 def parse_xml_report(
@@ -32,12 +35,8 @@ def parse_xml_report(
     """Convert a test report xml file into a JSON-serializable list of test cases."""
     print(f"Parsing {tag}s for test report: {report}")

-    try:
-        job_id = get_job_id(report)
-        print(f"Found job id: {job_id}")
-    except Exception:
-        job_id = None
-        print("Failed to find job id")
+    job_id = get_job_id(report)
+    print(f"Found job id: {job_id}")

     test_cases: List[Dict[str, Any]] = []
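A small sketch of the behavior change (the sample paths below are hypothetical): `get_job_id` still pulls the trailing `_<job id>` off the first path component, but now converts the `ValueError` from `int()` into `None` itself instead of relying on the caller's broad `except Exception`.

```python
from pathlib import Path

# A report path that embeds a job id parses cleanly:
good = Path("unzipped-test-reports-foo_5596745227/test/test-reports/foo/TEST-foo.xml")
assert good.parts[0].rpartition("_")[2] == "5596745227"  # int() succeeds

# Without a "_<job id>" suffix, rpartition("_") leaves the whole string in
# slot 2, so int("test-reports") raises ValueError and get_job_id returns None.
bad = Path("test-reports/foo/TEST-foo.xml")
assert bad.parts[0].rpartition("_")[2] == "test-reports"
```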
@@ -121,24 +120,7 @@ def process_xml_element(element: ET.Element) -> Dict[str, Any]:
     return ret


-def get_pytest_parallel_times() -> Dict[Any, Any]:
-    pytest_parallel_times: Dict[Any, Any] = {}
-    for report in Path(".").glob("**/python-pytest/**/*.xml"):
-        invoking_file = report.parent.name
-
-        root = ET.parse(report)
-        assert len(list(root.iter("testsuite"))) == 1
-        for test_suite in root.iter("testsuite"):
-            pytest_parallel_times[
-                (invoking_file, get_job_id(report))
-            ] = test_suite.attrib["time"]
-
-    return pytest_parallel_times
-
-
-def get_tests(
-    workflow_run_id: int, workflow_run_attempt: int
-) -> Tuple[List[Dict[str, Any]], Dict[Any, Any]]:
+def get_tests(workflow_run_id: int, workflow_run_attempt: int) -> List[Dict[str, Any]]:
     with TemporaryDirectory() as temp_dir:
         print("Using temporary directory:", temp_dir)
         os.chdir(temp_dir)
@@ -168,14 +150,12 @@ def get_tests(
             )
         )

-    pytest_parallel_times = get_pytest_parallel_times()
-
-    return test_cases, pytest_parallel_times
+    return test_cases


 def get_tests_for_circleci(
     workflow_run_id: int, workflow_run_attempt: int
-) -> Tuple[List[Dict[str, Any]], Dict[Any, Any]]:
+) -> List[Dict[str, Any]]:
     # Parse the reports and transform them to JSON
     test_cases = []
     for xml_report in Path(".").glob("**/test/test-reports/**/*.xml"):
@@ -185,44 +165,7 @@
             )
         )

-    pytest_parallel_times = get_pytest_parallel_times()
-
-    return test_cases, pytest_parallel_times
-
-
-def get_invoking_file_times(
-    test_case_summaries: List[Dict[str, Any]], pytest_parallel_times: Dict[Any, Any]
-) -> List[Dict[str, Any]]:
-    def get_key(summary: Dict[str, Any]) -> Any:
-        return (
-            summary["invoking_file"],
-            summary["job_id"],
-        )
-
-    def init_value(summary: Dict[str, Any]) -> Any:
-        return {
-            "job_id": summary["job_id"],
-            "workflow_id": summary["workflow_id"],
-            "workflow_run_attempt": summary["workflow_run_attempt"],
-            "invoking_file": summary["invoking_file"],
-            "time": 0.0,
-        }
-
-    ret = {}
-    for summary in test_case_summaries:
-        key = get_key(summary)
-        if key not in ret:
-            ret[key] = init_value(summary)
-        ret[key]["time"] += summary["time"]
-
-    for key, val in ret.items():
-        # when running in parallel in pytest, adding the test times will not give the correct
-        # time used to run the file, which will make the sharding incorrect, so if the test is
-        # run in parallel, we take the time reported by the testsuite
-        if key in pytest_parallel_times:
-            val["time"] = pytest_parallel_times[key]
-
-    return list(ret.values())
+    return test_cases


 def summarize_test_cases(test_cases: List[Dict[str, Any]]) -> List[Dict[str, Any]]:
@@ -313,13 +256,11 @@ if __name__ == "__main__":
     print(f"Workflow id is: {args.workflow_run_id}")

     if args.circleci:
-        test_cases, pytest_parallel_times = get_tests_for_circleci(
+        test_cases = get_tests_for_circleci(
             args.workflow_run_id, args.workflow_run_attempt
         )
     else:
-        test_cases, pytest_parallel_times = get_tests(
-            args.workflow_run_id, args.workflow_run_attempt
-        )
+        test_cases = get_tests(args.workflow_run_id, args.workflow_run_attempt)

     # Flush stdout so that any errors in Rockset upload show up last in the logs.
     sys.stdout.flush()
@@ -327,9 +268,6 @@ if __name__ == "__main__":
     # For PRs, only upload a summary of test_runs. This helps lower the
     # volume of writes we do to Rockset.
    test_case_summary = summarize_test_cases(test_cases)
-    invoking_file_times = get_invoking_file_times(
-        test_case_summary, pytest_parallel_times
-    )

     upload_workflow_stats_to_s3(
         args.workflow_run_id,
@@ -338,13 +276,6 @@
         test_case_summary,
     )

-    upload_workflow_stats_to_s3(
-        args.workflow_run_id,
-        args.workflow_run_attempt,
-        "invoking_file_times",
-        invoking_file_times,
-    )
-
     # Separate out the failed test cases.
     # Uploading everything is too data intensive most of the time,
     # but these will be just a tiny fraction.
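After the changes above, the main path through the script reduces to roughly the sketch below. The run id, attempt number, and the `"test_run_summary"` table name are placeholders (the real name sits in a part of the diff not shown here), and the sketch assumes `upload_workflow_stats_to_s3` comes from `tools.stats.upload_stats_lib` as the import block at the top suggests:

```python
from tools.stats.upload_test_stats import get_tests, summarize_test_cases
from tools.stats.upload_stats_lib import upload_workflow_stats_to_s3

# get_tests now returns just the test cases -- no pytest parallel times.
test_cases = get_tests(6551035874, 1)
test_case_summary = summarize_test_cases(test_cases)

# One summary upload remains; the separate "invoking_file_times" upload is gone.
upload_workflow_stats_to_s3(6551035874, 1, "test_run_summary", test_case_summary)
```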


@@ -13,7 +13,7 @@ class TestUploadTestStats(unittest.TestCase):
     )
     def test_existing_job(self) -> None:
         """Run on a known-good job and make sure we don't error and get basically okay results."""
-        test_cases, _ = get_tests(2561394934, 1)
+        test_cases = get_tests(2561394934, 1)
         self.assertEqual(len(test_cases), 609873)
         summary = summarize_test_cases(test_cases)
         self.assertEqual(len(summary), 5068)