Mirror of https://github.com/pytorch/pytorch.git (synced 2025-10-20 21:14:14 +08:00)
[ez] Remove unused code in upload_test_stats (#111504)
This code, related to parallelism and test times, is no longer used, so remove it. Tested by running locally with `python3 -m tools.stats.upload_test_stats --workflow-run-id 6551035874 --workflow-run-attempt 1 --head-branch main --head-repository "pytorch/pytorch"` with the parts that upload to S3 commented out.
Pull Request resolved: https://github.com/pytorch/pytorch/pull/111504
Approved by: https://github.com/huydhn
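For readers who want to reproduce that local run, here is a minimal dry-run sketch, assuming a PyTorch checkout and the GitHub credentials the script normally uses; it calls the module's helpers directly instead of commenting out the S3 upload calls, so nothing is written to S3:

from tools.stats.upload_test_stats import get_tests, summarize_test_cases

# Workflow run id and attempt taken from the command above (assumed values).
test_cases = get_tests(6551035874, 1)        # downloads GHA artifacts into a temp dir and parses the XML reports
summary = summarize_test_cases(test_cases)   # aggregates the raw test cases into summary rows
print(f"parsed {len(test_cases)} test cases, {len(summary)} summary rows")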
Committed by: PyTorch MergeBot
Parent: 4e310fd875
Commit: 0617f7fa75
.additional_ci_files/slow-tests.json (new file, 1 line)
@@ -0,0 +1 @@
{"test_autocast (__main__.TestPartitioning)": 90.782, "test_comprehensive_nn_functional_conv_transpose3d_cuda_complex128 (__main__.TestDecompCUDA)": 67.44933333333334, "test_comprehensive_nn_functional_conv_transpose3d_cuda_complex64 (__main__.TestDecompCUDA)": 89.746, "test_comprehensive_nn_functional_unfold_cuda_complex128 (__main__.TestDecompCUDA)": 108.58233333333332, "test_comprehensive_ormqr_cuda_complex128 (__main__.TestDecompCUDA)": 75.33933333333334, "test_comprehensive_svd_cuda_complex128 (__main__.TestDecompCUDA)": 61.149, "test_memory_format_operators_cuda (__main__.TestTorchDeviceTypeCUDA)": 69.38900000000001, "test_quick_core_backward_split_cuda_float64 (__main__.TestDecompCUDA)": 218.42066666666665, "test_quick_core_backward_unsafe_split_cuda_float64 (__main__.TestDecompCUDA)": 217.23433333333332, "test_retracibility_dynamic_shapes (__main__.DynamicShapesExportTests)": 367.1793333333333, "test_rnn_decomp_module_nn_LSTM_train_mode_cuda_float32 (__main__.TestDecompCUDA)": 128.60033333333334, "test_rnn_decomp_module_nn_LSTM_train_mode_cuda_float64 (__main__.TestDecompCUDA)": 126.46566666666666, "test_transpose_copy (__main__.CPUReproTests)": 60.71444444444445, "test_variant_consistency_jit_nn_functional_max_pool1d_cuda_float32 (__main__.TestJitCUDA)": 72.77266666666667, "test_variant_consistency_jit_svd_cuda_complex64 (__main__.TestJitCUDA)": 87.36566666666666, "test_views1_dynamic_shapes_cuda (__main__.DynamicShapesCudaTests)": 62.47400000000001, "test_vmapjvpvjp_nn_functional_poisson_nll_loss_cuda_float32 (__main__.TestOperatorsCUDA)": 66.62266666666666, "test_vmapvjpvjp_broadcast_tensors_cuda_float32 (__main__.TestOperatorsCUDA)": 93.915, "test_vmapvjpvjp_linalg_solve_triangular_cuda_float32 (__main__.TestOperatorsCUDA)": 80.409, "test_vmapvjpvjp_max_pool2d_with_indices_backward_cuda_float32 (__main__.TestOperatorsCUDA)": 70.152, "test_vmapvjpvjp_nn_functional_conv2d_cuda_float32 (__main__.TestOperatorsCUDA)": 67.51066666666667, "test_vmapvjpvjp_svd_cuda_float32 (__main__.TestOperatorsCUDA)": 60.07633333333333, "test_warp_softmax_64bit_indexing_cuda_float16 (__main__.TestNNDeviceTypeCUDA)": 70.2475, "test_warp_softmax_64bit_indexing_cuda_float32 (__main__.TestNNDeviceTypeCUDA)": 77.0975}
.additional_ci_files/test-times.json (new file, 1 line)
File diff suppressed because one or more lines are too long
tools/stats/upload_test_stats.py
@@ -4,7 +4,7 @@ import sys
 import xml.etree.ElementTree as ET
 from pathlib import Path
 from tempfile import TemporaryDirectory
-from typing import Any, Dict, List, Tuple
+from typing import Any, Dict, List, Optional

 from tools.stats.upload_stats_lib import (
     download_gha_artifacts,
@@ -14,13 +14,16 @@ from tools.stats.upload_stats_lib import (
 )


-def get_job_id(report: Path) -> int:
+def get_job_id(report: Path) -> Optional[int]:
     # [Job id in artifacts]
     # Retrieve the job id from the report path. In our GHA workflows, we append
     # the job id to the end of the report name, so `report` looks like:
     #     unzipped-test-reports-foo_5596745227/test/test-reports/foo/TEST-foo.xml
     # and we want to get `5596745227` out of it.
-    return int(report.parts[0].rpartition("_")[2])
+    try:
+        return int(report.parts[0].rpartition("_")[2])
+    except ValueError:
+        return None


 def parse_xml_report(
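The comments above describe the whole contract of get_job_id; a quick illustration of that parsing on the example path from the comment (not part of the diff):

from pathlib import Path

report = Path("unzipped-test-reports-foo_5596745227/test/test-reports/foo/TEST-foo.xml")
print(int(report.parts[0].rpartition("_")[2]))  # 5596745227
# With the new Optional[int] return type, a first path component without a numeric
# suffix (e.g. "test-reports") raises ValueError internally and yields None instead.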
@@ -32,12 +35,8 @@ def parse_xml_report(
     """Convert a test report xml file into a JSON-serializable list of test cases."""
     print(f"Parsing {tag}s for test report: {report}")

-    try:
-        job_id = get_job_id(report)
-        print(f"Found job id: {job_id}")
-    except Exception:
-        job_id = None
-        print("Failed to find job id")
+    job_id = get_job_id(report)
+    print(f"Found job id: {job_id}")

     test_cases: List[Dict[str, Any]] = []

@@ -121,24 +120,7 @@ def process_xml_element(element: ET.Element) -> Dict[str, Any]:
     return ret


-def get_pytest_parallel_times() -> Dict[Any, Any]:
-    pytest_parallel_times: Dict[Any, Any] = {}
-    for report in Path(".").glob("**/python-pytest/**/*.xml"):
-        invoking_file = report.parent.name
-
-        root = ET.parse(report)
-
-        assert len(list(root.iter("testsuite"))) == 1
-        for test_suite in root.iter("testsuite"):
-            pytest_parallel_times[
-                (invoking_file, get_job_id(report))
-            ] = test_suite.attrib["time"]
-    return pytest_parallel_times
-
-
-def get_tests(
-    workflow_run_id: int, workflow_run_attempt: int
-) -> Tuple[List[Dict[str, Any]], Dict[Any, Any]]:
+def get_tests(workflow_run_id: int, workflow_run_attempt: int) -> List[Dict[str, Any]]:
     with TemporaryDirectory() as temp_dir:
         print("Using temporary directory:", temp_dir)
         os.chdir(temp_dir)
@@ -168,14 +150,12 @@ def get_tests(
                 )
             )

-        pytest_parallel_times = get_pytest_parallel_times()
-
-        return test_cases, pytest_parallel_times
+        return test_cases


 def get_tests_for_circleci(
     workflow_run_id: int, workflow_run_attempt: int
-) -> Tuple[List[Dict[str, Any]], Dict[Any, Any]]:
+) -> List[Dict[str, Any]]:
     # Parse the reports and transform them to JSON
     test_cases = []
     for xml_report in Path(".").glob("**/test/test-reports/**/*.xml"):
@@ -185,44 +165,7 @@ def get_tests_for_circleci(
             )
         )

-    pytest_parallel_times = get_pytest_parallel_times()
-
-    return test_cases, pytest_parallel_times
-
-
-def get_invoking_file_times(
-    test_case_summaries: List[Dict[str, Any]], pytest_parallel_times: Dict[Any, Any]
-) -> List[Dict[str, Any]]:
-    def get_key(summary: Dict[str, Any]) -> Any:
-        return (
-            summary["invoking_file"],
-            summary["job_id"],
-        )
-
-    def init_value(summary: Dict[str, Any]) -> Any:
-        return {
-            "job_id": summary["job_id"],
-            "workflow_id": summary["workflow_id"],
-            "workflow_run_attempt": summary["workflow_run_attempt"],
-            "invoking_file": summary["invoking_file"],
-            "time": 0.0,
-        }
-
-    ret = {}
-    for summary in test_case_summaries:
-        key = get_key(summary)
-        if key not in ret:
-            ret[key] = init_value(summary)
-        ret[key]["time"] += summary["time"]
-
-    for key, val in ret.items():
-        # when running in parallel in pytest, adding the test times will not give the correct
-        # time used to run the file, which will make the sharding incorrect, so if the test is
-        # run in parallel, we take the time reported by the testsuite
-        if key in pytest_parallel_times:
-            val["time"] = pytest_parallel_times[key]
-
-    return list(ret.values())
+    return test_cases


 def summarize_test_cases(test_cases: List[Dict[str, Any]]) -> List[Dict[str, Any]]:
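The removed comment records why the parallel times existed in the first place: when pytest runs a file's tests concurrently, summing per-test durations overstates the wall-clock time of the file and would skew sharding, so the testsuite-level time was preferred. A tiny illustration with made-up numbers:

# Two tests from one file, each taking 60s, executed concurrently by pytest workers.
per_test_times = [60.0, 60.0]
summed = sum(per_test_times)     # 120.0 -> what naive per-test aggregation reports
suite_wall_clock = 60.0          # roughly what the <testsuite time="..."> attribute held
print(summed, suite_wall_clock)  # the removed helper preferred the second number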
@@ -313,13 +256,11 @@ if __name__ == "__main__":
     print(f"Workflow id is: {args.workflow_run_id}")

     if args.circleci:
-        test_cases, pytest_parallel_times = get_tests_for_circleci(
+        test_cases = get_tests_for_circleci(
             args.workflow_run_id, args.workflow_run_attempt
         )
     else:
-        test_cases, pytest_parallel_times = get_tests(
-            args.workflow_run_id, args.workflow_run_attempt
-        )
+        test_cases = get_tests(args.workflow_run_id, args.workflow_run_attempt)

     # Flush stdout so that any errors in Rockset upload show up last in the logs.
     sys.stdout.flush()
@@ -327,9 +268,6 @@ if __name__ == "__main__":
     # For PRs, only upload a summary of test_runs. This helps lower the
     # volume of writes we do to Rockset.
     test_case_summary = summarize_test_cases(test_cases)
-    invoking_file_times = get_invoking_file_times(
-        test_case_summary, pytest_parallel_times
-    )

     upload_workflow_stats_to_s3(
         args.workflow_run_id,
@@ -338,13 +276,6 @@ if __name__ == "__main__":
         test_case_summary,
     )

-    upload_workflow_stats_to_s3(
-        args.workflow_run_id,
-        args.workflow_run_attempt,
-        "invoking_file_times",
-        invoking_file_times,
-    )
-
     # Separate out the failed test cases.
     # Uploading everything is too data intensive most of the time,
     # but these will be just a tiny fraction.
@@ -13,7 +13,7 @@ class TestUploadTestStats(unittest.TestCase):
     )
     def test_existing_job(self) -> None:
         """Run on a known-good job and make sure we don't error and get basically okay results."""
-        test_cases, _ = get_tests(2561394934, 1)
+        test_cases = get_tests(2561394934, 1)
         self.assertEqual(len(test_cases), 609873)
         summary = summarize_test_cases(test_cases)
         self.assertEqual(len(summary), 5068)