Skip to content

Commit

Permalink
[ez] Remove unused code in upload_test_stats (pytorch#111504)
Browse files Browse the repository at this point in the history
This is code related to parallelism and test times that isn't used, so remove it.

Tested by running locally with `python3 -m tools.stats.upload_test_stats --workflow-run-id 6551035874 --workflow-run-attempt 1 --head-branch main --head-repository "pytorch/pytorch"` and commenting out parts for uploading to s3.
Pull Request resolved: pytorch#111504
Approved by: https://github.com/huydhn
  • Loading branch information
clee2000 authored and pytorchmergebot committed Oct 19, 2023
1 parent 4e310fd commit 0617f7f
Show file tree
Hide file tree
Showing 4 changed files with 17 additions and 84 deletions.
1 change: 1 addition & 0 deletions .additional_ci_files/slow-tests.json
Original file line number Diff line number Diff line change
@@ -0,0 +1 @@
{"test_autocast (__main__.TestPartitioning)": 90.782, "test_comprehensive_nn_functional_conv_transpose3d_cuda_complex128 (__main__.TestDecompCUDA)": 67.44933333333334, "test_comprehensive_nn_functional_conv_transpose3d_cuda_complex64 (__main__.TestDecompCUDA)": 89.746, "test_comprehensive_nn_functional_unfold_cuda_complex128 (__main__.TestDecompCUDA)": 108.58233333333332, "test_comprehensive_ormqr_cuda_complex128 (__main__.TestDecompCUDA)": 75.33933333333334, "test_comprehensive_svd_cuda_complex128 (__main__.TestDecompCUDA)": 61.149, "test_memory_format_operators_cuda (__main__.TestTorchDeviceTypeCUDA)": 69.38900000000001, "test_quick_core_backward_split_cuda_float64 (__main__.TestDecompCUDA)": 218.42066666666665, "test_quick_core_backward_unsafe_split_cuda_float64 (__main__.TestDecompCUDA)": 217.23433333333332, "test_retracibility_dynamic_shapes (__main__.DynamicShapesExportTests)": 367.1793333333333, "test_rnn_decomp_module_nn_LSTM_train_mode_cuda_float32 (__main__.TestDecompCUDA)": 128.60033333333334, "test_rnn_decomp_module_nn_LSTM_train_mode_cuda_float64 (__main__.TestDecompCUDA)": 126.46566666666666, "test_transpose_copy (__main__.CPUReproTests)": 60.71444444444445, "test_variant_consistency_jit_nn_functional_max_pool1d_cuda_float32 (__main__.TestJitCUDA)": 72.77266666666667, "test_variant_consistency_jit_svd_cuda_complex64 (__main__.TestJitCUDA)": 87.36566666666666, "test_views1_dynamic_shapes_cuda (__main__.DynamicShapesCudaTests)": 62.47400000000001, "test_vmapjvpvjp_nn_functional_poisson_nll_loss_cuda_float32 (__main__.TestOperatorsCUDA)": 66.62266666666666, "test_vmapvjpvjp_broadcast_tensors_cuda_float32 (__main__.TestOperatorsCUDA)": 93.915, "test_vmapvjpvjp_linalg_solve_triangular_cuda_float32 (__main__.TestOperatorsCUDA)": 80.409, "test_vmapvjpvjp_max_pool2d_with_indices_backward_cuda_float32 (__main__.TestOperatorsCUDA)": 70.152, "test_vmapvjpvjp_nn_functional_conv2d_cuda_float32 (__main__.TestOperatorsCUDA)": 67.51066666666667, 
"test_vmapvjpvjp_svd_cuda_float32 (__main__.TestOperatorsCUDA)": 60.07633333333333, "test_warp_softmax_64bit_indexing_cuda_float16 (__main__.TestNNDeviceTypeCUDA)": 70.2475, "test_warp_softmax_64bit_indexing_cuda_float32 (__main__.TestNNDeviceTypeCUDA)": 77.0975}
1 change: 1 addition & 0 deletions .additional_ci_files/test-times.json

Large diffs are not rendered by default.

97 changes: 14 additions & 83 deletions tools/stats/upload_test_stats.py
Original file line number Diff line number Diff line change
Expand Up @@ -4,7 +4,7 @@
import xml.etree.ElementTree as ET
from pathlib import Path
from tempfile import TemporaryDirectory
from typing import Any, Dict, List, Tuple
from typing import Any, Dict, List, Optional

from tools.stats.upload_stats_lib import (
download_gha_artifacts,
Expand All @@ -14,13 +14,16 @@
)


def get_job_id(report: Path) -> Optional[int]:
    # [Job id in artifacts]
    # Retrieve the job id from the report path. In our GHA workflows, we append
    # the job id to the end of the report name, so `report` looks like:
    # unzipped-test-reports-foo_5596745227/test/test-reports/foo/TEST-foo.xml
    # and we want to get `5596745227` out of it.
    #
    # Returns None when the first path component carries no trailing
    # `_<digits>` suffix (e.g. reports not produced by our GHA workflows),
    # so callers can treat the job id as unknown instead of crashing.
    try:
        return int(report.parts[0].rpartition("_")[2])
    except ValueError:
        return None


def parse_xml_report(
Expand All @@ -32,12 +35,8 @@ def parse_xml_report(
"""Convert a test report xml file into a JSON-serializable list of test cases."""
print(f"Parsing {tag}s for test report: {report}")

try:
job_id = get_job_id(report)
print(f"Found job id: {job_id}")
except Exception:
job_id = None
print("Failed to find job id")
job_id = get_job_id(report)
print(f"Found job id: {job_id}")

test_cases: List[Dict[str, Any]] = []

Expand Down Expand Up @@ -121,24 +120,7 @@ def process_xml_element(element: ET.Element) -> Dict[str, Any]:
return ret


def get_pytest_parallel_times() -> Dict[Any, Any]:
    """Collect the per-file wall-clock time reported by pytest test suites.

    Scans every XML report found under a ``python-pytest`` directory and maps
    ``(invoking_file, job_id)`` to the ``time`` attribute of the report's
    single ``testsuite`` element (kept as the raw attribute string).
    """
    pytest_parallel_times: Dict[Any, Any] = {}
    for report in Path(".").glob("**/python-pytest/**/*.xml"):
        # The report's parent directory is named after the invoking test file.
        invoking_file = report.parent.name

        root = ET.parse(report)

        # Each pytest-generated report is expected to contain exactly one
        # testsuite; its reported time is authoritative for parallel runs.
        assert len(list(root.iter("testsuite"))) == 1
        for test_suite in root.iter("testsuite"):
            pytest_parallel_times[
                (invoking_file, get_job_id(report))
            ] = test_suite.attrib["time"]
    return pytest_parallel_times


def get_tests(
workflow_run_id: int, workflow_run_attempt: int
) -> Tuple[List[Dict[str, Any]], Dict[Any, Any]]:
def get_tests(workflow_run_id: int, workflow_run_attempt: int) -> List[Dict[str, Any]]:
with TemporaryDirectory() as temp_dir:
print("Using temporary directory:", temp_dir)
os.chdir(temp_dir)
Expand Down Expand Up @@ -168,14 +150,12 @@ def get_tests(
)
)

pytest_parallel_times = get_pytest_parallel_times()

return test_cases, pytest_parallel_times
return test_cases


def get_tests_for_circleci(
workflow_run_id: int, workflow_run_attempt: int
) -> Tuple[List[Dict[str, Any]], Dict[Any, Any]]:
) -> List[Dict[str, Any]]:
# Parse the reports and transform them to JSON
test_cases = []
for xml_report in Path(".").glob("**/test/test-reports/**/*.xml"):
Expand All @@ -185,44 +165,7 @@ def get_tests_for_circleci(
)
)

pytest_parallel_times = get_pytest_parallel_times()

return test_cases, pytest_parallel_times


def get_invoking_file_times(
    test_case_summaries: List[Dict[str, Any]], pytest_parallel_times: Dict[Any, Any]
) -> List[Dict[str, Any]]:
    """Aggregate test-case times into one entry per (invoking_file, job_id).

    Sums the ``time`` of every summary sharing the same invoking file and job
    id, carrying along the workflow identifiers from the first summary seen
    for that key. When a key also appears in ``pytest_parallel_times``, the
    summed time is replaced by the testsuite-reported time (see comment below).
    """

    def get_key(summary: Dict[str, Any]) -> Any:
        # Aggregation key: one bucket per invoking file within each job.
        return (
            summary["invoking_file"],
            summary["job_id"],
        )

    def init_value(summary: Dict[str, Any]) -> Any:
        # Seed a bucket with identifying metadata and a zeroed running total.
        return {
            "job_id": summary["job_id"],
            "workflow_id": summary["workflow_id"],
            "workflow_run_attempt": summary["workflow_run_attempt"],
            "invoking_file": summary["invoking_file"],
            "time": 0.0,
        }

    ret: Dict[Any, Dict[str, Any]] = {}
    for summary in test_case_summaries:
        key = get_key(summary)
        if key not in ret:
            ret[key] = init_value(summary)
        ret[key]["time"] += summary["time"]

    for key, val in ret.items():
        # when running in parallel in pytest, adding the test times will not give the correct
        # time used to run the file, which will make the sharding incorrect, so if the test is
        # run in parallel, we take the time reported by the testsuite
        if key in pytest_parallel_times:
            val["time"] = pytest_parallel_times[key]

    return list(ret.values())
return test_cases


def summarize_test_cases(test_cases: List[Dict[str, Any]]) -> List[Dict[str, Any]]:
Expand Down Expand Up @@ -313,23 +256,18 @@ def init_value(test_case: Dict[str, Any]) -> Dict[str, Any]:
print(f"Workflow id is: {args.workflow_run_id}")

if args.circleci:
test_cases, pytest_parallel_times = get_tests_for_circleci(
test_cases = get_tests_for_circleci(
args.workflow_run_id, args.workflow_run_attempt
)
else:
test_cases, pytest_parallel_times = get_tests(
args.workflow_run_id, args.workflow_run_attempt
)
test_cases = get_tests(args.workflow_run_id, args.workflow_run_attempt)

# Flush stdout so that any errors in Rockset upload show up last in the logs.
sys.stdout.flush()

# For PRs, only upload a summary of test_runs. This helps lower the
# volume of writes we do to Rockset.
test_case_summary = summarize_test_cases(test_cases)
invoking_file_times = get_invoking_file_times(
test_case_summary, pytest_parallel_times
)

upload_workflow_stats_to_s3(
args.workflow_run_id,
Expand All @@ -338,13 +276,6 @@ def init_value(test_case: Dict[str, Any]) -> Dict[str, Any]:
test_case_summary,
)

upload_workflow_stats_to_s3(
args.workflow_run_id,
args.workflow_run_attempt,
"invoking_file_times",
invoking_file_times,
)

# Separate out the failed test cases.
# Uploading everything is too data intensive most of the time,
# but these will be just a tiny fraction.
Expand Down
2 changes: 1 addition & 1 deletion tools/test/test_upload_test_stats.py
Original file line number Diff line number Diff line change
Expand Up @@ -13,7 +13,7 @@ class TestUploadTestStats(unittest.TestCase):
)
def test_existing_job(self) -> None:
"""Run on a known-good job and make sure we don't error and get basically okay results."""
test_cases, _ = get_tests(2561394934, 1)
test_cases = get_tests(2561394934, 1)
self.assertEqual(len(test_cases), 609873)
summary = summarize_test_cases(test_cases)
self.assertEqual(len(summary), 5068)
Expand Down

0 comments on commit 0617f7f

Please sign in to comment.