diff --git a/buildstockbatch/cloud/docker_base.py b/buildstockbatch/cloud/docker_base.py
index 3176a23a..50e17aae 100644
--- a/buildstockbatch/cloud/docker_base.py
+++ b/buildstockbatch/cloud/docker_base.py
@@ -20,6 +20,7 @@
 import logging
 import math
 import os
+import pandas as pd
 import pathlib
 import random
 import shutil
@@ -67,6 +68,9 @@ def __init__(self, project_filename):
             logger.error("The docker server did not respond, make sure Docker Desktop is started then retry.")
             raise RuntimeError("The docker server did not respond, make sure Docker Desktop is started then retry.")
 
+    def get_fs(self):
+        return LocalFileSystem()
+
     @staticmethod
     def validate_project(project_file):
         super(DockerBatchBase, DockerBatchBase).validate_project(project_file)
@@ -474,3 +478,52 @@ def run_simulations(cls, cfg, job_id, jobs_d, sim_dir, fs, output_path):
                 shutil.rmtree(item)
             elif os.path.isfile(item):
                 os.remove(item)
+
+    def log_summary(self):
+        """
+        Log a summary of how many simulations succeeded, failed, or ended with other statuses.
+        Uses the `completed_status` column of the files in results/parquet/.../results_up*.parquet.
+        """
+        fs = self.get_fs()
+        # Summary of simulation statuses across all upgrades
+        status_summary = {}
+        total_counts = collections.defaultdict(int)
+
+        results_glob = f"{self.results_dir}/parquet/**/results_up*.parquet"
+        try:
+            results_files = fs.glob(results_glob)
+        except FileNotFoundError:
+            logger.info(f"No results parquet files found at {results_glob}")
+            return
+
+        for result in results_files:
+            upgrade_id = result.split(".")[0][-2:]
+            with fs.open(result) as f:
+                df = pd.read_parquet(f, columns=["completed_status"])
+            # Dict mapping from status (e.g. "Success") to count
+            statuses = df.groupby("completed_status").size().to_dict()
+            status_summary[upgrade_id] = statuses
+            for status, count in statuses.items():
+                total_counts[status] += count
+
+        # Always include these statuses and show them first
+        always_use = ["Success", "Fail"]
+        all_statuses = always_use + list(total_counts.keys() - set(always_use))
+        s = "Final status of all simulations:"
+        for upgrade, counts in status_summary.items():
+            if upgrade == "00":
+                s += "\nBaseline "
+            else:
+                s += f"\nUpgrade {upgrade} "
+            for status in all_statuses:
+                s += f"{status}: {counts.get(status, 0):<7d} "
+
+        s += "\n\nTotal "
+        for status in all_statuses:
+            s += f"{status}: {total_counts.get(status, 0):<7d} "
+        s += "\n"
+
+        for upgrade in postprocessing.get_upgrade_list(self.cfg):
+            if f"{upgrade:02d}" not in status_summary:
+                s += f"\nNo results found for Upgrade {upgrade}"
+        logger.info(s)
diff --git a/buildstockbatch/test/test_docker_base.py b/buildstockbatch/test/test_docker_base.py
index 88e70f1a..1f1e089b 100644
--- a/buildstockbatch/test/test_docker_base.py
+++ b/buildstockbatch/test/test_docker_base.py
@@ -3,6 +3,7 @@
 from fsspec.implementations.local import LocalFileSystem
 import gzip
 import json
+import logging
 import os
 import pathlib
 import shutil
@@ -148,3 +149,28 @@ def test_run_simulations(basic_residential_project_file):
     # Check that files were cleaned up correctly
     assert not os.listdir(sim_dir)
     os.chdir(old_cwd)
+
+
+def test_log_summary(basic_residential_project_file, mocker, caplog):
+    """
+    Test logging a summary of simulation statuses.
+    """
+    project_filename, results_dir = basic_residential_project_file()
+
+    mocker.patch.object(DockerBatchBase, "results_dir", results_dir)
+    dbb = DockerBatchBase(project_filename)
+    # Add results parquet files for the summary to read
+    shutil.copytree(
+        os.path.join(
+            os.path.dirname(os.path.abspath(__file__)),
+            "test_results",
+            "parquet",
+        ),
+        os.path.join(results_dir, "parquet"),
+    )
+
+    with caplog.at_level(logging.INFO):
+        dbb.log_summary()
+    assert "Upgrade 01 Success: 4       Fail: 0" in caplog.text
+    assert "Baseline Success: 4       Fail: 0" in caplog.text
+    assert "Total Success: 8       Fail: 0" in caplog.text
diff --git a/docs/changelog/changelog_dev.rst b/docs/changelog/changelog_dev.rst
index b5098204..e6161959 100644
--- a/docs/changelog/changelog_dev.rst
+++ b/docs/changelog/changelog_dev.rst
@@ -42,6 +42,12 @@ Development Changelog
 
         Major update to get AWS Batch run environment working.
 
+    .. change::
+        :tags: general
+        :pullreq: 435
+
+        Add helper to log a summary of how many simulations succeeded and failed at the end of a job.
+
     .. change::
         :tags: general, feature
         :pullreq: 437