diff --git a/lib/phase_stats.py b/lib/phase_stats.py index 587d895b..fbf11c87 100644 --- a/lib/phase_stats.py +++ b/lib/phase_stats.py @@ -158,15 +158,13 @@ def build_and_store_phase_stats(run_id, sci=None): else: network_io_co2_in_ug = decimal.Decimal(0) - if sci.get('EL', None) is not None and sci.get('TE', None) is not None and sci.get('RS', None) is not None: - duration_in_years = duration_in_s * 60 * 60 * 24 * 365 - embodied_carbon_share_g = (duration_in_years / sci.get('EL', None) ) * sci.get('TE', None) * sci.get('RS', None) + duration_in_years = duration_in_s / (60 * 60 * 24 * 365) + embodied_carbon_share_g = (duration_in_years / sci['EL'] ) * sci['TE'] * sci['RS'] embodied_carbon_share_ug = decimal.Decimal(embodied_carbon_share_g * 1_000_000) csv_buffer.write(generate_csv_line(run_id, 'embodied_carbon_share_machine', '[SYSTEM]', f"{idx:03}_{phase['name']}", embodied_carbon_share_ug, 'TOTAL', None, None, 'ug')) - if phase['name'] == '[RUNTIME]' and machine_co2_in_ug is not None and sci is not None \ - and sci.get('R', None) is not None and sci['R'] != 0: + if phase['name'] == '[RUNTIME]' and machine_co2_in_ug is not None and sci is not None and sci.get('R'): csv_buffer.write(generate_csv_line(run_id, 'software_carbon_intensity_global', '[SYSTEM]', f"{idx:03}_{phase['name']}", (machine_co2_in_ug + embodied_carbon_share_ug + network_io_co2_in_ug) / sci['R'], 'TOTAL', None, None, f"ugCO2e/{sci['R_d']}")) if machine_power_idle and cpu_utilization_machine and cpu_utilization_containers: diff --git a/runner.py b/runner.py index e0892f28..0ee49e89 100755 --- a/runner.py +++ b/runner.py @@ -79,7 +79,10 @@ def __init__(self, self._tmp_folder = Path('/tmp/green-metrics-tool').resolve() # since linux has /tmp and macos /private/tmp self._usage_scenario = {} self._architecture = utils.get_architecture() + self._sci = {'R_d': None, 'R': 0} + self._sci |= GlobalConfig().config.get('sci') or {} # merge in data from machine config like I, TE etc. 
+ self._job_id = job_id self._arguments = locals() self._repo_folder = f"{self._tmp_folder}/repo" # default if not changed in checkout_repository @@ -461,7 +464,7 @@ def update_and_insert_specs(self): measurement_config = {} measurement_config['providers'] = utils.get_metric_providers(config) - measurement_config['sci'] = config.get('sci', None) + measurement_config['sci'] = self._sci # Insert auxilary info for the run. Not critical. DB().query(""" diff --git a/test-config.yml b/test-config.yml index 66fd58fb..46ab2509 100644 --- a/test-config.yml +++ b/test-config.yml @@ -93,10 +93,10 @@ measurement: Hardware_Availability_Year: 2011 sci: - EL: 3.5 + EL: 4 RS: 1 - TE: 194000 - I: 475 + TE: 181000 + I: 436 optimization: ignore: diff --git a/tests/metric_providers/test_metric_providers.py b/tests/metric_providers/test_metric_providers.py index 4c6a9b41..a1203409 100644 --- a/tests/metric_providers/test_metric_providers.py +++ b/tests/metric_providers/test_metric_providers.py @@ -142,9 +142,7 @@ def test_network_providers(): assert seen_network_total_procfs_system is True -def test_cpu_memory_providers(): - if utils.get_architecture() == 'macos': - return +def test_cpu_memory_carbon_providers(): assert(run_id is not None and run_id != '') @@ -152,6 +150,7 @@ def test_cpu_memory_providers(): SELECT metric, detail_name, value, unit, max_value FROM phase_stats WHERE run_id = %s and phase = '006_VM Stress' + ORDER BY metric DESC -- this will assure that the phase_time metric will come first and can be saved """ data = DB().fetch_all(query, (run_id,), fetch_mode='dict') @@ -159,9 +158,11 @@ def test_cpu_memory_providers(): ## get the current used disj seen_phase_time_syscall_system = False - seen_cpu_utilization_procfs_system = False + seen_cpu_utilization = False seen_memory_used_procfs_system = False + seen_embodied_carbon_share_machine = False MICROSECONDS = 1_000_000 + phase_time = None for metric_provider in data: metric = metric_provider['metric'] @@ -172,16 +173,37 
@@ def test_cpu_memory_providers(): assert 9000 < val <= 10000 , f"cpu_utilization_procfs_system is not between 90_00 and 100_00 but {metric_provider['value']} {metric_provider['unit']}" assert 9500 < max_value <= 10500 , f"cpu_utilization_procfs_system max is not between 95_00 and 105_00 but {metric_provider['value']} {metric_provider['unit']}" - seen_cpu_utilization_procfs_system = True + seen_cpu_utilization = True + elif metric == 'cpu_utilization_mach_system': # macOS values do not get as high due to the VM. + assert 5500 < val <= 10000 , f"cpu_utilization_mach_system is not between 55_00 and 100_00 but {metric_provider['value']} {metric_provider['unit']}" + assert 8000 < max_value <= 10500 , f"cpu_utilization_mach_system max is not between 80_00 and 105_00 but {metric_provider['value']} {metric_provider['unit']}" + + seen_cpu_utilization = True + elif metric == 'memory_used_procfs_system': - if not os.getenv("GITHUB_ACTIONS") == "true": # skip test for GitHub Actions VM. Memory seems weirdly assigned here + if not os.getenv("GITHUB_ACTIONS") == "true" and utils.get_architecture() != 'macos': # skip test for GitHub Actions VM. Memory seems weirdly assigned here. 
Also skip macos assert psutil.virtual_memory().total*0.55 <= val <= psutil.virtual_memory().total * 0.65 , f"memory_used_procfs_system avg is not between 55% and 65% of total memory but {metric_provider['value']} {metric_provider['unit']}" seen_memory_used_procfs_system = True elif metric == 'phase_time_syscall_system': assert 5*MICROSECONDS < val < 5.5*MICROSECONDS , f"phase_time_syscall_system is not between 5 and 5.5 s but {metric_provider['value']} {metric_provider['unit']}" seen_phase_time_syscall_system = True + phase_time = val + + elif metric == 'embodied_carbon_share_machine': + # we have the phase time value as we sort by metric DESC + phase_time_in_years = phase_time / (MICROSECONDS * 60 * 60 * 24 * 365) + sci = {"EL": 4, "TE": 181000, "RS": 1} + embodied_carbon_expected = int((phase_time_in_years / sci['EL']) * sci['TE'] * sci['RS'] * 1_000_000) + # Make a range because of rounding errors + assert embodied_carbon_expected*0.99 < val < embodied_carbon_expected*1.01 , f"embodied_carbon_share_machine is not {embodied_carbon_expected} but {metric_provider['value']} {metric_provider['unit']}\n. This might be also because the values in the test are hardcoded. 
Check reporter but also if test-config.yml configuration is still accurate" + seen_embodied_carbon_share_machine = True + + assert seen_phase_time_syscall_system is True, "Did not see seen_phase_time_syscall_system metric" + assert seen_cpu_utilization is True, "Did not see seen_cpu_utilization metric" + assert seen_embodied_carbon_share_machine is True, "Did not see seen_embodied_carbon_share_machine metric" + + if utils.get_architecture() == 'macos': # skip following test for macos as we do not have that provider there + return - assert seen_phase_time_syscall_system is True - assert seen_cpu_utilization_procfs_system is True - assert seen_memory_used_procfs_system is True + assert seen_memory_used_procfs_system is True, "Did not see seen_memory_used_procfs_system metric" diff --git a/tools/rebuild_phase_stats.py b/tools/rebuild_phase_stats.py index 99e57239..718f04d6 100644 --- a/tools/rebuild_phase_stats.py +++ b/tools/rebuild_phase_stats.py @@ -11,23 +11,23 @@ from lib.db import DB if __name__ == '__main__': - print('This will remove ALL phase_stats and completely rebuild them. No data will get lost, but it will take some time. Continue? (y/N)') + print('This will remove ALL phase_stats and completely rebuild them. No data will get lost, but it will take some time. Continue? (y/N)') answer = sys.stdin.readline() if answer.strip().lower() == 'y': print('Deleting old phase_stats ...') DB().query('DELETE FROM phase_stats') print('Fetching runs ...') query = ''' - SELECT id + SELECT id, measurement_config FROM runs - WHERE - end_measurement IS NOT NULL AND phases IS NOT NULL + WHERE end_measurement IS NOT NULL AND phases IS NOT NULL + ''' - runs = DB().fetch_all(query) + runs = DB().fetch_all(query, fetch_mode='dict') - print(f"Fetched {len(runs)} runs. Commencing ...") - for idx, run_id in enumerate(runs): - print(f"Rebuilding phase_stats for run #{idx} {run_id[0]}") - build_and_store_phase_stats(run_id[0]) + print(f"Fetched {len(runs)} runs. 
Commencing ...") + for idx, data in enumerate(runs): + print(f"Rebuilding phase_stats for run #{idx} {data['id']}") + build_and_store_phase_stats(data['id'], (data['measurement_config'] or {}).get('sci')) print('Done')