Merge pull request #939 from green-coding-solutions/fix-embodied-carbon

Missing embodied carbon values
green-coding-solutions · Oct 7, 2024 · de56353 · de56353
2 parents 438c71a + cf05574
commit de56353
Show file tree

Hide file tree

Showing 6 changed files with 53 additions and 30 deletions.
diff --git a/api/api_helpers.py b/api/api_helpers.py
@@ -732,14 +732,14 @@ def get_carbon_intensity(latitude, longitude):
     if db_data is not None and len(db_data) != 0:
         return db_data[0][2].get('carbonIntensity')
 
-    if not (token := GlobalConfig().config.get('electricity_maps_token')):
+    if not (electricitymaps_token := GlobalConfig().config.get('electricity_maps_token')):
         raise ValueError('You need to specify an electricitymap token in the config!')
 
-    if token == 'testing':
+    if electricitymaps_token == 'testing':
         # If we are running tests we always return 1000
         return 1000
 
-    headers = {'auth-token': token }
+    headers = {'auth-token': electricitymaps_token }
     params = {'lat': latitude, 'lon': longitude }
 
     response = requests.get('https://api.electricitymap.org/v3/carbon-intensity/latest', params=params, headers=headers, timeout=10)

diff --git a/lib/phase_stats.py b/lib/phase_stats.py
@@ -158,15 +158,13 @@ def build_and_store_phase_stats(run_id, sci=None):
         else:
             network_io_co2_in_ug = decimal.Decimal(0)
 
-
         if sci.get('EL', None) is not None and sci.get('TE', None) is not None and sci.get('RS', None) is not None:
-            duration_in_years = duration_in_s * 60 * 60 * 24 * 365
-            embodied_carbon_share_g = (duration_in_years / sci.get('EL', None) ) * sci.get('TE', None) * sci.get('RS', None)
+            duration_in_years = duration_in_s / (60 * 60 * 24 * 365)
+            embodied_carbon_share_g = (duration_in_years / sci['EL'] ) * sci['TE'] * sci['RS']
             embodied_carbon_share_ug = decimal.Decimal(embodied_carbon_share_g * 1_000_000)
             csv_buffer.write(generate_csv_line(run_id, 'embodied_carbon_share_machine', '[SYSTEM]', f"{idx:03}_{phase['name']}", embodied_carbon_share_ug, 'TOTAL', None, None, 'ug'))
 
-        if phase['name'] == '[RUNTIME]' and machine_co2_in_ug is not None and sci is not None \
-                         and sci.get('R', None) is not None and sci['R'] != 0:
+        if phase['name'] == '[RUNTIME]' and machine_co2_in_ug is not None and sci is not None and sci.get('R', 0) != 0:
             csv_buffer.write(generate_csv_line(run_id, 'software_carbon_intensity_global', '[SYSTEM]', f"{idx:03}_{phase['name']}", (machine_co2_in_ug + embodied_carbon_share_ug + network_io_co2_in_ug) / sci['R'], 'TOTAL', None, None, f"ugCO2e/{sci['R_d']}"))
 
         if machine_power_idle and cpu_utilization_machine and cpu_utilization_containers:

diff --git a/runner.py b/runner.py
@@ -79,7 +79,10 @@ def __init__(self,
         self._tmp_folder = Path('/tmp/green-metrics-tool').resolve() # since linux has /tmp and macos /private/tmp
         self._usage_scenario = {}
         self._architecture = utils.get_architecture()
+
         self._sci = {'R_d': None, 'R': 0}
+        self._sci |= GlobalConfig().config.get('sci', None)  # merge in data from machine config like I, TE etc.
+
         self._job_id = job_id
         self._arguments = locals()
         self._repo_folder = f"{self._tmp_folder}/repo" # default if not changed in checkout_repository
@@ -461,7 +464,7 @@ def update_and_insert_specs(self):
 
         measurement_config = {}
         measurement_config['providers'] = utils.get_metric_providers(config)
-        measurement_config['sci'] = config.get('sci', None)
+        measurement_config['sci'] = self._sci
 
         # Insert auxiliary info for the run. Not critical.
         DB().query("""

diff --git a/test-config.yml b/test-config.yml
@@ -93,10 +93,10 @@ measurement:
         Hardware_Availability_Year: 2011
 
 sci:
-  EL: 3.5
+  EL: 4
   RS: 1
-  TE: 194000
-  I: 475
+  TE: 181000
+  I: 436
 
 optimization:
   ignore:

diff --git a/tests/metric_providers/test_metric_providers.py b/tests/metric_providers/test_metric_providers.py
@@ -142,26 +142,27 @@ def test_network_providers():
 
     assert seen_network_total_procfs_system is True
 
-def test_cpu_memory_providers():
-    if utils.get_architecture() == 'macos':
-        return
+def test_cpu_memory_carbon_providers():
 
     assert(run_id is not None and run_id != '')
 
     query = """
             SELECT metric, detail_name, value, unit, max_value
             FROM phase_stats
             WHERE run_id = %s and phase = '006_VM Stress'
+            ORDER BY metric DESC -- this will assure that the phase_time metric will come first and can be saved
             """
 
     data = DB().fetch_all(query, (run_id,), fetch_mode='dict')
     assert(data is not None and data != [])
 
     ## flags recording which metrics have been seen in the fetched phase_stats rows
     seen_phase_time_syscall_system = False
-    seen_cpu_utilization_procfs_system = False
+    seen_cpu_utilization = False
     seen_memory_used_procfs_system = False
+    seen_embodied_carbon_share_machine = False
     MICROSECONDS = 1_000_000
+    phase_time = None
 
     for metric_provider in data:
         metric = metric_provider['metric']
@@ -172,16 +173,37 @@ def test_cpu_memory_providers():
             assert 9000 < val <= 10000 , f"cpu_utilization_procfs_system is not between 90_00 and 100_00 but {metric_provider['value']} {metric_provider['unit']}"
             assert 9500 < max_value <= 10500 , f"cpu_utilization_procfs_system max is not between 95_00 and 105_00 but {metric_provider['value']} {metric_provider['unit']}"
 
-            seen_cpu_utilization_procfs_system = True
+            seen_cpu_utilization = True
+        elif metric == 'cpu_utilization_mach_system': # macOS values do not get as high due to the VM.
+            assert 5500 < val <= 10000 , f"cpu_utilization_mach_system is not between 90_00 and 100_00 but {metric_provider['value']} {metric_provider['unit']}"
+            assert 8000 < max_value <= 10500 , f"cpu_utilization_mach_system max is not between 95_00 and 105_00 but {metric_provider['value']} {metric_provider['unit']}"
+
+            seen_cpu_utilization = True
+
         elif metric == 'memory_used_procfs_system':
-            if not os.getenv("GITHUB_ACTIONS") == "true": # skip test for GitHub Actions VM. Memory seems weirdly assigned here
+            if not os.getenv("GITHUB_ACTIONS") == "true" and utils.get_architecture() != 'macos': # skip test for GitHub Actions VM. Memory seems weirdly assigned here. Also skip macos
                 assert psutil.virtual_memory().total*0.55 <= val <= psutil.virtual_memory().total * 0.65 , f"memory_used_procfs_system avg is not between 55% and 65% of total memory but {metric_provider['value']} {metric_provider['unit']}"
 
             seen_memory_used_procfs_system = True
         elif metric == 'phase_time_syscall_system':
             assert 5*MICROSECONDS < val < 5.5*MICROSECONDS , f"phase_time_syscall_system is not between 5 and 5.5 s but {metric_provider['value']} {metric_provider['unit']}"
             seen_phase_time_syscall_system = True
+            phase_time = val
+
+        elif metric == 'embodied_carbon_share_machine':
+            # we have the phase time value as we sort by metric DESC
+            phase_time_in_years = phase_time / (MICROSECONDS * 60 * 60 * 24 * 365)
+            sci = {"EL": 4, "TE": 181000, "RS": 1}
+            embodied_carbon_expected = int((phase_time_in_years / sci['EL']) * sci['TE'] * sci['RS'] * 1_000_000)
+            # Make a range because of rounding errors
+            assert embodied_carbon_expected*0.99 < val < embodied_carbon_expected*1.01  , f"embodied_carbon_share_machine is not {embodied_carbon_expected} but {metric_provider['value']} {metric_provider['unit']}\n. This might be also because the values in the test are hardcoded. Check reporter but also if test-config.yml configuration is still accurate"
+            seen_embodied_carbon_share_machine = True
+
+    assert seen_phase_time_syscall_system is True, "Did not see seen_phase_time_syscall_system metric"
+    assert seen_cpu_utilization is True, "Did not see seen_cpu_utilization metric"
+    assert seen_embodied_carbon_share_machine is True, "Did not see seen_embodied_carbon_share_machine metric"
+
+    if utils.get_architecture() == 'macos': # skip following test for macos as we do not have that provider there
+        return
 
-    assert seen_phase_time_syscall_system is True
-    assert seen_cpu_utilization_procfs_system is True
-    assert seen_memory_used_procfs_system is True
+    assert seen_memory_used_procfs_system is True, "Did not see seen_memory_used_procfs_system metric"
diff --git a/tools/rebuild_phase_stats.py b/tools/rebuild_phase_stats.py
@@ -11,23 +11,23 @@
 from lib.db import DB
 
 if __name__ == '__main__':
-    print('This will remove ALL phase_stats and completely rebuild them. No data will get lost, but it will take some time. Continue? (y/N)')
+    print('This will remove ALL phase_stats and completely rebuild them. Not data will get lost, but it will take some time. Continue? (y/N)')
     answer = sys.stdin.readline()
     if answer.strip().lower() == 'y':
         print('Deleting old phase_stats ...')
         DB().query('DELETE FROM phase_stats')
         print('Fetching runs ...')
         query = '''
-            SELECT id
+            SELECT id, measurement_config
             FROM runs
-            WHERE
-                end_measurement IS NOT NULL AND phases IS NOT NULL
+            WHERE end_measurement IS NOT NULL AND phases IS NOT NULL
+
         '''
-        runs = DB().fetch_all(query)
+        runs = DB().fetch_all(query, fetch_mode='dict')
 
-        print(f"Fetched {len(runs)} runs. Commencing ...")
-        for idx, run_id in enumerate(runs):
 
-            print(f"Rebuilding phase_stats for run #{idx} {run_id[0]}")
-            build_and_store_phase_stats(run_id[0])
+        print(f"Fetched {len(runs)} runs. Commencing ...")
+        for idx, data in enumerate(runs):
+            print(f"Rebuilding phase_stats for run #{idx} {data['id']}")
+            build_and_store_phase_stats(data['id'], data['measurement_config']['sci'])
         print('Done')