Skip to content

Commit

Permalink
Merge pull request #1226 from SpiNNakerManchester/custom_energy_monit…
Browse files Browse the repository at this point in the history
…oring

Custom energy monitoring
  • Loading branch information
Christian-B authored Nov 11, 2024
2 parents a0c1b41 + 95efacb commit 81f4f24
Show file tree
Hide file tree
Showing 12 changed files with 644 additions and 1,144 deletions.
20 changes: 12 additions & 8 deletions c_common/models/chip_power_monitor/src/chip_power_monitor.c
Original file line number Diff line number Diff line change
Expand Up @@ -51,6 +51,11 @@ struct sample_params {
uint32_t frequency;
};

//! \brief One aggregated entry as written to the recording channel.
//!
//! The whole struct is handed to recording_record() as a single unit, so the
//! order of the fields defines the layout of each recorded entry.
struct recording {
//! Value of the `time` counter at the moment the entry is recorded.
uint32_t time;
//! Per-core counters; a core's counter is incremented for each sample in
//! which that core's bit in the sample word is clear.
uint32_t core_counters[NUM_CPUS];
};

//! \brief The recording channel we use.
//!
//! Only one recording channel is used by this application.
Expand All @@ -73,7 +78,7 @@ static uint32_t time;
static uint32_t timer = 0;

//! Where we aggregate the sample activity counts.
static uint32_t core_counters[NUM_CPUS];
static struct recording recording;
//! How many samples have we done so far within this aggregate step?
static uint32_t sample_count;
//! The number of samples to aggregate per recording entry.
Expand Down Expand Up @@ -105,15 +110,16 @@ static inline uint32_t get_random_busy(void) {
//! \brief Synchronously records the current time and the contents of the
//! core_counters to the recording region.
static inline void record_aggregate_sample(void) {
recording.time = time;
recording_record(
RECORDING_CHANNEL_ID, core_counters, sizeof(core_counters));
RECORDING_CHANNEL_ID, &recording, sizeof(recording));
}

//! \brief Resets the state of the core_counters and the sample_count variables
//! to zero.
static inline void reset_core_counters(void) {
for (uint32_t i = 0 ; i < NUM_CPUS ; i++) {
core_counters[i] = 0;
recording.core_counters[i] = 0;
}
sample_count = 0;
}
Expand Down Expand Up @@ -154,7 +160,7 @@ static inline void count_core_states(void) {

for (uint32_t i = 0, j = 1 ; i < NUM_CPUS ; i++, j <<= 1) {
if (!(sample & j)) {
core_counters[i]++;
recording.core_counters[i]++;
}
}
}
Expand All @@ -175,10 +181,8 @@ static void sample_in_slot(UNUSED uint unused0, UNUSED uint unused1) {

recording_finalise();

// Subtract 1 from the time so this tick gets done again on the next
// run
time--;

// Invert the time calculation so that any time read is correct
time = (time * sample_frequency) / timer;
simulation_ready_to_read();

return;
Expand Down
27 changes: 27 additions & 0 deletions spinn_front_end_common/data/fec_data_view.py
Original file line number Diff line number Diff line change
Expand Up @@ -83,6 +83,7 @@ class _FecDataModel(object):
"_database_file_path",
"_database_socket_addresses",
"_ds_database_path",
"_energy_checkpoints",
"_executable_targets",
"_executable_types",
"_first_machine_time_step",
Expand Down Expand Up @@ -190,6 +191,7 @@ def _soft_reset(self) -> None:
self._first_machine_time_step = 0
self._run_step: Optional[int] = None
self._n_run_steps: Optional[int] = None
self._energy_checkpoints: List[int] = []

def _clear_notification_protocol(self) -> None:
if self._notification_protocol:
Expand Down Expand Up @@ -1355,3 +1357,28 @@ def iterate_live_output_devices(cls) -> Iterable[LiveOutputDevice]:
:rtype: iterable(LiveOutputDevice)
"""
return iter(cls.__fec_data._live_output_devices)

@classmethod
def add_energy_checkpoint(cls, checkpoint_ms: int) -> None:
    """
    Add an energy checkpoint.

    The value is appended to the list of energy checkpoints held by the
    data model; no ordering or de-duplication is applied here.

    :param checkpoint_ms: The checkpoint to be added in milliseconds
    :type checkpoint_ms: int
    """
    cls.__fec_data._energy_checkpoints.append(checkpoint_ms)

@classmethod
def iterate_energy_checkpoints(cls) -> Iterable[int]:
    """
    Get an iterator over the energy checkpoints currently stored.

    :rtype: iterable(int)
    """
    checkpoints = cls.__fec_data._energy_checkpoints
    return iter(checkpoints)

@classmethod
def clear_energy_checkpoints(cls) -> None:
    """
    Remove every stored energy checkpoint.

    The existing list is emptied in place rather than replaced.
    """
    checkpoints = cls.__fec_data._energy_checkpoints
    checkpoints.clear()
21 changes: 12 additions & 9 deletions spinn_front_end_common/interface/abstract_spinnaker_base.py
Original file line number Diff line number Diff line change
Expand Up @@ -557,8 +557,6 @@ def __run(self, run_time: Optional[float], sync_time: float):
"Only binaries that use the simulation interface can be"
" run more than once")

self._adjust_config(run_time)

# Install the Control-C handler
if self.__is_main_thread():
signal.signal(signal.SIGINT, self.__signal_handler)
Expand Down Expand Up @@ -1756,7 +1754,7 @@ def _execute_load_application_data_specification(self) -> None:
:rtype: dict(tuple(int,int,int),DataWritten) or DsWriteInfo
"""
with FecTimer("Load Application data specification",
TimerWork.LOADING) as timer:
TimerWork.LOADING_DATA) as timer:
if timer.skip_if_virtual_board():
return
return load_application_data_specs()
Expand Down Expand Up @@ -1892,6 +1890,7 @@ def _do_load(self) -> None:
self._report_memory_on_chip()
self._report_compressed(compressed)
self._execute_application_load_executables()
self._execute_router_provenance_gatherer("Load", TimerWork.LOADING)

FecTimer.end_category(TimerCategory.LOADING)

Expand Down Expand Up @@ -1951,19 +1950,20 @@ def _execute_placements_provenance_gatherer(self) -> None:
timer.skip(str(ex))
return

def _execute_router_provenance_gatherer(self) -> None:
def _execute_router_provenance_gatherer(
self, prefix: str, phase: TimerWork) -> None:
"""
Runs, times and log the RouterProvenanceGatherer if requested.
"""
with FecTimer(
"Router provenance gatherer", TimerWork.EXTRACTING) as timer:
"Router provenance gatherer", phase) as timer:
if timer.skip_if_cfg_false("Reports",
"read_router_provenance_data"):
return
if timer.skip_if_virtual_board():
return
try:
router_provenance_gatherer()
router_provenance_gatherer(prefix)
except DataNotYetAvialable as ex:
timer.skip(str(ex))
return
Expand Down Expand Up @@ -1991,7 +1991,6 @@ def _do_read_provenance(self) -> None:
"""
self._execute_graph_provenance_gatherer()
self._execute_placements_provenance_gatherer()
self._execute_router_provenance_gatherer()
self._execute_profile_data_gatherer()

def _report_energy(self) -> None:
Expand All @@ -2004,7 +2003,6 @@ def _report_energy(self) -> None:
if timer.skip_if_virtual_board():
return

# TODO runtime is None
power_used = compute_energy_used()

energy_provenance_reporter(power_used)
Expand Down Expand Up @@ -2127,11 +2125,16 @@ def _do_extract_from_machine(self) -> None:
:param run_time: the run duration in milliseconds.
:type run_time: int or None
"""
self._execute_router_provenance_gatherer("Run", TimerWork.EXTRACTING)
for chip in FecDataView.get_machine().chips:
FecDataView().get_transceiver().clear_router_diagnostic_counters(
chip.x, chip.y)
self._execute_extract_iobuff()
self._execute_buffer_extractor()
self._execute_clear_io_buf()
self._execute_router_provenance_gatherer(
"Extract", TimerWork.EXTRACTING)

# FinaliseTimingData never needed as just pushed self._ to inputs
self._do_read_provenance()
self._report_energy()
self._do_provenance_reports()
Expand Down
20 changes: 6 additions & 14 deletions spinn_front_end_common/interface/config_handler.py
Original file line number Diff line number Diff line change
Expand Up @@ -72,6 +72,7 @@ def __init__(self, data_writer_cls: Optional[Type[FecDataWriter]] = None):
self._debug_configs()
self._previous_handler()
self._reserve_system_vertices()
self._ensure_provenance_for_energy_report()

def __toggle_config(self, section: str, option: str, to_false: List[str],
to_true: List[str]):
Expand Down Expand Up @@ -178,20 +179,6 @@ def _reserve_system_vertices(self):
self._data_writer.add_sample_monitor_vertex(
sample_speedup_vertex(), False)

def _adjust_config(self, runtime: Optional[float]):
    """
    Adjust the configuration to suit the requested runtime.

    When no runtime is given (run forever) and the energy report is
    switched on, the energy report is switched off and this is logged.

    :param runtime: The requested runtime, or `None` to run forever
    :type runtime: float or None
    """
    if runtime is not None:
        return
    if not get_config_bool("Reports", "write_energy_report"):
        return
    set_config("Reports", "write_energy_report", "False")
    logger.info("[Reports]write_energy_report has been set to "
                "False as runtime is set to forever")

def _remove_excess_folders(
self, max_kept: int, starting_directory: str,
remove_errored_folders: Optional[bool]):
Expand Down Expand Up @@ -257,3 +244,8 @@ def _set_up_report_specifics(self) -> None:
f.write("\n")
f.write("Traceback of setup call:\n")
traceback.print_stack(file=f)

def _ensure_provenance_for_energy_report(self):
    """
    Switch on the provenance reads when the energy report is requested.

    If ``[Reports] write_energy_report`` is true, the router and placements
    provenance options in the same section are both set to ``"True"``.
    Otherwise the configuration is left untouched.
    """
    if not get_config_bool("Reports", "write_energy_report"):
        return
    # Router first, then placements: same order as before.
    for option in ("read_router_provenance_data",
                   "read_placements_provenance_data"):
        set_config("Reports", option, "True")
Loading

0 comments on commit 81f4f24

Please sign in to comment.