Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Custom energy monitoring #1226

Merged
merged 23 commits into from
Nov 11, 2024
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
20 changes: 12 additions & 8 deletions c_common/models/chip_power_monitor/src/chip_power_monitor.c
Original file line number Diff line number Diff line change
Expand Up @@ -51,6 +51,11 @@ struct sample_params {
uint32_t frequency;
};

//! \brief Layout of one entry passed to the recording channel.
//!
//! The whole structure is handed to recording_record() in a single call.
struct recording {
    //! Copy of the global \c time value, filled in just before recording.
    uint32_t time;
    //! Per-core sample counts; a core's entry is incremented each time its
    //! bit in the sampled state word is found to be clear.
    uint32_t core_counters[NUM_CPUS];
};

//! \brief The recording channel we use.
//!
//! Only one recording channel is used by this application.
Expand All @@ -73,7 +78,7 @@ static uint32_t time;
static uint32_t timer = 0;

//! Where we aggregate the sample activity counts.
static uint32_t core_counters[NUM_CPUS];
static struct recording recording;
//! How many samples have we done so far within this aggregate step?
static uint32_t sample_count;
//! The number of samples to aggregate per recording entry.
Expand Down Expand Up @@ -105,15 +110,16 @@ static inline uint32_t get_random_busy(void) {
//! \brief Synchronously records the current contents of the core_counters to
//! the recording region.
static inline void record_aggregate_sample(void) {
recording.time = time;
recording_record(
RECORDING_CHANNEL_ID, core_counters, sizeof(core_counters));
RECORDING_CHANNEL_ID, &recording, sizeof(recording));
}

//! \brief Resets the state of the core_counters and the sample_count variables
//! to zero.
static inline void reset_core_counters(void) {
for (uint32_t i = 0 ; i < NUM_CPUS ; i++) {
core_counters[i] = 0;
recording.core_counters[i] = 0;
}
sample_count = 0;
}
Expand Down Expand Up @@ -154,7 +160,7 @@ static inline void count_core_states(void) {

for (uint32_t i = 0, j = 1 ; i < NUM_CPUS ; i++, j <<= 1) {
if (!(sample & j)) {
core_counters[i]++;
recording.core_counters[i]++;
}
}
}
Expand All @@ -175,10 +181,8 @@ static void sample_in_slot(UNUSED uint unused0, UNUSED uint unused1) {

recording_finalise();

// Subtract 1 from the time so this tick gets done again on the next
// run
time--;

// Invert the time calculation so that any time read is correct
time = (time * sample_frequency) / timer;
simulation_ready_to_read();

return;
Expand Down
27 changes: 27 additions & 0 deletions spinn_front_end_common/data/fec_data_view.py
Original file line number Diff line number Diff line change
Expand Up @@ -83,6 +83,7 @@ class _FecDataModel(object):
"_database_file_path",
"_database_socket_addresses",
"_ds_database_path",
"_energy_checkpoints",
"_executable_targets",
"_executable_types",
"_first_machine_time_step",
Expand Down Expand Up @@ -190,6 +191,7 @@ def _soft_reset(self) -> None:
self._first_machine_time_step = 0
self._run_step: Optional[int] = None
self._n_run_steps: Optional[int] = None
self._energy_checkpoints: List[int] = []

def _clear_notification_protocol(self) -> None:
if self._notification_protocol:
Expand Down Expand Up @@ -1355,3 +1357,28 @@ def iterate_live_output_devices(cls) -> Iterable[LiveOutputDevice]:
:rtype: iterable(LiveOutputDevice)
"""
return iter(cls.__fec_data._live_output_devices)

@classmethod
def add_energy_checkpoint(cls, checkpoint_ms: int) -> None:
    """
    Add an energy checkpoint.

    The value is appended to the list of checkpoints held by the data
    model; no ordering or de-duplication is applied here.

    :param checkpoint_ms: The checkpoint to be added in milliseconds
    :type checkpoint_ms: int
    """
    cls.__fec_data._energy_checkpoints.append(checkpoint_ms)

@classmethod
def iterate_energy_checkpoints(cls) -> Iterable[int]:
    """
    Iterate over energy checkpoints.

    The iterator is created directly over the stored list (no copy is
    made), so checkpoints are yielded in the order they were added.

    :return: An iterator over the checkpoints that have been added
    :rtype: iterable(int)
    """
    return iter(cls.__fec_data._energy_checkpoints)

@classmethod
def clear_energy_checkpoints(cls) -> None:
    """
    Clear all energy checkpoints.

    The stored list is emptied in place rather than replaced.
    """
    cls.__fec_data._energy_checkpoints.clear()
21 changes: 12 additions & 9 deletions spinn_front_end_common/interface/abstract_spinnaker_base.py
Original file line number Diff line number Diff line change
Expand Up @@ -557,8 +557,6 @@ def __run(self, run_time: Optional[float], sync_time: float):
"Only binaries that use the simulation interface can be"
" run more than once")

self._adjust_config(run_time)

# Install the Control-C handler
if self.__is_main_thread():
signal.signal(signal.SIGINT, self.__signal_handler)
Expand Down Expand Up @@ -1756,7 +1754,7 @@ def _execute_load_application_data_specification(self) -> None:
:rtype: dict(tuple(int,int,int),DataWritten) or DsWriteInfo
"""
with FecTimer("Load Application data specification",
TimerWork.LOADING) as timer:
TimerWork.LOADING_DATA) as timer:
if timer.skip_if_virtual_board():
return
return load_application_data_specs()
Expand Down Expand Up @@ -1892,6 +1890,7 @@ def _do_load(self) -> None:
self._report_memory_on_chip()
self._report_compressed(compressed)
self._execute_application_load_executables()
self._execute_router_provenance_gatherer("Load", TimerWork.LOADING)

FecTimer.end_category(TimerCategory.LOADING)

Expand Down Expand Up @@ -1951,19 +1950,20 @@ def _execute_placements_provenance_gatherer(self) -> None:
timer.skip(str(ex))
return

def _execute_router_provenance_gatherer(self) -> None:
def _execute_router_provenance_gatherer(
self, prefix: str, phase: TimerWork) -> None:
"""
Runs, times and log the RouterProvenanceGatherer if requested.
"""
with FecTimer(
"Router provenance gatherer", TimerWork.EXTRACTING) as timer:
"Router provenance gatherer", phase) as timer:
if timer.skip_if_cfg_false("Reports",
"read_router_provenance_data"):
return
if timer.skip_if_virtual_board():
return
try:
router_provenance_gatherer()
router_provenance_gatherer(prefix)
except DataNotYetAvialable as ex:
timer.skip(str(ex))
return
Expand Down Expand Up @@ -1991,7 +1991,6 @@ def _do_read_provenance(self) -> None:
"""
self._execute_graph_provenance_gatherer()
self._execute_placements_provenance_gatherer()
self._execute_router_provenance_gatherer()
self._execute_profile_data_gatherer()

def _report_energy(self) -> None:
Expand All @@ -2004,7 +2003,6 @@ def _report_energy(self) -> None:
if timer.skip_if_virtual_board():
return

# TODO runtime is None
power_used = compute_energy_used()

energy_provenance_reporter(power_used)
Expand Down Expand Up @@ -2127,11 +2125,16 @@ def _do_extract_from_machine(self) -> None:
:param run_time: the run duration in milliseconds.
:type run_time: int or None
"""
self._execute_router_provenance_gatherer("Run", TimerWork.EXTRACTING)
for chip in FecDataView.get_machine().chips:
FecDataView().get_transceiver().clear_router_diagnostic_counters(
chip.x, chip.y)
self._execute_extract_iobuff()
self._execute_buffer_extractor()
self._execute_clear_io_buf()
self._execute_router_provenance_gatherer(
"Extract", TimerWork.EXTRACTING)

# FinaliseTimingData never needed as just pushed self._ to inputs
self._do_read_provenance()
self._report_energy()
self._do_provenance_reports()
Expand Down
20 changes: 6 additions & 14 deletions spinn_front_end_common/interface/config_handler.py
Original file line number Diff line number Diff line change
Expand Up @@ -72,6 +72,7 @@ def __init__(self, data_writer_cls: Optional[Type[FecDataWriter]] = None):
self._debug_configs()
self._previous_handler()
self._reserve_system_vertices()
self._ensure_provenance_for_energy_report()

def __toggle_config(self, section: str, option: str, to_false: List[str],
to_true: List[str]):
Expand Down Expand Up @@ -178,20 +179,6 @@ def _reserve_system_vertices(self):
self._data_writer.add_sample_monitor_vertex(
sample_speedup_vertex(), False)

def _adjust_config(self, runtime: Optional[float]) -> None:
    """
    Adjust and check the configuration based on the runtime.

    When no runtime is given (run forever) and
    ``[Reports]write_energy_report`` is enabled, the option is switched
    to False and an info message is logged.

    :param runtime: The requested run time, or `None` to run forever
    :type runtime: float or None
    :raises ConfigurationException:
        NOTE(review): nothing in this method raises this directly;
        presumably it can come from the config helpers -- confirm
    """
    if runtime is None:
        # The energy report is turned off when running forever
        if get_config_bool("Reports", "write_energy_report"):
            set_config("Reports", "write_energy_report", "False")
            logger.info("[Reports]write_energy_report has been set to "
                        "False as runtime is set to forever")

def _remove_excess_folders(
self, max_kept: int, starting_directory: str,
remove_errored_folders: Optional[bool]):
Expand Down Expand Up @@ -257,3 +244,8 @@ def _set_up_report_specifics(self) -> None:
f.write("\n")
f.write("Traceback of setup call:\n")
traceback.print_stack(file=f)

def _ensure_provenance_for_energy_report(self):
    """
    Switch on provenance reading when an energy report is requested.

    If ``[Reports]write_energy_report`` is set, both the router and the
    placements provenance options are set to True; otherwise the
    configuration is left untouched.
    """
    if not get_config_bool("Reports", "write_energy_report"):
        return
    for option in ("read_router_provenance_data",
                   "read_placements_provenance_data"):
        set_config("Reports", option, "True")
Loading