From f72b2cf7131e6f6c2dad66ff477db680a3322d2e Mon Sep 17 00:00:00 2001 From: Ian Lumsden Date: Wed, 9 Oct 2024 17:07:08 -0400 Subject: [PATCH] Adds comments describing the expected behavior of the virtual methods in TopdownCalculator --- doc/sphinx/PythonSupport.rst | 45 ++ doc/sphinx/build.rst | 246 +++++---- doc/sphinx/index.rst | 1 + src/services/topdown/HaswellTopdown.cpp | 400 +++++++------- src/services/topdown/HaswellTopdown.h | 41 +- src/services/topdown/SapphireRapidsTopdown.h | 41 +- .../topdown/SapphireRapidsTopdown_rdpmc.cpp | 329 ++++++------ .../topdown/SapphireRapidsTopdown_read.cpp | 493 +++++++++--------- src/services/topdown/TopdownCalculator.cpp | 133 ++--- src/services/topdown/TopdownCalculator.h | 103 ++-- 10 files changed, 940 insertions(+), 892 deletions(-) create mode 100644 doc/sphinx/PythonSupport.rst diff --git a/doc/sphinx/PythonSupport.rst b/doc/sphinx/PythonSupport.rst new file mode 100644 index 000000000..cc36928e2 --- /dev/null +++ b/doc/sphinx/PythonSupport.rst @@ -0,0 +1,45 @@ +Python support +============== + +Caliper provides Python bindings based on `pybind11 <https://pybind11.readthedocs.io/>`_ +for the annotation and :code:`ConfigManager` APIs. To build Caliper with Python support, enable +the :code:`WITH_PYTHON_BINDINGS` option in the CMake configuration: + +.. code-block:: sh + + $ cmake -DWITH_PYTHON_BINDINGS=On .. + +Using the Python module +----------------------- + +The Python module requires pybind11 and an installation of Python that both supports +pybind11 and provides development headers (e.g., :code:`Python.h`) and libraries +(e.g., :code:`libpython3.8.so`). + +The Caliper Python module is installed in :code:`lib/pythonX.Y/site-packages/` and/or +:code:`lib64/pythonX.Y/site-packages` in the Caliper installation directory. In these paths, +:code:`X.Y` corresponds to the major and minor version numbers of the Python installation used. 
+Additionally, :code:`lib/` and :code:`lib64/` will be used in accordance with the configuration +of the Python installation. To better understand the rules for where Python modules are installed, +see `this thread `_ +from the Python Software Foundation Discuss. + +To use the Caliper Python module, simply add the directories above to :code:`PYTHONPATH` or +:code:`sys.path`. Note that the module will be automatically added to :code:`PYTHONPATH` when +loading the Caliper package with Spack if the :code:`python` variant is enabled. +The module can then be imported with :code:`import pycaliper`. + +Caliper Python API +------------------ + +The Caliper Python API supports a significant subset of the C and C++ annotation APIs. +The simplest options are the :code:`pycaliper.begin_region()` and :code:`pycaliper.end_region()` +functions. Caliper's Python API also provides the :code:`pycaliper.annotate_function` decorator +as a higher-level way of annotating functions. + +The Python API also supports the Caliper :code:`ConfigManager` API (:doc:`ConfigManagerAPI`). +The example in examples/apps/py-example.py demonstrates the annotation and +:code:`ConfigManager` APIs for Python: + +.. literalinclude:: ../../examples/apps/py-example.py + :language: Python \ No newline at end of file diff --git a/doc/sphinx/build.rst b/doc/sphinx/build.rst index 5c8d34e12..d3179afcb 100644 --- a/doc/sphinx/build.rst +++ b/doc/sphinx/build.rst @@ -45,6 +45,9 @@ WITH_CUPTI WITH_FORTRAN Build the Fortran wrappers. +WITH_PYTHON_BINDINGS + Build the Python bindings. + WITH_GOTCHA Enable Gotcha support. Allows pthread, IO, and malloc/free tracking, and enables dynamic wrapping of MPI functions. @@ -75,6 +78,9 @@ WITH_OMPT WITH_PAPI Enable PAPI support. Set PAPI installation dir in PAPI_PREFIX. +WITH_PAPI_RDPMC + Specify that PAPI is built to use :code:`rdpmc` by default for reading counters. + WITH_ROCTX Build adapters to forward Caliper annotations to AMD's roctx annotation API. 
@@ -92,6 +98,10 @@ WITH_VTUNE Build adapters to forward Caliper annotations to Intel's VTune annotation API. Set Intel ITT API installation dir in ``ITT_PREFIX``. +WITH_ARCH + Specify the architecture for which you are building to enable + architecture-specific functionality (e.g., topdown calculations). + All options are off by default. On Linux, Gotcha is enabled by default. Linking Caliper programs @@ -144,116 +154,126 @@ Feature and build option overview The following table shows the features, recipes, and services that are enabled with the given Caliper and spack build options. -+----------------+---------------+---------------------------+--------------------+ -| CMake option | Spack option | Enabled features/recipes | Enabled services | -+================+===============+===========================+====================+ -| WITH_ADIAK | +adiak | Import adiak metadata in | adiak_import, | -| | | most config recipes | adiak_export | -+----------------+---------------+---------------------------+--------------------+ -| WITH_MPI | +mpi | - mpi-report recipe | mpi, mpireport | -| | | - profile.mpi, | | -| | | mpi.message.count, | | -| | | mpi.message.size | | -| | | recipe options | | -| | | - Cross-process | | -| | | aggregation | | -+----------------+---------------+---------------------------+--------------------+ -| WITH_PAPI | +papi | - topdown.all, | papi, topdown | -| | | topdown.toplevel, | | -| | | topdown-counters.* | | -| | | recipe options for some | | -| | | x86 systems | | -| | | - PAPI counter collection | | -+----------------+---------------+---------------------------+--------------------+ -| WITH_LIBDW | +libdw | - source.module, | symbollookup | -| | | source.function, | | -| | | source.location | | -| | | recipe options | | -| | | - Symbol name lookup | | -+----------------+---------------+---------------------------+--------------------+ -| WITH_LIBPFM | +libpfm | PerfEvent counter | libpfm | -| | | collection and precise | | -| | | event 
sampling | | -+----------------+---------------+---------------------------+--------------------+ -| WITH_LIBUNWIND | +libunwind | - callpath option for | callpath | -| | | sample-report and | | -| | | event-trace recipes | | -| | | (requires libdw) | | -| | | - Call stack unwinding | | -+----------------+---------------+---------------------------+--------------------+ -| WITH_SAMPLER | +sampler | - sample-report, | sampler | -| | | hatchet-sample-profile | | -| | | recipes | | -| | | - sampling option for | | -| | | event-trace recipe | | -| | | - Linux sampling support | | -+----------------+---------------+---------------------------+--------------------+ -| WITH_CUPTI | +cuda | - cuda-activity-report | cupti, cuptitrace | -| | | cuda-activity-profile | | -| | | recipes | | -| | | - profile.cuda, | | -| | | cuda.gputime, | | -| | | cuda.memcpy recipe | | -| | | options | | -| | | - CUDA API profiling | | -| | | - CUDA activity tracing | | -+----------------+ +---------------------------+--------------------+ -| WITH_NVTX | | - nvtx recipe | nvtx | -| | | - Caliper-to-NVTX region | | -| | | forwarding | | -+----------------+---------------+---------------------------+--------------------+ -| WITH_ROCTRACER | +rocm | - rocm-activity-report, | roctracer | -| | | rocm-activity-profile | | -| | | recipes | | -| | | - profile.hip | | -| | | rocm.gputime, | | -| | | rocm.memcpy recipe | | -| | | options | | -| | | - ROCm/HIP API profiling | | -| | | - ROCm activity tracing | | -+----------------+ +---------------------------+--------------------+ -| WITH_ROCTX | | - roctx recipe | roctx | -| | | - Caliper-to-ROCTX region | | -| | | forwarding | | -+----------------+---------------+---------------------------+--------------------+ -| WITH_OMPT | not available | - openmp-report recipe | ompt | -| | yet | - openmp.times, | | -| | | openmp.threads, | | -| | | openmp.efficiency | | -| | | recipe options | | -| | | - OpenMP tools interface | | -| | | support (CPU only, no | 
| -| | | target offload) | | -+----------------+---------------+---------------------------+--------------------+ -| WITH_GOTCHA | +gotcha | - io.bytes.*, | io, pthread, | -| | | io.*.bandwidth, | sysalloc | -| | | mem.highwatermark, | | -| | | main_thread_only | | -| | | recipe options | | -| | | - Use Gotcha for MPI | | -| | | MPI function wrapping | | -| | | instead of PMPI | | -+----------------+---------------+---------------------------+--------------------+ -| WITH_UMPIRE | not available | umpire.totals, | umpire | -| | yet | umpire.allocators options | | -+----------------+---------------+---------------------------+--------------------+ -| WITH_VARIORUM | +variorum | Read variorum counters | variorum | -+----------------+---------------+---------------------------+--------------------+ -| WITH_PCP | not available | - mem.*.bandwidth, | pcp, pcp.memory | -| | yet | mem.*.bytes recipe | | -| | | options on some LLNL | | -| | | LC systems | | -| | | - Read Performance | | -| | | CoPilot counters | | -+----------------+---------------+---------------------------+--------------------+ -| WITH_VTUNE | not available | Intel ITT API annotation | vtune | -| | yet | forwarding | | -+----------------+---------------+---------------------------+--------------------+ -| WITH_CRAYPAT | not available | HPE CrayPAT API | craypat | -| | yet | annotation forwarding | | -+----------------+---------------+---------------------------+--------------------+ -| WITH_KOKKOS | +kokkos | Enable Kokkos tool API | kokkostime, | -| | | bindings | kokkoslookup | -+----------------+---------------+---------------------------+--------------------+ -| WITH_FORTRAN | +fortran | Enable Fortran API | | -+----------------+---------------+---------------------------+--------------------+ ++----------------------+---------------+---------------+---------------------------+--------------------+ +| CMake option | Default value | Spack option | Enabled features/recipes | Enabled services | 
++======================+===============+===============+===========================+====================+ +| WITH_ADIAK | False | +adiak | Import adiak metadata in | adiak_import, | +| | | | most config recipes | adiak_export | ++----------------------+---------------+---------------+---------------------------+--------------------+ +| WITH_MPI | False | +mpi | - mpi-report recipe | mpi, mpireport | +| | | | - profile.mpi, | | +| | | | mpi.message.count, | | +| | | | mpi.message.size | | +| | | | recipe options | | +| | | | - Cross-process | | +| | | | aggregation | | ++----------------------+---------------+---------------+---------------------------+--------------------+ +| WITH_PAPI | False | +papi | - topdown.all, | papi, topdown | +| | | | topdown.toplevel, | | +| | | | topdown-counters.* | | +| | | | recipe options for some | | +| | | | x86 systems | | +| | | | - PAPI counter collection | | ++----------------------+---------------+---------------+---------------------------+--------------------+ +| WITH_PAPI_RDPMC | True | not available | Topdown calculations | | +| | | yet | based on different | | +| | | | approaches to reading | | +| | | | counters in PAPI | | ++----------------------+---------------+---------------+---------------------------+--------------------+ +| WITH_LIBDW | False | +libdw | - source.module, | symbollookup | +| | | | source.function, | | +| | | | source.location | | +| | | | recipe options | | +| | | | - Symbol name lookup | | ++----------------------+---------------+---------------+---------------------------+--------------------+ +| WITH_LIBPFM | False | +libpfm | PerfEvent counter | libpfm | +| | | | collection and precise | | +| | | | event sampling | | ++----------------------+---------------+---------------+---------------------------+--------------------+ +| WITH_LIBUNWIND | False | +libunwind | - callpath option for | callpath | +| | | | sample-report and | | +| | | | event-trace recipes | | +| | | | (requires libdw) | | +| | 
| | - Call stack unwinding | | ++----------------------+---------------+---------------+---------------------------+--------------------+ +| WITH_SAMPLER | False | +sampler | - sample-report, | sampler | +| | | | hatchet-sample-profile | | +| | | | recipes | | +| | | | - sampling option for | | +| | | | event-trace recipe | | +| | | | - Linux sampling support | | ++----------------------+---------------+---------------+---------------------------+--------------------+ +| WITH_CUPTI | False | +cuda | - cuda-activity-report | cupti, cuptitrace | +| | | | cuda-activity-profile | | +| | | | recipes | | +| | | | - profile.cuda, | | +| | | | cuda.gputime, | | +| | | | cuda.memcpy recipe | | +| | | | options | | +| | | | - CUDA API profiling | | +| | | | - CUDA activity tracing | | ++----------------------+---------------+ +---------------------------+--------------------+ +| WITH_NVTX | False | | - nvtx recipe | nvtx | +| | | | - Caliper-to-NVTX region | | +| | | | forwarding | | ++----------------------+---------------+---------------+---------------------------+--------------------+ +| WITH_ROCTRACER | False | +rocm | - rocm-activity-report, | roctracer | +| | | | rocm-activity-profile | | +| | | | recipes | | +| | | | - profile.hip | | +| | | | rocm.gputime, | | +| | | | rocm.memcpy recipe | | +| | | | options | | +| | | | - ROCm/HIP API profiling | | +| | | | - ROCm activity tracing | | ++----------------------+---------------+ +---------------------------+--------------------+ +| WITH_ROCTX | False | | - roctx recipe | roctx | +| | | | - Caliper-to-ROCTX region | | +| | | | forwarding | | ++----------------------+---------------+---------------+---------------------------+--------------------+ +| WITH_OMPT | False | not available | - openmp-report recipe | ompt | +| | | yet | - openmp.times, | | +| | | | openmp.threads, | | +| | | | openmp.efficiency | | +| | | | recipe options | | +| | | | - OpenMP tools interface | | +| | | | support (CPU only, no | | +| | | | 
target offload) | | ++----------------------+---------------+---------------+---------------------------+--------------------+ +| WITH_GOTCHA | True on | +gotcha | - io.bytes.*, | io, pthread, | +| | Linux; | | io.*.bandwidth, | sysalloc | +| | False | | mem.highwatermark, | | +| | otherwise | | main_thread_only | | +| | | | recipe options | | +| | | | - Use Gotcha for MPI | | +| | | | MPI function wrapping | | +| | | | instead of PMPI | | ++----------------------+---------------+---------------+---------------------------+--------------------+ +| WITH_UMPIRE | False | not available | umpire.totals, | umpire | +| | | yet | umpire.allocators options | | ++----------------------+---------------+---------------+---------------------------+--------------------+ +| WITH_VARIORUM | False | +variorum | Read variorum counters | variorum | ++----------------------+---------------+---------------+---------------------------+--------------------+ +| WITH_PCP | False | not available | - mem.*.bandwidth, | pcp, pcp.memory | +| | | yet | mem.*.bytes recipe | | +| | | | options on some LLNL | | +| | | | LC systems | | +| | | | - Read Performance | | +| | | | CoPilot counters | | ++----------------------+---------------+---------------+---------------------------+--------------------+ +| WITH_VTUNE | False | not available | Intel ITT API annotation | vtune | +| | | yet | forwarding | | ++----------------------+---------------+---------------+---------------------------+--------------------+ +| WITH_CRAYPAT | False | not available | HPE CrayPAT API | craypat | +| | | yet | annotation forwarding | | ++----------------------+---------------+---------------+---------------------------+--------------------+ +| WITH_KOKKOS | True | +kokkos | Enable Kokkos tool API | kokkostime, | +| | | | bindings | kokkoslookup | ++----------------------+---------------+---------------+---------------------------+--------------------+ +| WITH_FORTRAN | False | +fortran | Enable Fortran API | | 
++----------------------+---------------+---------------+---------------------------+--------------------+ +| WITH_PYTHON_BINDINGS | False | +python | Enable Python API | | ++----------------------+---------------+---------------+---------------------------+--------------------+ +| WITH_ARCH | No default | not available | Enable microarchitecture- | | +| | | yet | specific features | | ++----------------------+---------------+---------------+---------------------------+--------------------+ diff --git a/doc/sphinx/index.rst b/doc/sphinx/index.rst index acf1dd16f..692564f87 100644 --- a/doc/sphinx/index.rst +++ b/doc/sphinx/index.rst @@ -66,6 +66,7 @@ This section lists how-to articles for various use cases. SampleProfiling ThirdPartyTools FortranSupport + PythonSupport Reference documentation ------------------------------- diff --git a/src/services/topdown/HaswellTopdown.cpp b/src/services/topdown/HaswellTopdown.cpp index f149a6c55..f57acd047 100644 --- a/src/services/topdown/HaswellTopdown.cpp +++ b/src/services/topdown/HaswellTopdown.cpp @@ -2,249 +2,231 @@ #include -namespace cali { -namespace topdown { +namespace cali +{ +namespace topdown +{ HaswellTopdown::HaswellTopdown(IntelTopdownLevel level) : cali::topdown::TopdownCalculator( - level, - // top_counters - "CPU_CLK_THREAD_UNHALTED:THREAD_P" - ",IDQ_UOPS_NOT_DELIVERED:CORE" - ",INT_MISC:RECOVERY_CYCLES" - ",UOPS_ISSUED:ANY" - ",UOPS_RETIRED:RETIRE_SLOTS", - // all_counters - "BR_MISP_RETIRED:ALL_BRANCHES" - ",CPU_CLK_THREAD_UNHALTED:THREAD_P" - ",CYCLE_ACTIVITY:CYCLES_NO_EXECUTE" - ",CYCLE_ACTIVITY:STALLS_L1D_PENDING" - ",CYCLE_ACTIVITY:STALLS_L2_PENDING" - ",CYCLE_ACTIVITY:STALLS_LDM_PENDING" - ",IDQ_UOPS_NOT_DELIVERED:CORE" - ",IDQ_UOPS_NOT_DELIVERED:CYCLES_0_UOPS_DELIV_CORE" - ",INT_MISC:RECOVERY_CYCLES" - ",MACHINE_CLEARS:COUNT" - ",MEM_LOAD_UOPS_RETIRED:L3_HIT" - ",MEM_LOAD_UOPS_RETIRED:L3_MISS" - ",UOPS_EXECUTED:CORE_CYCLES_GE_1" - ",UOPS_EXECUTED:CORE_CYCLES_GE_2" - ",UOPS_ISSUED:ANY" - 
",UOPS_RETIRED:RETIRE_SLOTS", - // res_top - {"retiring", "backend_bound", "frontend_bound", "bad_speculation"}, - // res_all - {"retiring", "backend_bound", "frontend_bound", "bad_speculation", - "branch_mispredict", "machine_clears", "frontend_latency", - "frontend_bandwidth", "memory_bound", "core_bound", "ext_mem_bound", - "l1_bound", "l2_bound", "l3_bound"}) {} - -bool HaswellTopdown::check_for_disabled_multiplex() const { return false; } - -std::vector -HaswellTopdown::compute_toplevel(const std::vector &rec) { - std::vector ret; - - Variant v_cpu_clk_unhalted_thread_p = - get_val_from_rec(rec, "CPU_CLK_THREAD_UNHALTED:THREAD_P"); - Variant v_uops_retired_retire_slots = - get_val_from_rec(rec, "UOPS_RETIRED:RETIRE_SLOTS"); - Variant v_uops_issued_any = get_val_from_rec(rec, "UOPS_ISSUED:ANY"); - Variant v_int_misc_recovery_cycles = - get_val_from_rec(rec, "INT_MISC:RECOVERY_CYCLES"); - Variant v_idq_uops_not_delivered_core = - get_val_from_rec(rec, "IDQ_UOPS_NOT_DELIVERED:CORE"); - - bool is_incomplete = v_cpu_clk_unhalted_thread_p.empty() || - v_uops_retired_retire_slots.empty() || - v_uops_issued_any.empty() || - v_int_misc_recovery_cycles.empty() || - v_idq_uops_not_delivered_core.empty(); - bool is_nonzero = v_cpu_clk_unhalted_thread_p.to_double() > 0.0 && - v_uops_retired_retire_slots.to_double() > 0.0 && - v_uops_issued_any.to_double() > 0.0 && - v_int_misc_recovery_cycles.to_double() > 0.0 && - v_idq_uops_not_delivered_core.to_double() > 0.0; - - double slots = 4.0 * v_cpu_clk_unhalted_thread_p.to_double(); - - if (is_incomplete || !is_nonzero || slots < 1.0) + level, + // top_counters + "CPU_CLK_THREAD_UNHALTED:THREAD_P" + ",IDQ_UOPS_NOT_DELIVERED:CORE" + ",INT_MISC:RECOVERY_CYCLES" + ",UOPS_ISSUED:ANY" + ",UOPS_RETIRED:RETIRE_SLOTS", + // all_counters + "BR_MISP_RETIRED:ALL_BRANCHES" + ",CPU_CLK_THREAD_UNHALTED:THREAD_P" + ",CYCLE_ACTIVITY:CYCLES_NO_EXECUTE" + ",CYCLE_ACTIVITY:STALLS_L1D_PENDING" + ",CYCLE_ACTIVITY:STALLS_L2_PENDING" + 
",CYCLE_ACTIVITY:STALLS_LDM_PENDING" + ",IDQ_UOPS_NOT_DELIVERED:CORE" + ",IDQ_UOPS_NOT_DELIVERED:CYCLES_0_UOPS_DELIV_CORE" + ",INT_MISC:RECOVERY_CYCLES" + ",MACHINE_CLEARS:COUNT" + ",MEM_LOAD_UOPS_RETIRED:L3_HIT" + ",MEM_LOAD_UOPS_RETIRED:L3_MISS" + ",UOPS_EXECUTED:CORE_CYCLES_GE_1" + ",UOPS_EXECUTED:CORE_CYCLES_GE_2" + ",UOPS_ISSUED:ANY" + ",UOPS_RETIRED:RETIRE_SLOTS", + // res_top + { "retiring", "backend_bound", "frontend_bound", "bad_speculation" }, + // res_all + { "retiring", + "backend_bound", + "frontend_bound", + "bad_speculation", + "branch_mispredict", + "machine_clears", + "frontend_latency", + "frontend_bandwidth", + "memory_bound", + "core_bound", + "ext_mem_bound", + "l1_bound", + "l2_bound", + "l3_bound" } + ) +{} + +bool HaswellTopdown::check_for_disabled_multiplex() const +{ + return false; +} + +std::vector HaswellTopdown::compute_toplevel(const std::vector& rec) +{ + std::vector ret; + + Variant v_cpu_clk_unhalted_thread_p = get_val_from_rec(rec, "CPU_CLK_THREAD_UNHALTED:THREAD_P"); + Variant v_uops_retired_retire_slots = get_val_from_rec(rec, "UOPS_RETIRED:RETIRE_SLOTS"); + Variant v_uops_issued_any = get_val_from_rec(rec, "UOPS_ISSUED:ANY"); + Variant v_int_misc_recovery_cycles = get_val_from_rec(rec, "INT_MISC:RECOVERY_CYCLES"); + Variant v_idq_uops_not_delivered_core = get_val_from_rec(rec, "IDQ_UOPS_NOT_DELIVERED:CORE"); + + bool is_incomplete = v_cpu_clk_unhalted_thread_p.empty() || v_uops_retired_retire_slots.empty() + || v_uops_issued_any.empty() || v_int_misc_recovery_cycles.empty() + || v_idq_uops_not_delivered_core.empty(); + bool is_nonzero = v_cpu_clk_unhalted_thread_p.to_double() > 0.0 && v_uops_retired_retire_slots.to_double() > 0.0 + && v_uops_issued_any.to_double() > 0.0 && v_int_misc_recovery_cycles.to_double() > 0.0 + && v_idq_uops_not_delivered_core.to_double() > 0.0; + + double slots = 4.0 * v_cpu_clk_unhalted_thread_p.to_double(); + + if (is_incomplete || !is_nonzero || slots < 1.0) + return ret; + + double retiring = 
v_uops_retired_retire_slots.to_double() / slots; + double bad_speculation = (v_uops_issued_any.to_double() - v_uops_retired_retire_slots.to_double() + + 4.0 * v_int_misc_recovery_cycles.to_double()) + / slots; + double frontend_bound = v_idq_uops_not_delivered_core.to_double() / slots; + double backend_bound = 1.0 - (retiring + bad_speculation + frontend_bound); + + ret.reserve(4); + ret.push_back(Entry(m_result_attrs["retiring"], Variant(std::max(retiring, 0.0)))); + ret.push_back(Entry(m_result_attrs["backend_bound"], Variant(std::max(backend_bound, 0.0)))); + ret.push_back(Entry(m_result_attrs["frontend_bound"], Variant(std::max(frontend_bound, 0.0)))); + ret.push_back(Entry(m_result_attrs["bad_speculation"], Variant(std::max(bad_speculation, 0.0)))); + return ret; +} - double retiring = v_uops_retired_retire_slots.to_double() / slots; - double bad_speculation = - (v_uops_issued_any.to_double() - v_uops_retired_retire_slots.to_double() + - 4.0 * v_int_misc_recovery_cycles.to_double()) / - slots; - double frontend_bound = v_idq_uops_not_delivered_core.to_double() / slots; - double backend_bound = 1.0 - (retiring + bad_speculation + frontend_bound); - - ret.reserve(4); - ret.push_back( - Entry(m_result_attrs["retiring"], Variant(std::max(retiring, 0.0)))); - ret.push_back(Entry(m_result_attrs["backend_bound"], - Variant(std::max(backend_bound, 0.0)))); - ret.push_back(Entry(m_result_attrs["frontend_bound"], - Variant(std::max(frontend_bound, 0.0)))); - ret.push_back(Entry(m_result_attrs["bad_speculation"], - Variant(std::max(bad_speculation, 0.0)))); - - return ret; +std::size_t HaswellTopdown::get_num_expected_toplevel() const +{ + return 4; } -std::size_t HaswellTopdown::get_num_expected_toplevel() const { return 4; } +std::vector HaswellTopdown::compute_retiring(const std::vector& rec) +{ + return {}; +} -std::vector -HaswellTopdown::compute_retiring(const std::vector &rec) { - return {}; +std::size_t HaswellTopdown::get_num_expected_retiring() const +{ + 
return 0; } -std::size_t HaswellTopdown::get_num_expected_retiring() const { return 0; } - -std::vector -HaswellTopdown::compute_backend_bound(const std::vector &rec) { - std::vector ret; - - Variant v_cpu_clk_unhalted_thread_p = - get_val_from_rec(rec, "CPU_CLK_THREAD_UNHALTED:THREAD_P"); - Variant v_cycle_activity_stalls_ldm_pending = - get_val_from_rec(rec, "CYCLE_ACTIVITY:STALLS_LDM_PENDING"); - Variant v_cycle_activity_cycles_no_execute = - get_val_from_rec(rec, "CYCLE_ACTIVITY:CYCLES_NO_EXECUTE"); - Variant v_uops_executed_core_cycles_ge_1 = - get_val_from_rec(rec, "UOPS_EXECUTED:CORE_CYCLES_GE_1"); - Variant v_uops_executed_core_cycles_ge_2 = - get_val_from_rec(rec, "UOPS_EXECUTED:CORE_CYCLES_GE_2"); - Variant v_mem_load_uops_retired_l3_miss = - get_val_from_rec(rec, "MEM_LOAD_UOPS_RETIRED:L3_MISS"); - Variant v_mem_load_uops_retired_l3_hit = - get_val_from_rec(rec, "MEM_LOAD_UOPS_RETIRED:L3_HIT"); - Variant v_cycle_activity_stalls_l2_pending = - get_val_from_rec(rec, "CYCLE_ACTIVITY:STALLS_L2_PENDING"); - Variant v_cycle_activity_stalls_l1d_pending = - get_val_from_rec(rec, "CYCLE_ACTIVITY:STALLS_L1D_PENDING"); - - bool is_incomplete = v_cpu_clk_unhalted_thread_p.empty() || - v_cycle_activity_stalls_ldm_pending.empty() || - v_cycle_activity_cycles_no_execute.empty() || - v_uops_executed_core_cycles_ge_1.empty() || - v_uops_executed_core_cycles_ge_2.empty() || - v_mem_load_uops_retired_l3_miss.empty() || - v_mem_load_uops_retired_l3_hit.empty() || - v_cycle_activity_stalls_l2_pending.empty() || - v_cycle_activity_stalls_l1d_pending.empty(); - - double clocks = v_cpu_clk_unhalted_thread_p.to_double(); - - if (is_incomplete || !(clocks > 1.0)) - return ret; +std::vector HaswellTopdown::compute_backend_bound(const std::vector& rec) +{ + std::vector ret; + + Variant v_cpu_clk_unhalted_thread_p = get_val_from_rec(rec, "CPU_CLK_THREAD_UNHALTED:THREAD_P"); + Variant v_cycle_activity_stalls_ldm_pending = get_val_from_rec(rec, "CYCLE_ACTIVITY:STALLS_LDM_PENDING"); + 
Variant v_cycle_activity_cycles_no_execute = get_val_from_rec(rec, "CYCLE_ACTIVITY:CYCLES_NO_EXECUTE"); + Variant v_uops_executed_core_cycles_ge_1 = get_val_from_rec(rec, "UOPS_EXECUTED:CORE_CYCLES_GE_1"); + Variant v_uops_executed_core_cycles_ge_2 = get_val_from_rec(rec, "UOPS_EXECUTED:CORE_CYCLES_GE_2"); + Variant v_mem_load_uops_retired_l3_miss = get_val_from_rec(rec, "MEM_LOAD_UOPS_RETIRED:L3_MISS"); + Variant v_mem_load_uops_retired_l3_hit = get_val_from_rec(rec, "MEM_LOAD_UOPS_RETIRED:L3_HIT"); + Variant v_cycle_activity_stalls_l2_pending = get_val_from_rec(rec, "CYCLE_ACTIVITY:STALLS_L2_PENDING"); + Variant v_cycle_activity_stalls_l1d_pending = get_val_from_rec(rec, "CYCLE_ACTIVITY:STALLS_L1D_PENDING"); + + bool is_incomplete = v_cpu_clk_unhalted_thread_p.empty() || v_cycle_activity_stalls_ldm_pending.empty() + || v_cycle_activity_cycles_no_execute.empty() || v_uops_executed_core_cycles_ge_1.empty() + || v_uops_executed_core_cycles_ge_2.empty() || v_mem_load_uops_retired_l3_miss.empty() + || v_mem_load_uops_retired_l3_hit.empty() || v_cycle_activity_stalls_l2_pending.empty() + || v_cycle_activity_stalls_l1d_pending.empty(); + + double clocks = v_cpu_clk_unhalted_thread_p.to_double(); + + if (is_incomplete || !(clocks > 1.0)) + return ret; + + double memory_bound = v_cycle_activity_stalls_ldm_pending.to_double() / clocks; + double be_bound_at_exe = + (v_cycle_activity_cycles_no_execute.to_double() + v_uops_executed_core_cycles_ge_1.to_double() + - v_uops_executed_core_cycles_ge_2.to_double()) + / clocks; + double l3_tot = v_mem_load_uops_retired_l3_hit.to_double() + 7.0 * v_mem_load_uops_retired_l3_miss.to_double(); + double l3_hit_fraction = 0.0; + double l3_miss_fraction = 0.0; + if (l3_tot > 0.0) { + l3_hit_fraction = v_mem_load_uops_retired_l3_hit.to_double() / l3_tot; + l3_miss_fraction = v_mem_load_uops_retired_l3_miss.to_double() / l3_tot; + } + double ext_mem_bound = v_cycle_activity_stalls_l2_pending.to_double() * l3_miss_fraction / clocks; + double 
l1_bound = + (v_cycle_activity_stalls_ldm_pending.to_double() - v_cycle_activity_stalls_l1d_pending.to_double()) / clocks; + double l2_bound = + (v_cycle_activity_stalls_l1d_pending.to_double() - v_cycle_activity_stalls_l2_pending.to_double()) / clocks; + double l3_bound = v_cycle_activity_stalls_l2_pending.to_double() * l3_hit_fraction / clocks; + + ret.reserve(6); + ret.push_back(Entry(m_result_attrs["memory_bound"], Variant(memory_bound))); + ret.push_back(Entry(m_result_attrs["core_bound"], Variant(be_bound_at_exe - memory_bound))); + ret.push_back(Entry(m_result_attrs["ext_mem_bound"], Variant(ext_mem_bound))); + ret.push_back(Entry(m_result_attrs["l1_bound"], Variant(l1_bound))); + ret.push_back(Entry(m_result_attrs["l2_bound"], Variant(l2_bound))); + ret.push_back(Entry(m_result_attrs["l3_bound"], Variant(l3_bound))); - double memory_bound = - v_cycle_activity_stalls_ldm_pending.to_double() / clocks; - double be_bound_at_exe = (v_cycle_activity_cycles_no_execute.to_double() + - v_uops_executed_core_cycles_ge_1.to_double() - - v_uops_executed_core_cycles_ge_2.to_double()) / - clocks; - double l3_tot = v_mem_load_uops_retired_l3_hit.to_double() + - 7.0 * v_mem_load_uops_retired_l3_miss.to_double(); - double l3_hit_fraction = 0.0; - double l3_miss_fraction = 0.0; - if (l3_tot > 0.0) { - l3_hit_fraction = v_mem_load_uops_retired_l3_hit.to_double() / l3_tot; - l3_miss_fraction = v_mem_load_uops_retired_l3_miss.to_double() / l3_tot; - } - double ext_mem_bound = v_cycle_activity_stalls_l2_pending.to_double() * - l3_miss_fraction / clocks; - double l1_bound = (v_cycle_activity_stalls_ldm_pending.to_double() - - v_cycle_activity_stalls_l1d_pending.to_double()) / - clocks; - double l2_bound = (v_cycle_activity_stalls_l1d_pending.to_double() - - v_cycle_activity_stalls_l2_pending.to_double()) / - clocks; - double l3_bound = - v_cycle_activity_stalls_l2_pending.to_double() * l3_hit_fraction / clocks; - - ret.reserve(6); - 
ret.push_back(Entry(m_result_attrs["memory_bound"], Variant(memory_bound))); - ret.push_back(Entry(m_result_attrs["core_bound"], - Variant(be_bound_at_exe - memory_bound))); - ret.push_back(Entry(m_result_attrs["ext_mem_bound"], Variant(ext_mem_bound))); - ret.push_back(Entry(m_result_attrs["l1_bound"], Variant(l1_bound))); - ret.push_back(Entry(m_result_attrs["l2_bound"], Variant(l2_bound))); - ret.push_back(Entry(m_result_attrs["l3_bound"], Variant(l3_bound))); - - return ret; + return ret; } -std::size_t HaswellTopdown::get_num_expected_backend_bound() const { return 6; } +std::size_t HaswellTopdown::get_num_expected_backend_bound() const +{ + return 6; +} -std::vector -HaswellTopdown::compute_frontend_bound(const std::vector &rec) { - std::vector ret; +std::vector HaswellTopdown::compute_frontend_bound(const std::vector& rec) +{ + std::vector ret; - Variant v_cpu_clk_unhalted_thread_p = - get_val_from_rec(rec, "CPU_CLK_THREAD_UNHALTED:THREAD_P"); - Variant v_idq_uops_not_delivered = - get_val_from_rec(rec, "IDQ_UOPS_NOT_DELIVERED:CYCLES_0_UOPS_DELIV_CORE"); + Variant v_cpu_clk_unhalted_thread_p = get_val_from_rec(rec, "CPU_CLK_THREAD_UNHALTED:THREAD_P"); + Variant v_idq_uops_not_delivered = get_val_from_rec(rec, "IDQ_UOPS_NOT_DELIVERED:CYCLES_0_UOPS_DELIV_CORE"); - bool is_incomplete = - v_cpu_clk_unhalted_thread_p.empty() || v_idq_uops_not_delivered.empty(); + bool is_incomplete = v_cpu_clk_unhalted_thread_p.empty() || v_idq_uops_not_delivered.empty(); - double clocks = v_cpu_clk_unhalted_thread_p.to_double(); - double uops = v_idq_uops_not_delivered.to_double(); + double clocks = v_cpu_clk_unhalted_thread_p.to_double(); + double uops = v_idq_uops_not_delivered.to_double(); - if (is_incomplete || clocks < 1.0 || uops > clocks) - return ret; + if (is_incomplete || clocks < 1.0 || uops > clocks) + return ret; - double fe_latency = uops / clocks; + double fe_latency = uops / clocks; - ret.reserve(2); - ret.push_back(Entry(m_result_attrs["frontend_latency"], 
Variant(fe_latency))); - ret.push_back( - Entry(m_result_attrs["frontend_bandwidth"], Variant(1.0 - fe_latency))); + ret.reserve(2); + ret.push_back(Entry(m_result_attrs["frontend_latency"], Variant(fe_latency))); + ret.push_back(Entry(m_result_attrs["frontend_bandwidth"], Variant(1.0 - fe_latency))); - return ret; + return ret; } -std::size_t HaswellTopdown::get_num_expected_frontend_bound() const { - return 2; +std::size_t HaswellTopdown::get_num_expected_frontend_bound() const +{ + return 2; } -std::vector -HaswellTopdown::compute_bad_speculation(const std::vector &rec) { - std::vector ret; +std::vector HaswellTopdown::compute_bad_speculation(const std::vector& rec) +{ + std::vector ret; - Variant v_br_misp_retired_all_branches = - get_val_from_rec(rec, "BR_MISP_RETIRED:ALL_BRANCHES"); - Variant v_machine_clears_count = - get_val_from_rec(rec, "MACHINE_CLEARS:COUNT"); + Variant v_br_misp_retired_all_branches = get_val_from_rec(rec, "BR_MISP_RETIRED:ALL_BRANCHES"); + Variant v_machine_clears_count = get_val_from_rec(rec, "MACHINE_CLEARS:COUNT"); - bool is_incomplete = - v_br_misp_retired_all_branches.empty() || v_machine_clears_count.empty(); + bool is_incomplete = v_br_misp_retired_all_branches.empty() || v_machine_clears_count.empty(); - double br_misp_retired_all_branches = - v_br_misp_retired_all_branches.to_double(); - double machine_clears_count = v_machine_clears_count.to_double(); + double br_misp_retired_all_branches = v_br_misp_retired_all_branches.to_double(); + double machine_clears_count = v_machine_clears_count.to_double(); - if (is_incomplete || - !(br_misp_retired_all_branches + machine_clears_count > 1.0)) - return ret; + if (is_incomplete || !(br_misp_retired_all_branches + machine_clears_count > 1.0)) + return ret; - double branch_mispredict = - br_misp_retired_all_branches / - (br_misp_retired_all_branches + machine_clears_count); + double branch_mispredict = br_misp_retired_all_branches / (br_misp_retired_all_branches + machine_clears_count); 
- ret.reserve(2); - ret.push_back( - Entry(m_result_attrs["branch_mispredict"], Variant(branch_mispredict))); - ret.push_back(Entry(m_result_attrs["machine_clears"], - Variant(1.0 - branch_mispredict))); + ret.reserve(2); + ret.push_back(Entry(m_result_attrs["branch_mispredict"], Variant(branch_mispredict))); + ret.push_back(Entry(m_result_attrs["machine_clears"], Variant(1.0 - branch_mispredict))); - return ret; + return ret; } -std::size_t HaswellTopdown::get_num_expected_bad_speculation() const { - return 2; +std::size_t HaswellTopdown::get_num_expected_bad_speculation() const +{ + return 2; } } // namespace topdown diff --git a/src/services/topdown/HaswellTopdown.h b/src/services/topdown/HaswellTopdown.h index 5ca0a9bed..01c99ebc3 100644 --- a/src/services/topdown/HaswellTopdown.h +++ b/src/services/topdown/HaswellTopdown.h @@ -3,41 +3,40 @@ #include "TopdownCalculator.h" -namespace cali { -namespace topdown { +namespace cali +{ +namespace topdown +{ -class HaswellTopdown : public TopdownCalculator { +class HaswellTopdown : public TopdownCalculator +{ public: - HaswellTopdown(IntelTopdownLevel level); - virtual ~HaswellTopdown() = default; + HaswellTopdown(IntelTopdownLevel level); - virtual bool check_for_disabled_multiplex() const override; + virtual ~HaswellTopdown() = default; - virtual std::vector - compute_toplevel(const std::vector &rec) override; + virtual bool check_for_disabled_multiplex() const override; - virtual std::size_t get_num_expected_toplevel() const override; + virtual std::vector compute_toplevel(const std::vector& rec) override; - virtual std::vector - compute_retiring(const std::vector &rec) override; + virtual std::size_t get_num_expected_toplevel() const override; - virtual std::size_t get_num_expected_retiring() const override; + virtual std::vector compute_retiring(const std::vector& rec) override; - virtual std::vector - compute_backend_bound(const std::vector &rec) override; + virtual std::size_t get_num_expected_retiring() const 
override; - virtual std::size_t get_num_expected_backend_bound() const override; + virtual std::vector compute_backend_bound(const std::vector& rec) override; - virtual std::vector - compute_frontend_bound(const std::vector &rec) override; + virtual std::size_t get_num_expected_backend_bound() const override; - virtual std::size_t get_num_expected_frontend_bound() const override; + virtual std::vector compute_frontend_bound(const std::vector& rec) override; - virtual std::vector - compute_bad_speculation(const std::vector &rec) override; + virtual std::size_t get_num_expected_frontend_bound() const override; - virtual std::size_t get_num_expected_bad_speculation() const override; + virtual std::vector compute_bad_speculation(const std::vector& rec) override; + + virtual std::size_t get_num_expected_bad_speculation() const override; }; } // namespace topdown diff --git a/src/services/topdown/SapphireRapidsTopdown.h b/src/services/topdown/SapphireRapidsTopdown.h index 8fc75282b..bdba3bd8b 100644 --- a/src/services/topdown/SapphireRapidsTopdown.h +++ b/src/services/topdown/SapphireRapidsTopdown.h @@ -3,41 +3,40 @@ #include "TopdownCalculator.h" -namespace cali { -namespace topdown { +namespace cali +{ +namespace topdown +{ -class SapphireRapidsTopdown : public TopdownCalculator { +class SapphireRapidsTopdown : public TopdownCalculator +{ public: - SapphireRapidsTopdown(IntelTopdownLevel level); - virtual ~SapphireRapidsTopdown() = default; + SapphireRapidsTopdown(IntelTopdownLevel level); - virtual bool check_for_disabled_multiplex() const override; + virtual ~SapphireRapidsTopdown() = default; - virtual std::vector - compute_toplevel(const std::vector &rec) override; + virtual bool check_for_disabled_multiplex() const override; - virtual std::size_t get_num_expected_toplevel() const override; + virtual std::vector compute_toplevel(const std::vector& rec) override; - virtual std::vector - compute_retiring(const std::vector &rec) override; + virtual std::size_t 
get_num_expected_toplevel() const override; - virtual std::size_t get_num_expected_retiring() const override; + virtual std::vector compute_retiring(const std::vector& rec) override; - virtual std::vector - compute_backend_bound(const std::vector &rec) override; + virtual std::size_t get_num_expected_retiring() const override; - virtual std::size_t get_num_expected_backend_bound() const override; + virtual std::vector compute_backend_bound(const std::vector& rec) override; - virtual std::vector - compute_frontend_bound(const std::vector &rec) override; + virtual std::size_t get_num_expected_backend_bound() const override; - virtual std::size_t get_num_expected_frontend_bound() const override; + virtual std::vector compute_frontend_bound(const std::vector& rec) override; - virtual std::vector - compute_bad_speculation(const std::vector &rec) override; + virtual std::size_t get_num_expected_frontend_bound() const override; - virtual std::size_t get_num_expected_bad_speculation() const override; + virtual std::vector compute_bad_speculation(const std::vector& rec) override; + + virtual std::size_t get_num_expected_bad_speculation() const override; }; } // namespace topdown diff --git a/src/services/topdown/SapphireRapidsTopdown_rdpmc.cpp b/src/services/topdown/SapphireRapidsTopdown_rdpmc.cpp index a7e55bcf9..79df5dbcc 100644 --- a/src/services/topdown/SapphireRapidsTopdown_rdpmc.cpp +++ b/src/services/topdown/SapphireRapidsTopdown_rdpmc.cpp @@ -12,233 +12,222 @@ #define FETCH_LAT_OFFSET 6 #define MEM_BOUND_OFFSET 7 -static double get_tma_percent_from_rdpmc_value(uint64_t rdpmc_value, - uint64_t offset) { - return (double)((rdpmc_value >> (offset * 8)) & 0xff) / 0xff; +static double get_tma_percent_from_rdpmc_value(uint64_t rdpmc_value, uint64_t offset) +{ + return (double) ((rdpmc_value >> (offset * 8)) & 0xff) / 0xff; } -namespace cali { -namespace topdown { +namespace cali +{ +namespace topdown +{ SapphireRapidsTopdown::SapphireRapidsTopdown(IntelTopdownLevel level) 
: cali::topdown::TopdownCalculator( - level, - // top_counters - "perf::slots" - ",perf::topdown-retiring", - // all_counters - "perf::slots" - ",perf::topdown-retiring", - // res_top - {"retiring", "backend_bound", "frontend_bound", "bad_speculation"}, - // res_all - {"retiring", "backend_bound", "frontend_bound", "bad_speculation", - "branch_mispredict", "machine_clears", "frontend_latency", - "frontend_bandwidth", "memory_bound", "core_bound", "light_ops", - "heavy_ops"}) {} - -bool SapphireRapidsTopdown::check_for_disabled_multiplex() const { - return true; + level, + // top_counters + "perf::slots" + ",perf::topdown-retiring", + // all_counters + "perf::slots" + ",perf::topdown-retiring", + // res_top + { "retiring", "backend_bound", "frontend_bound", "bad_speculation" }, + // res_all + { "retiring", + "backend_bound", + "frontend_bound", + "bad_speculation", + "branch_mispredict", + "machine_clears", + "frontend_latency", + "frontend_bandwidth", + "memory_bound", + "core_bound", + "light_ops", + "heavy_ops" } + ) +{} + +bool SapphireRapidsTopdown::check_for_disabled_multiplex() const +{ + return true; } -std::vector -SapphireRapidsTopdown::compute_toplevel(const std::vector &rec) { - std::vector ret; +std::vector SapphireRapidsTopdown::compute_toplevel(const std::vector& rec) +{ + std::vector ret; - // Get PAPI metrics for toplevel calculations - Variant v_slots_or_info_thread_slots = get_val_from_rec(rec, "perf::slots"); - Variant v_tma_metrics = get_val_from_rec(rec, "perf::topdown-retiring"); + // Get PAPI metrics for toplevel calculations + Variant v_slots_or_info_thread_slots = get_val_from_rec(rec, "perf::slots"); + Variant v_tma_metrics = get_val_from_rec(rec, "perf::topdown-retiring"); - // Check if any Variant is empty (use .empty()) - bool is_incomplete = - v_tma_metrics.empty() || v_slots_or_info_thread_slots.empty(); - // Check if all Variants are greater than 0 when casted to doubles (use - // .to_double()) - bool is_nonzero = 
v_tma_metrics.to_uint() > 0; + // Check if any Variant is empty (use .empty()) + bool is_incomplete = v_tma_metrics.empty() || v_slots_or_info_thread_slots.empty(); + // Check if all Variants are greater than 0 when casted to doubles (use + // .to_double()) + bool is_nonzero = v_tma_metrics.to_uint() > 0; - // Check if bad values were obtained - if (is_incomplete || !is_nonzero) - return ret; + // Check if bad values were obtained + if (is_incomplete || !is_nonzero) + return ret; + + uint64_t tma_metric_papi_rdpmc = v_tma_metrics.to_uint(); + + double retiring = get_tma_percent_from_rdpmc_value(tma_metric_papi_rdpmc, RETIRING_OFFSET); + double frontend_bound = get_tma_percent_from_rdpmc_value(tma_metric_papi_rdpmc, FE_BOUND_OFFSET); + double backend_bound = get_tma_percent_from_rdpmc_value(tma_metric_papi_rdpmc, BE_BOUND_OFFSET); + double bad_speculation = get_tma_percent_from_rdpmc_value(tma_metric_papi_rdpmc, BAD_SPEC_OFFSET); + + // Add toplevel metrics to vector of Entry + ret.reserve(4); + ret.push_back(Entry(m_result_attrs["retiring"], Variant(std::max(retiring, 0.0)))); + ret.push_back(Entry(m_result_attrs["backend_bound"], Variant(std::max(backend_bound, 0.0)))); + ret.push_back(Entry(m_result_attrs["frontend_bound"], Variant(std::max(frontend_bound, 0.0)))); + ret.push_back(Entry(m_result_attrs["bad_speculation"], Variant(std::max(bad_speculation, 0.0)))); - uint64_t tma_metric_papi_rdpmc = v_tma_metrics.to_uint(); - - double retiring = - get_tma_percent_from_rdpmc_value(tma_metric_papi_rdpmc, RETIRING_OFFSET); - double frontend_bound = - get_tma_percent_from_rdpmc_value(tma_metric_papi_rdpmc, FE_BOUND_OFFSET); - double backend_bound = - get_tma_percent_from_rdpmc_value(tma_metric_papi_rdpmc, BE_BOUND_OFFSET); - double bad_speculation = - get_tma_percent_from_rdpmc_value(tma_metric_papi_rdpmc, BAD_SPEC_OFFSET); - - // Add toplevel metrics to vector of Entry - ret.reserve(4); - ret.push_back( - Entry(m_result_attrs["retiring"], Variant(std::max(retiring, 
0.0)))); - ret.push_back(Entry(m_result_attrs["backend_bound"], - Variant(std::max(backend_bound, 0.0)))); - ret.push_back(Entry(m_result_attrs["frontend_bound"], - Variant(std::max(frontend_bound, 0.0)))); - ret.push_back(Entry(m_result_attrs["bad_speculation"], - Variant(std::max(bad_speculation, 0.0)))); - - return ret; + return ret; } -std::size_t SapphireRapidsTopdown::get_num_expected_toplevel() const { - return 4; +std::size_t SapphireRapidsTopdown::get_num_expected_toplevel() const +{ + return 4; } -std::vector -SapphireRapidsTopdown::compute_retiring(const std::vector &rec) { - std::vector ret; +std::vector SapphireRapidsTopdown::compute_retiring(const std::vector& rec) +{ + std::vector ret; - // Get PAPI metrics for toplevel calculations - Variant v_slots_or_info_thread_slots = get_val_from_rec(rec, "perf::slots"); - Variant v_tma_metrics = get_val_from_rec(rec, "perf::topdown-retiring"); + // Get PAPI metrics for toplevel calculations + Variant v_slots_or_info_thread_slots = get_val_from_rec(rec, "perf::slots"); + Variant v_tma_metrics = get_val_from_rec(rec, "perf::topdown-retiring"); - // Check if any Variant is empty (use .empty()) - bool is_incomplete = - v_tma_metrics.empty() || v_slots_or_info_thread_slots.empty(); + // Check if any Variant is empty (use .empty()) + bool is_incomplete = v_tma_metrics.empty() || v_slots_or_info_thread_slots.empty(); - // Check if bad values were obtained - if (is_incomplete) - return ret; + // Check if bad values were obtained + if (is_incomplete) + return ret; - uint64_t tma_metric_papi_rdpmc = v_tma_metrics.to_uint(); + uint64_t tma_metric_papi_rdpmc = v_tma_metrics.to_uint(); - double retiring = - get_tma_percent_from_rdpmc_value(tma_metric_papi_rdpmc, RETIRING_OFFSET); - double heavy_ops = - get_tma_percent_from_rdpmc_value(tma_metric_papi_rdpmc, HEAVY_OPS_OFFSET); - double light_ops = std::max(0.0, retiring - heavy_ops); + double retiring = get_tma_percent_from_rdpmc_value(tma_metric_papi_rdpmc, 
RETIRING_OFFSET); + double heavy_ops = get_tma_percent_from_rdpmc_value(tma_metric_papi_rdpmc, HEAVY_OPS_OFFSET); + double light_ops = std::max(0.0, retiring - heavy_ops); - // Add toplevel metrics to vector of Entry - ret.reserve(2); - ret.push_back( - Entry(m_result_attrs["heavy_ops"], Variant(std::max(heavy_ops, 0.0)))); - ret.push_back( - Entry(m_result_attrs["light_ops"], Variant(std::max(light_ops, 0.0)))); + // Add toplevel metrics to vector of Entry + ret.reserve(2); + ret.push_back(Entry(m_result_attrs["heavy_ops"], Variant(std::max(heavy_ops, 0.0)))); + ret.push_back(Entry(m_result_attrs["light_ops"], Variant(std::max(light_ops, 0.0)))); - return ret; + return ret; } -std::size_t SapphireRapidsTopdown::get_num_expected_retiring() const { - return 2; +std::size_t SapphireRapidsTopdown::get_num_expected_retiring() const +{ + return 2; } -std::vector -SapphireRapidsTopdown::compute_backend_bound(const std::vector &rec) { - std::vector ret; +std::vector SapphireRapidsTopdown::compute_backend_bound(const std::vector& rec) +{ + std::vector ret; - // Get PAPI metrics for toplevel calculations - Variant v_slots_or_info_thread_slots = get_val_from_rec(rec, "perf::slots"); - Variant v_tma_metrics = get_val_from_rec(rec, "perf::topdown-retiring"); + // Get PAPI metrics for toplevel calculations + Variant v_slots_or_info_thread_slots = get_val_from_rec(rec, "perf::slots"); + Variant v_tma_metrics = get_val_from_rec(rec, "perf::topdown-retiring"); - // Check if any Variant is empty (use .empty()) - bool is_incomplete = - v_tma_metrics.empty() || v_slots_or_info_thread_slots.empty(); + // Check if any Variant is empty (use .empty()) + bool is_incomplete = v_tma_metrics.empty() || v_slots_or_info_thread_slots.empty(); - // Check if bad values were obtained - if (is_incomplete) - return ret; + // Check if bad values were obtained + if (is_incomplete) + return ret; - uint64_t tma_metric_papi_rdpmc = v_tma_metrics.to_uint(); + uint64_t tma_metric_papi_rdpmc = 
v_tma_metrics.to_uint(); - double backend_bound = - get_tma_percent_from_rdpmc_value(tma_metric_papi_rdpmc, BE_BOUND_OFFSET); - double memory_bound = - get_tma_percent_from_rdpmc_value(tma_metric_papi_rdpmc, MEM_BOUND_OFFSET); - double core_bound = std::max(0.0, backend_bound - memory_bound); + double backend_bound = get_tma_percent_from_rdpmc_value(tma_metric_papi_rdpmc, BE_BOUND_OFFSET); + double memory_bound = get_tma_percent_from_rdpmc_value(tma_metric_papi_rdpmc, MEM_BOUND_OFFSET); + double core_bound = std::max(0.0, backend_bound - memory_bound); - // Add toplevel metrics to vector of Entry - ret.reserve(2); - ret.push_back(Entry(m_result_attrs["memory_bound"], - Variant(std::max(memory_bound, 0.0)))); - ret.push_back( - Entry(m_result_attrs["core_bound"], Variant(std::max(core_bound, 0.0)))); + // Add toplevel metrics to vector of Entry + ret.reserve(2); + ret.push_back(Entry(m_result_attrs["memory_bound"], Variant(std::max(memory_bound, 0.0)))); + ret.push_back(Entry(m_result_attrs["core_bound"], Variant(std::max(core_bound, 0.0)))); - return ret; + return ret; } -std::size_t SapphireRapidsTopdown::get_num_expected_backend_bound() const { - return 2; +std::size_t SapphireRapidsTopdown::get_num_expected_backend_bound() const +{ + return 2; } -std::vector -SapphireRapidsTopdown::compute_frontend_bound(const std::vector &rec) { - std::vector ret; +std::vector SapphireRapidsTopdown::compute_frontend_bound(const std::vector& rec) +{ + std::vector ret; - // Get PAPI metrics for toplevel calculations - Variant v_slots_or_info_thread_slots = get_val_from_rec(rec, "perf::slots"); - Variant v_tma_metrics = get_val_from_rec(rec, "perf::topdown-retiring"); + // Get PAPI metrics for toplevel calculations + Variant v_slots_or_info_thread_slots = get_val_from_rec(rec, "perf::slots"); + Variant v_tma_metrics = get_val_from_rec(rec, "perf::topdown-retiring"); - // Check if any Variant is empty (use .empty()) - bool is_incomplete = - v_tma_metrics.empty() || 
v_slots_or_info_thread_slots.empty(); + // Check if any Variant is empty (use .empty()) + bool is_incomplete = v_tma_metrics.empty() || v_slots_or_info_thread_slots.empty(); - // Check if bad values were obtained - if (is_incomplete) - return ret; + // Check if bad values were obtained + if (is_incomplete) + return ret; - uint64_t tma_metric_papi_rdpmc = v_tma_metrics.to_uint(); + uint64_t tma_metric_papi_rdpmc = v_tma_metrics.to_uint(); - double frontend_bound = - get_tma_percent_from_rdpmc_value(tma_metric_papi_rdpmc, FE_BOUND_OFFSET); - double fetch_latency = - get_tma_percent_from_rdpmc_value(tma_metric_papi_rdpmc, FETCH_LAT_OFFSET); - double fetch_bandwidth = std::max(0.0, frontend_bound - fetch_latency); + double frontend_bound = get_tma_percent_from_rdpmc_value(tma_metric_papi_rdpmc, FE_BOUND_OFFSET); + double fetch_latency = get_tma_percent_from_rdpmc_value(tma_metric_papi_rdpmc, FETCH_LAT_OFFSET); + double fetch_bandwidth = std::max(0.0, frontend_bound - fetch_latency); - // Add toplevel metrics to vector of Entry - ret.reserve(2); - ret.push_back(Entry(m_result_attrs["frontend_latency"], - Variant(std::max(fetch_latency, 0.0)))); - ret.push_back(Entry(m_result_attrs["frontend_bandwidth"], - Variant(std::max(fetch_bandwidth, 0.0)))); + // Add toplevel metrics to vector of Entry + ret.reserve(2); + ret.push_back(Entry(m_result_attrs["frontend_latency"], Variant(std::max(fetch_latency, 0.0)))); + ret.push_back(Entry(m_result_attrs["frontend_bandwidth"], Variant(std::max(fetch_bandwidth, 0.0)))); - return ret; + return ret; } -std::size_t SapphireRapidsTopdown::get_num_expected_frontend_bound() const { - return 2; +std::size_t SapphireRapidsTopdown::get_num_expected_frontend_bound() const +{ + return 2; } -std::vector -SapphireRapidsTopdown::compute_bad_speculation(const std::vector &rec) { - std::vector ret; +std::vector SapphireRapidsTopdown::compute_bad_speculation(const std::vector& rec) +{ + std::vector ret; - // Get PAPI metrics for toplevel 
calculations - Variant v_slots_or_info_thread_slots = get_val_from_rec(rec, "perf::slots"); - Variant v_tma_metrics = get_val_from_rec(rec, "perf::topdown-retiring"); + // Get PAPI metrics for toplevel calculations + Variant v_slots_or_info_thread_slots = get_val_from_rec(rec, "perf::slots"); + Variant v_tma_metrics = get_val_from_rec(rec, "perf::topdown-retiring"); - // Check if any Variant is empty (use .empty()) - bool is_incomplete = - v_tma_metrics.empty() || v_slots_or_info_thread_slots.empty(); + // Check if any Variant is empty (use .empty()) + bool is_incomplete = v_tma_metrics.empty() || v_slots_or_info_thread_slots.empty(); - // Check if bad values were obtained - if (is_incomplete) - return ret; + // Check if bad values were obtained + if (is_incomplete) + return ret; - uint64_t tma_metric_papi_rdpmc = v_tma_metrics.to_uint(); + uint64_t tma_metric_papi_rdpmc = v_tma_metrics.to_uint(); - double bad_speculation = - get_tma_percent_from_rdpmc_value(tma_metric_papi_rdpmc, BAD_SPEC_OFFSET); - double branch_mispredict = get_tma_percent_from_rdpmc_value( - tma_metric_papi_rdpmc, BR_MISPRED_OFFSET); - double machine_clears = std::max(0.0, bad_speculation - branch_mispredict); + double bad_speculation = get_tma_percent_from_rdpmc_value(tma_metric_papi_rdpmc, BAD_SPEC_OFFSET); + double branch_mispredict = get_tma_percent_from_rdpmc_value(tma_metric_papi_rdpmc, BR_MISPRED_OFFSET); + double machine_clears = std::max(0.0, bad_speculation - branch_mispredict); - // Add toplevel metrics to vector of Entry - ret.reserve(2); - ret.push_back(Entry(m_result_attrs["branch_mispredict"], - Variant(std::max(branch_mispredict, 0.0)))); - ret.push_back(Entry(m_result_attrs["machine_clears"], - Variant(std::max(machine_clears, 0.0)))); + // Add toplevel metrics to vector of Entry + ret.reserve(2); + ret.push_back(Entry(m_result_attrs["branch_mispredict"], Variant(std::max(branch_mispredict, 0.0)))); + ret.push_back(Entry(m_result_attrs["machine_clears"], 
Variant(std::max(machine_clears, 0.0)))); - return ret; + return ret; } -std::size_t SapphireRapidsTopdown::get_num_expected_bad_speculation() const { - return 2; +std::size_t SapphireRapidsTopdown::get_num_expected_bad_speculation() const +{ + return 2; } } // namespace topdown diff --git a/src/services/topdown/SapphireRapidsTopdown_read.cpp b/src/services/topdown/SapphireRapidsTopdown_read.cpp index 1739e1447..1e480505f 100644 --- a/src/services/topdown/SapphireRapidsTopdown_read.cpp +++ b/src/services/topdown/SapphireRapidsTopdown_read.cpp @@ -2,301 +2,280 @@ #include -namespace cali { -namespace topdown { +namespace cali +{ +namespace topdown +{ SapphireRapidsTopdown::SapphireRapidsTopdown(IntelTopdownLevel level) : cali::topdown::TopdownCalculator( - level, - // top_counters - "perf::slots" - ",perf::topdown-retiring" - ",perf::topdown-bad-spec" - ",perf::topdown-fe-bound" - ",perf::topdown-be-bound" - ",INT_MISC:UOP_DROPPING", - // all_counters - "perf::slots" - ",perf::topdown-retiring" - ",perf::topdown-bad-spec" - ",perf::topdown-fe-bound" - ",perf::topdown-be-bound" - ",INT_MISC:UOP_DROPPING" - ",perf_raw::r8400" // topdown-heavy-ops - ",perf_raw::r8500" // topdown-br-mispredict - ",perf_raw::r8600" // topdown-fetch-lat - ",perf_raw::r8700", // topdown-mem-bound - // res_top - {"retiring", "backend_bound", "frontend_bound", "bad_speculation"}, - // res_all - {"retiring", "backend_bound", "frontend_bound", "bad_speculation", - "branch_mispredict", "machine_clears", "frontend_latency", - "frontend_bandwidth", "memory_bound", "core_bound", "light_ops", - "heavy_ops"}) {} - -bool SapphireRapidsTopdown::check_for_disabled_multiplex() const { - return true; + level, + // top_counters + "perf::slots" + ",perf::topdown-retiring" + ",perf::topdown-bad-spec" + ",perf::topdown-fe-bound" + ",perf::topdown-be-bound" + ",INT_MISC:UOP_DROPPING", + // all_counters + "perf::slots" + ",perf::topdown-retiring" + ",perf::topdown-bad-spec" + ",perf::topdown-fe-bound" + 
",perf::topdown-be-bound" + ",INT_MISC:UOP_DROPPING" + ",perf_raw::r8400" // topdown-heavy-ops + ",perf_raw::r8500" // topdown-br-mispredict + ",perf_raw::r8600" // topdown-fetch-lat + ",perf_raw::r8700", // topdown-mem-bound + // res_top + { "retiring", "backend_bound", "frontend_bound", "bad_speculation" }, + // res_all + { "retiring", + "backend_bound", + "frontend_bound", + "bad_speculation", + "branch_mispredict", + "machine_clears", + "frontend_latency", + "frontend_bandwidth", + "memory_bound", + "core_bound", + "light_ops", + "heavy_ops" } + ) +{} + +bool SapphireRapidsTopdown::check_for_disabled_multiplex() const +{ + return true; } -std::vector -SapphireRapidsTopdown::compute_toplevel(const std::vector &rec) { - std::vector ret; - - // Get PAPI metrics for toplevel calculations - Variant v_slots_or_info_thread_slots = get_val_from_rec(rec, "perf::slots"); - Variant v_retiring = get_val_from_rec(rec, "perf::topdown-retiring"); - Variant v_bad_spec = get_val_from_rec(rec, "perf::topdown-bad-spec"); - Variant v_fe_bound = get_val_from_rec(rec, "perf::topdown-fe-bound"); - Variant v_be_bound = get_val_from_rec(rec, "perf::topdown-be-bound"); - Variant v_int_misc_uop_dropping = - get_val_from_rec(rec, "INT_MISC:UOP_DROPPING"); - - // Check if any Variant is empty (use .empty()) - bool is_incomplete = v_fe_bound.empty() || v_be_bound.empty() || - v_bad_spec.empty() || v_retiring.empty() || - v_int_misc_uop_dropping.empty() || - v_slots_or_info_thread_slots.empty(); - // Check if all Variants are greater than 0 when casted to doubles (use - // .to_double()) - bool is_nonzero = - v_fe_bound.to_double() > 0.0 && v_be_bound.to_double() > 0.0 && - v_bad_spec.to_double() > 0.0 && v_retiring.to_double() > 0.0 && - v_int_misc_uop_dropping.to_double() > 0.0 && - v_slots_or_info_thread_slots.to_double() > 0.0; - - // Check if bad values were obtained - if (is_incomplete || !is_nonzero) - return ret; +std::vector SapphireRapidsTopdown::compute_toplevel(const std::vector& 
rec) +{ + std::vector ret; + + // Get PAPI metrics for toplevel calculations + Variant v_slots_or_info_thread_slots = get_val_from_rec(rec, "perf::slots"); + Variant v_retiring = get_val_from_rec(rec, "perf::topdown-retiring"); + Variant v_bad_spec = get_val_from_rec(rec, "perf::topdown-bad-spec"); + Variant v_fe_bound = get_val_from_rec(rec, "perf::topdown-fe-bound"); + Variant v_be_bound = get_val_from_rec(rec, "perf::topdown-be-bound"); + Variant v_int_misc_uop_dropping = get_val_from_rec(rec, "INT_MISC:UOP_DROPPING"); + + // Check if any Variant is empty (use .empty()) + bool is_incomplete = v_fe_bound.empty() || v_be_bound.empty() || v_bad_spec.empty() || v_retiring.empty() + || v_int_misc_uop_dropping.empty() || v_slots_or_info_thread_slots.empty(); + // Check if all Variants are greater than 0 when casted to doubles (use + // .to_double()) + bool is_nonzero = v_fe_bound.to_double() > 0.0 && v_be_bound.to_double() > 0.0 && v_bad_spec.to_double() > 0.0 + && v_retiring.to_double() > 0.0 && v_int_misc_uop_dropping.to_double() > 0.0 + && v_slots_or_info_thread_slots.to_double() > 0.0; + + // Check if bad values were obtained + if (is_incomplete || !is_nonzero) + return ret; + + // Perform toplevel calcs + double toplevel_sum = + (v_retiring.to_double() + v_bad_spec.to_double() + v_fe_bound.to_double() + v_be_bound.to_double()); + + double retiring = (v_retiring.to_double() / toplevel_sum) + (0 * v_slots_or_info_thread_slots.to_double()); + double frontend_bound = (v_fe_bound.to_double() / toplevel_sum) + - (v_int_misc_uop_dropping.to_double() / v_slots_or_info_thread_slots.to_double()); + double backend_bound = (v_be_bound.to_double() / toplevel_sum) + (0 * v_slots_or_info_thread_slots.to_double()); + double bad_speculation = std::max(1.0 - (frontend_bound + backend_bound + retiring), 0.0); + + // Add toplevel metrics to vector of Entry + ret.reserve(4); + ret.push_back(Entry(m_result_attrs["retiring"], Variant(std::max(retiring, 0.0)))); + 
ret.push_back(Entry(m_result_attrs["backend_bound"], Variant(std::max(backend_bound, 0.0)))); + ret.push_back(Entry(m_result_attrs["frontend_bound"], Variant(std::max(frontend_bound, 0.0)))); + ret.push_back(Entry(m_result_attrs["bad_speculation"], Variant(std::max(bad_speculation, 0.0)))); - // Perform toplevel calcs - double toplevel_sum = (v_retiring.to_double() + v_bad_spec.to_double() + - v_fe_bound.to_double() + v_be_bound.to_double()); - - double retiring = (v_retiring.to_double() / toplevel_sum) + - (0 * v_slots_or_info_thread_slots.to_double()); - double frontend_bound = (v_fe_bound.to_double() / toplevel_sum) - - (v_int_misc_uop_dropping.to_double() / - v_slots_or_info_thread_slots.to_double()); - double backend_bound = (v_be_bound.to_double() / toplevel_sum) + - (0 * v_slots_or_info_thread_slots.to_double()); - double bad_speculation = - std::max(1.0 - (frontend_bound + backend_bound + retiring), 0.0); - - // Add toplevel metrics to vector of Entry - ret.reserve(4); - ret.push_back( - Entry(m_result_attrs["retiring"], Variant(std::max(retiring, 0.0)))); - ret.push_back(Entry(m_result_attrs["backend_bound"], - Variant(std::max(backend_bound, 0.0)))); - ret.push_back(Entry(m_result_attrs["frontend_bound"], - Variant(std::max(frontend_bound, 0.0)))); - ret.push_back(Entry(m_result_attrs["bad_speculation"], - Variant(std::max(bad_speculation, 0.0)))); - - return ret; + return ret; } -std::size_t SapphireRapidsTopdown::get_num_expected_toplevel() const { - return 4; +std::size_t SapphireRapidsTopdown::get_num_expected_toplevel() const +{ + return 4; } -std::vector -SapphireRapidsTopdown::compute_retiring(const std::vector &rec) { - std::vector ret; - - // Get PAPI metrics for toplevel calculations - Variant v_slots_or_info_thread_slots = get_val_from_rec(rec, "perf::slots"); - Variant v_retiring = get_val_from_rec(rec, "perf::topdown-retiring"); - Variant v_bad_spec = get_val_from_rec(rec, "perf::topdown-bad-spec"); - Variant v_fe_bound = 
get_val_from_rec(rec, "perf::topdown-fe-bound"); - Variant v_be_bound = get_val_from_rec(rec, "perf::topdown-be-bound"); - Variant v_heavy_ops = get_val_from_rec(rec, "perf_raw::r8400"); - - // Check if any Variant is empty (use .empty()) - bool is_incomplete = v_fe_bound.empty() || v_be_bound.empty() || - v_bad_spec.empty() || v_retiring.empty() || - v_slots_or_info_thread_slots.empty() || - v_heavy_ops.empty(); - - // Check if bad values were obtained - if (is_incomplete) - return ret; +std::vector SapphireRapidsTopdown::compute_retiring(const std::vector& rec) +{ + std::vector ret; + + // Get PAPI metrics for toplevel calculations + Variant v_slots_or_info_thread_slots = get_val_from_rec(rec, "perf::slots"); + Variant v_retiring = get_val_from_rec(rec, "perf::topdown-retiring"); + Variant v_bad_spec = get_val_from_rec(rec, "perf::topdown-bad-spec"); + Variant v_fe_bound = get_val_from_rec(rec, "perf::topdown-fe-bound"); + Variant v_be_bound = get_val_from_rec(rec, "perf::topdown-be-bound"); + Variant v_heavy_ops = get_val_from_rec(rec, "perf_raw::r8400"); + + // Check if any Variant is empty (use .empty()) + bool is_incomplete = v_fe_bound.empty() || v_be_bound.empty() || v_bad_spec.empty() || v_retiring.empty() + || v_slots_or_info_thread_slots.empty() || v_heavy_ops.empty(); - double toplevel_sum = (v_retiring.to_double() + v_bad_spec.to_double() + - v_fe_bound.to_double() + v_be_bound.to_double()); - // Copied from compute_toplevel - double retiring = (v_retiring.to_double() / toplevel_sum) + - (0 * v_slots_or_info_thread_slots.to_double()); + // Check if bad values were obtained + if (is_incomplete) + return ret; - double heavy_ops = (v_heavy_ops.to_double() / toplevel_sum) + - (0 * v_slots_or_info_thread_slots.to_double()); - double light_ops = std::max(0.0, retiring - heavy_ops); + double toplevel_sum = + (v_retiring.to_double() + v_bad_spec.to_double() + v_fe_bound.to_double() + v_be_bound.to_double()); + // Copied from compute_toplevel + double retiring 
= (v_retiring.to_double() / toplevel_sum) + (0 * v_slots_or_info_thread_slots.to_double()); - // Add toplevel metrics to vector of Entry - ret.reserve(2); - ret.push_back( - Entry(m_result_attrs["heavy_ops"], Variant(std::max(heavy_ops, 0.0)))); - ret.push_back( - Entry(m_result_attrs["light_ops"], Variant(std::max(light_ops, 0.0)))); + double heavy_ops = (v_heavy_ops.to_double() / toplevel_sum) + (0 * v_slots_or_info_thread_slots.to_double()); + double light_ops = std::max(0.0, retiring - heavy_ops); - return ret; + // Add toplevel metrics to vector of Entry + ret.reserve(2); + ret.push_back(Entry(m_result_attrs["heavy_ops"], Variant(std::max(heavy_ops, 0.0)))); + ret.push_back(Entry(m_result_attrs["light_ops"], Variant(std::max(light_ops, 0.0)))); + + return ret; } -std::size_t SapphireRapidsTopdown::get_num_expected_retiring() const { - return 2; +std::size_t SapphireRapidsTopdown::get_num_expected_retiring() const +{ + return 2; } -std::vector -SapphireRapidsTopdown::compute_backend_bound(const std::vector &rec) { - std::vector ret; - - // Get PAPI metrics for toplevel calculations - Variant v_slots_or_info_thread_slots = get_val_from_rec(rec, "perf::slots"); - Variant v_retiring = get_val_from_rec(rec, "perf::topdown-retiring"); - Variant v_bad_spec = get_val_from_rec(rec, "perf::topdown-bad-spec"); - Variant v_fe_bound = get_val_from_rec(rec, "perf::topdown-fe-bound"); - Variant v_be_bound = get_val_from_rec(rec, "perf::topdown-be-bound"); - Variant v_memory_bound = get_val_from_rec(rec, "perf_raw::r8700"); - - // Check if any Variant is empty (use .empty()) - bool is_incomplete = v_fe_bound.empty() || v_be_bound.empty() || - v_bad_spec.empty() || v_retiring.empty() || - v_slots_or_info_thread_slots.empty() || - v_memory_bound.empty(); - - // Check if bad values were obtained - if (is_incomplete) - return ret; +std::vector SapphireRapidsTopdown::compute_backend_bound(const std::vector& rec) +{ + std::vector ret; + + // Get PAPI metrics for toplevel 
calculations + Variant v_slots_or_info_thread_slots = get_val_from_rec(rec, "perf::slots"); + Variant v_retiring = get_val_from_rec(rec, "perf::topdown-retiring"); + Variant v_bad_spec = get_val_from_rec(rec, "perf::topdown-bad-spec"); + Variant v_fe_bound = get_val_from_rec(rec, "perf::topdown-fe-bound"); + Variant v_be_bound = get_val_from_rec(rec, "perf::topdown-be-bound"); + Variant v_memory_bound = get_val_from_rec(rec, "perf_raw::r8700"); + + // Check if any Variant is empty (use .empty()) + bool is_incomplete = v_fe_bound.empty() || v_be_bound.empty() || v_bad_spec.empty() || v_retiring.empty() + || v_slots_or_info_thread_slots.empty() || v_memory_bound.empty(); - double toplevel_sum = (v_retiring.to_double() + v_bad_spec.to_double() + - v_fe_bound.to_double() + v_be_bound.to_double()); - // Copied from compute_toplevel - double backend_bound = (v_be_bound.to_double() / toplevel_sum) + - (0 * v_slots_or_info_thread_slots.to_double()); + // Check if bad values were obtained + if (is_incomplete) + return ret; - double memory_bound = (v_memory_bound.to_double() / toplevel_sum) + - (0 * v_slots_or_info_thread_slots.to_double()); - double core_bound = std::max(0.0, backend_bound - memory_bound); + double toplevel_sum = + (v_retiring.to_double() + v_bad_spec.to_double() + v_fe_bound.to_double() + v_be_bound.to_double()); + // Copied from compute_toplevel + double backend_bound = (v_be_bound.to_double() / toplevel_sum) + (0 * v_slots_or_info_thread_slots.to_double()); - // Add toplevel metrics to vector of Entry - ret.reserve(2); - ret.push_back(Entry(m_result_attrs["memory_bound"], - Variant(std::max(memory_bound, 0.0)))); - ret.push_back( - Entry(m_result_attrs["core_bound"], Variant(std::max(core_bound, 0.0)))); + double memory_bound = (v_memory_bound.to_double() / toplevel_sum) + (0 * v_slots_or_info_thread_slots.to_double()); + double core_bound = std::max(0.0, backend_bound - memory_bound); - return ret; + // Add toplevel metrics to vector of Entry + 
ret.reserve(2); + ret.push_back(Entry(m_result_attrs["memory_bound"], Variant(std::max(memory_bound, 0.0)))); + ret.push_back(Entry(m_result_attrs["core_bound"], Variant(std::max(core_bound, 0.0)))); + + return ret; } -std::size_t SapphireRapidsTopdown::get_num_expected_backend_bound() const { - return 2; +std::size_t SapphireRapidsTopdown::get_num_expected_backend_bound() const +{ + return 2; } -std::vector -SapphireRapidsTopdown::compute_frontend_bound(const std::vector &rec) { - std::vector ret; - - // Get PAPI metrics for toplevel calculations - Variant v_slots_or_info_thread_slots = get_val_from_rec(rec, "perf::slots"); - Variant v_retiring = get_val_from_rec(rec, "perf::topdown-retiring"); - Variant v_bad_spec = get_val_from_rec(rec, "perf::topdown-bad-spec"); - Variant v_fe_bound = get_val_from_rec(rec, "perf::topdown-fe-bound"); - Variant v_be_bound = get_val_from_rec(rec, "perf::topdown-be-bound"); - Variant v_int_misc_uop_dropping = - get_val_from_rec(rec, "INT_MISC:UOP_DROPPING"); - Variant v_fetch_latency = get_val_from_rec(rec, "perf_raw::r8600"); - - // Check if any Variant is empty (use .empty()) - bool is_incomplete = - v_fe_bound.empty() || v_be_bound.empty() || v_bad_spec.empty() || - v_retiring.empty() || v_int_misc_uop_dropping.empty() || - v_slots_or_info_thread_slots.empty() || v_fetch_latency.empty(); - - // Check if bad values were obtained - if (is_incomplete) - return ret; +std::vector SapphireRapidsTopdown::compute_frontend_bound(const std::vector& rec) +{ + std::vector ret; + + // Get PAPI metrics for toplevel calculations + Variant v_slots_or_info_thread_slots = get_val_from_rec(rec, "perf::slots"); + Variant v_retiring = get_val_from_rec(rec, "perf::topdown-retiring"); + Variant v_bad_spec = get_val_from_rec(rec, "perf::topdown-bad-spec"); + Variant v_fe_bound = get_val_from_rec(rec, "perf::topdown-fe-bound"); + Variant v_be_bound = get_val_from_rec(rec, "perf::topdown-be-bound"); + Variant v_int_misc_uop_dropping = 
get_val_from_rec(rec, "INT_MISC:UOP_DROPPING"); + Variant v_fetch_latency = get_val_from_rec(rec, "perf_raw::r8600"); + + // Check if any Variant is empty (use .empty()) + bool is_incomplete = v_fe_bound.empty() || v_be_bound.empty() || v_bad_spec.empty() || v_retiring.empty() + || v_int_misc_uop_dropping.empty() || v_slots_or_info_thread_slots.empty() + || v_fetch_latency.empty(); - double toplevel_sum = (v_retiring.to_double() + v_bad_spec.to_double() + - v_fe_bound.to_double() + v_be_bound.to_double()); - // Copied from compute_toplevel - double frontend_bound = (v_fe_bound.to_double() / toplevel_sum) - - (v_int_misc_uop_dropping.to_double() / - v_slots_or_info_thread_slots.to_double()); + // Check if bad values were obtained + if (is_incomplete) + return ret; - double fetch_latency = (v_fetch_latency.to_double() / toplevel_sum) - - (v_int_misc_uop_dropping.to_double() / - v_slots_or_info_thread_slots.to_double()); + double toplevel_sum = + (v_retiring.to_double() + v_bad_spec.to_double() + v_fe_bound.to_double() + v_be_bound.to_double()); + // Copied from compute_toplevel + double frontend_bound = (v_fe_bound.to_double() / toplevel_sum) + - (v_int_misc_uop_dropping.to_double() / v_slots_or_info_thread_slots.to_double()); - double fetch_bandwidth = std::max(0.0, frontend_bound - fetch_latency); + double fetch_latency = (v_fetch_latency.to_double() / toplevel_sum) + - (v_int_misc_uop_dropping.to_double() / v_slots_or_info_thread_slots.to_double()); - // Add toplevel metrics to vector of Entry - ret.reserve(2); - ret.push_back(Entry(m_result_attrs["frontend_latency"], - Variant(std::max(fetch_latency, 0.0)))); - ret.push_back(Entry(m_result_attrs["frontend_bandwidth"], - Variant(std::max(fetch_bandwidth, 0.0)))); + double fetch_bandwidth = std::max(0.0, frontend_bound - fetch_latency); - return ret; + // Add toplevel metrics to vector of Entry + ret.reserve(2); + ret.push_back(Entry(m_result_attrs["frontend_latency"], Variant(std::max(fetch_latency, 0.0)))); + 
ret.push_back(Entry(m_result_attrs["frontend_bandwidth"], Variant(std::max(fetch_bandwidth, 0.0)))); + + return ret; } -std::size_t SapphireRapidsTopdown::get_num_expected_frontend_bound() const { - return 2; +std::size_t SapphireRapidsTopdown::get_num_expected_frontend_bound() const +{ + return 2; } -std::vector -SapphireRapidsTopdown::compute_bad_speculation(const std::vector &rec) { - std::vector ret; - - // Get PAPI metrics for toplevel calculations - Variant v_slots_or_info_thread_slots = get_val_from_rec(rec, "perf::slots"); - Variant v_retiring = get_val_from_rec(rec, "perf::topdown-retiring"); - Variant v_bad_spec = get_val_from_rec(rec, "perf::topdown-bad-spec"); - Variant v_fe_bound = get_val_from_rec(rec, "perf::topdown-fe-bound"); - Variant v_be_bound = get_val_from_rec(rec, "perf::topdown-be-bound"); - Variant v_int_misc_uop_dropping = - get_val_from_rec(rec, "INT_MISC:UOP_DROPPING"); - Variant v_branch_mispredict = get_val_from_rec(rec, "perf_raw::r8500"); - - // Check if any Variant is empty (use .empty()) - bool is_incomplete = - v_fe_bound.empty() || v_be_bound.empty() || v_bad_spec.empty() || - v_retiring.empty() || v_int_misc_uop_dropping.empty() || - v_slots_or_info_thread_slots.empty() || v_branch_mispredict.empty(); - - // Check if bad values were obtained - if (is_incomplete) - return ret; +std::vector SapphireRapidsTopdown::compute_bad_speculation(const std::vector& rec) +{ + std::vector ret; + + // Get PAPI metrics for toplevel calculations + Variant v_slots_or_info_thread_slots = get_val_from_rec(rec, "perf::slots"); + Variant v_retiring = get_val_from_rec(rec, "perf::topdown-retiring"); + Variant v_bad_spec = get_val_from_rec(rec, "perf::topdown-bad-spec"); + Variant v_fe_bound = get_val_from_rec(rec, "perf::topdown-fe-bound"); + Variant v_be_bound = get_val_from_rec(rec, "perf::topdown-be-bound"); + Variant v_int_misc_uop_dropping = get_val_from_rec(rec, "INT_MISC:UOP_DROPPING"); + Variant v_branch_mispredict = get_val_from_rec(rec, 
"perf_raw::r8500"); + + // Check if any Variant is empty (use .empty()) + bool is_incomplete = v_fe_bound.empty() || v_be_bound.empty() || v_bad_spec.empty() || v_retiring.empty() + || v_int_misc_uop_dropping.empty() || v_slots_or_info_thread_slots.empty() + || v_branch_mispredict.empty(); + + // Check if bad values were obtained + if (is_incomplete) + return ret; + + // Perform toplevel calcs + double toplevel_sum = + (v_retiring.to_double() + v_bad_spec.to_double() + v_fe_bound.to_double() + v_be_bound.to_double()); + + double retiring = (v_retiring.to_double() / toplevel_sum) + (0 * v_slots_or_info_thread_slots.to_double()); + double frontend_bound = (v_fe_bound.to_double() / toplevel_sum) + - (v_int_misc_uop_dropping.to_double() / v_slots_or_info_thread_slots.to_double()); + double backend_bound = (v_be_bound.to_double() / toplevel_sum) + (0 * v_slots_or_info_thread_slots.to_double()); + double bad_speculation = std::max(1.0 - (frontend_bound + backend_bound + retiring), 0.0); + + double branch_mispredict = + (v_branch_mispredict.to_double() / toplevel_sum) + (0 * v_slots_or_info_thread_slots.to_double()); + double machine_clears = std::max(0.0, bad_speculation - branch_mispredict); + + // Add toplevel metrics to vector of Entry + ret.reserve(2); + ret.push_back(Entry(m_result_attrs["branch_mispredict"], Variant(std::max(branch_mispredict, 0.0)))); + ret.push_back(Entry(m_result_attrs["machine_clears"], Variant(std::max(machine_clears, 0.0)))); - // Perform toplevel calcs - double toplevel_sum = (v_retiring.to_double() + v_bad_spec.to_double() + - v_fe_bound.to_double() + v_be_bound.to_double()); - - double retiring = (v_retiring.to_double() / toplevel_sum) + - (0 * v_slots_or_info_thread_slots.to_double()); - double frontend_bound = (v_fe_bound.to_double() / toplevel_sum) - - (v_int_misc_uop_dropping.to_double() / - v_slots_or_info_thread_slots.to_double()); - double backend_bound = (v_be_bound.to_double() / toplevel_sum) + - (0 * 
v_slots_or_info_thread_slots.to_double()); - double bad_speculation = - std::max(1.0 - (frontend_bound + backend_bound + retiring), 0.0); - - double branch_mispredict = (v_branch_mispredict.to_double() / toplevel_sum) + - (0 * v_slots_or_info_thread_slots.to_double()); - double machine_clears = std::max(0.0, bad_speculation - branch_mispredict); - - // Add toplevel metrics to vector of Entry - ret.reserve(2); - ret.push_back(Entry(m_result_attrs["branch_mispredict"], - Variant(std::max(branch_mispredict, 0.0)))); - ret.push_back(Entry(m_result_attrs["machine_clears"], - Variant(std::max(machine_clears, 0.0)))); - - return ret; + return ret; } -std::size_t SapphireRapidsTopdown::get_num_expected_bad_speculation() const { - return 2; +std::size_t SapphireRapidsTopdown::get_num_expected_bad_speculation() const +{ + return 2; } } // namespace topdown diff --git a/src/services/topdown/TopdownCalculator.cpp b/src/services/topdown/TopdownCalculator.cpp index bbfa386fd..ab5ab2711 100644 --- a/src/services/topdown/TopdownCalculator.cpp +++ b/src/services/topdown/TopdownCalculator.cpp @@ -5,87 +5,96 @@ #include -namespace cali { -namespace topdown { +namespace cali +{ +namespace topdown +{ -Variant TopdownCalculator::get_val_from_rec(const std::vector &rec, - const char *name) { - Variant ret; +Variant TopdownCalculator::get_val_from_rec(const std::vector& rec, const char* name) +{ + Variant ret; - auto c_it = m_counter_attrs.find(name); - if (c_it == m_counter_attrs.end()) - return ret; + auto c_it = m_counter_attrs.find(name); + if (c_it == m_counter_attrs.end()) + return ret; - cali_id_t attr_id = c_it->second.id(); + cali_id_t attr_id = c_it->second.id(); - auto it = std::find_if(rec.begin(), rec.end(), [attr_id](const Entry &e) { - return e.attribute() == attr_id; - }); + auto it = std::find_if(rec.begin(), rec.end(), [attr_id](const Entry& e) { return e.attribute() == attr_id; }); - if (it != rec.end()) - ret = it->value(); - else - 
++m_counters_not_found[std::string(name)]; + if (it != rec.end()) + ret = it->value(); + else + ++m_counters_not_found[std::string(name)]; - return ret; + return ret; } -TopdownCalculator::TopdownCalculator(IntelTopdownLevel level, - const char *top_counters, - const char *all_counters, - std::vector &&res_top, - std::vector &&res_all) - : m_level(level), m_top_counters(top_counters), - m_all_counters(all_counters), m_res_top(res_top), m_res_all(res_all) {} - -TopdownCalculator::TopdownCalculator(IntelTopdownLevel level) - : m_level(level) {} - -bool TopdownCalculator::find_counter_attrs(CaliperMetadataAccessInterface &db) { - const char *list = (m_level == All ? m_all_counters : m_top_counters); - auto counters = StringConverter(list).to_stringlist(); - - for (const auto &s : counters) { - Attribute attr = db.get_attribute(std::string("sum#papi.") + s); - - if (!attr) - attr = db.get_attribute(std::string("papi.") + s); - if (!attr) { - Log(0).stream() << "topdown: " << s << " counter attribute not found!" - << std::endl; - return false; +TopdownCalculator::TopdownCalculator( + IntelTopdownLevel level, + const char* top_counters, + const char* all_counters, + std::vector&& res_top, + std::vector&& res_all +) + : m_level(level), m_top_counters(top_counters), m_all_counters(all_counters), m_res_top(res_top), m_res_all(res_all) +{} + +TopdownCalculator::TopdownCalculator(IntelTopdownLevel level) : m_level(level) +{} + +bool TopdownCalculator::find_counter_attrs(CaliperMetadataAccessInterface& db) +{ + const char* list = (m_level == All ? m_all_counters : m_top_counters); + auto counters = StringConverter(list).to_stringlist(); + + for (const auto& s : counters) { + Attribute attr = db.get_attribute(std::string("sum#papi.") + s); + + if (!attr) + attr = db.get_attribute(std::string("papi.") + s); + if (!attr) { + Log(0).stream() << "topdown: " << s << " counter attribute not found!" 
<< std::endl; + return false; + } + + m_counter_attrs[s] = attr; } - m_counter_attrs[s] = attr; - } - - return true; + return true; } -void TopdownCalculator::make_result_attrs(CaliperMetadataAccessInterface &db) { - std::vector &res = (m_level == Top ? m_res_top : m_res_all); +void TopdownCalculator::make_result_attrs(CaliperMetadataAccessInterface& db) +{ + std::vector& res = (m_level == Top ? m_res_top : m_res_all); - for (const char *s : res) { - m_result_attrs[std::string(s)] = - db.create_attribute(std::string("topdown.") + s, CALI_TYPE_DOUBLE, - CALI_ATTR_ASVALUE | CALI_ATTR_SKIP_EVENTS); - } + for (const char* s : res) { + m_result_attrs[std::string(s)] = db.create_attribute( + std::string("topdown.") + s, + CALI_TYPE_DOUBLE, + CALI_ATTR_ASVALUE | CALI_ATTR_SKIP_EVENTS + ); + } } -const std::map & -TopdownCalculator::get_counters_not_found() const { - return m_counters_not_found; +const std::map& TopdownCalculator::get_counters_not_found() const +{ + return m_counters_not_found; } -const char *TopdownCalculator::get_counters() const { - if (m_level == All) { - return m_all_counters; - } else { - return m_top_counters; - } +const char* TopdownCalculator::get_counters() const +{ + if (m_level == All) { + return m_all_counters; + } else { + return m_top_counters; + } } -IntelTopdownLevel TopdownCalculator::get_level() const { return m_level; } +IntelTopdownLevel TopdownCalculator::get_level() const +{ + return m_level; +} } // namespace topdown } // namespace cali \ No newline at end of file diff --git a/src/services/topdown/TopdownCalculator.h b/src/services/topdown/TopdownCalculator.h index 9841580ec..0bf292641 100644 --- a/src/services/topdown/TopdownCalculator.h +++ b/src/services/topdown/TopdownCalculator.h @@ -19,74 +19,99 @@ */ // clang-format on -namespace cali { -namespace topdown { +namespace cali +{ +namespace topdown +{ enum IntelTopdownLevel { All = 1, Top = 2 }; -class TopdownCalculator { +class TopdownCalculator +{ protected: - IntelTopdownLevel 
m_level; - const char *m_top_counters; - const char *m_all_counters; + IntelTopdownLevel m_level; - std::vector m_res_top; - std::vector m_res_all; + const char* m_top_counters; + const char* m_all_counters; - std::map m_counter_attrs; - std::map m_result_attrs; + std::vector m_res_top; + std::vector m_res_all; - std::map m_counters_not_found; + std::map m_counter_attrs; + std::map m_result_attrs; - Variant get_val_from_rec(const std::vector &rec, const char *name); + std::map m_counters_not_found; - TopdownCalculator(IntelTopdownLevel level, const char *top_counters, - const char *all_counters, - std::vector &&res_top, - std::vector &&res_all); + Variant get_val_from_rec(const std::vector& rec, const char* name); + + TopdownCalculator( + IntelTopdownLevel level, + const char* top_counters, + const char* all_counters, + std::vector&& res_top, + std::vector&& res_all + ); public: - TopdownCalculator(IntelTopdownLevel level); - virtual ~TopdownCalculator() = default; + TopdownCalculator(IntelTopdownLevel level); + + virtual ~TopdownCalculator() = default; - virtual bool check_for_disabled_multiplex() const = 0; + // Returns true if PAPI multiplexing cannot be used for the + // counters and/or architecture needed for the subclass + virtual bool check_for_disabled_multiplex() const = 0; - virtual std::vector - compute_toplevel(const std::vector &rec) = 0; + // Computes the L1 topdown metrics using the counters contained + // in the Caliper Entries. 
+ virtual std::vector compute_toplevel(const std::vector& rec) = 0; - virtual std::size_t get_num_expected_toplevel() const = 0; + // Returns the expected size of the vector returned from + // compute_toplevel + virtual std::size_t get_num_expected_toplevel() const = 0; - virtual std::vector - compute_retiring(const std::vector &rec) = 0; + // Computes the topdown metrics beneath "Retiring" in the + // topdown hierarchy for the given architecture + virtual std::vector compute_retiring(const std::vector& rec) = 0; - virtual std::size_t get_num_expected_retiring() const = 0; + // Returns the expected size of the vector returned from + // compute_retiring + virtual std::size_t get_num_expected_retiring() const = 0; - virtual std::vector - compute_backend_bound(const std::vector &rec) = 0; + // Computes the topdown metrics beneath "Backend bound" in the + // topdown hierarchy for the given architecture + virtual std::vector compute_backend_bound(const std::vector& rec) = 0; - virtual std::size_t get_num_expected_backend_bound() const = 0; + // Returns the expected size of the vector returned from + // compute_backend_bound + virtual std::size_t get_num_expected_backend_bound() const = 0; - virtual std::vector - compute_frontend_bound(const std::vector &rec) = 0; + // Computes the topdown metrics beneath "Frontend bound" in the + // topdown hierarchy for the given architecture + virtual std::vector compute_frontend_bound(const std::vector& rec) = 0; - virtual std::size_t get_num_expected_frontend_bound() const = 0; + // Returns the expected size of the vector returned from + // compute_frontend_bound + virtual std::size_t get_num_expected_frontend_bound() const = 0; - virtual std::vector - compute_bad_speculation(const std::vector &rec) = 0; + // Computes the topdown metrics beneath "Bad speculation" in the + // topdown hierarchy for the given architecture + virtual std::vector compute_bad_speculation(const std::vector& rec) = 0; - virtual std::size_t
get_num_expected_bad_speculation() const = 0; + // Returns the expected size of the vector returned from + // compute_bad_speculation + virtual std::size_t get_num_expected_bad_speculation() const = 0; - bool find_counter_attrs(CaliperMetadataAccessInterface &db); + bool find_counter_attrs(CaliperMetadataAccessInterface& db); - void make_result_attrs(CaliperMetadataAccessInterface &db); + void make_result_attrs(CaliperMetadataAccessInterface& db); - const std::map &get_counters_not_found() const; + const std::map& get_counters_not_found() const; - const char *get_counters() const; + const char* get_counters() const; - IntelTopdownLevel get_level() const; + IntelTopdownLevel get_level() const; }; } // namespace topdown