|  | /* | 
|  | * Copyright 2017, Data61 | 
|  | * Commonwealth Scientific and Industrial Research Organisation (CSIRO) | 
|  | * ABN 41 687 119 230. | 
|  | * | 
|  | * This software may be distributed and modified according to the terms of | 
|  | * the BSD 2-Clause license. Note that NO WARRANTY is provided. | 
|  | * See "LICENSE_BSD2.txt" for details. | 
|  | * | 
|  | * @TAG(DATA61_BSD) | 
|  | */ | 
|  | #pragma once | 
|  |  | 
|  | #include <stddef.h> | 
|  | #include <stdint.h> | 
|  |  | 
|  | #include <sel4/sel4.h> | 
|  | #include <sel4bench/types.h> | 
|  | #include <sel4bench/arch/sel4bench.h> | 
|  | #include <utils/attribute.h> | 
|  | #include <utils/arith.h> | 
|  |  | 
|  | /** | 
|  | * @file | 
|  | * | 
|  | * libsel4bench is a library designed to abstract over the performance | 
|  | * monitoring counters (PMCs) in modern IA-32 and ARM processors, so that you | 
|  | * can measure the performance of your software.  It will also work out whether | 
|  | * certain operations need to be done in kernel mode, and perform kernel code | 
|  | * injection calls to make them happen.  As a result, expect that any library | 
|  | * call could potentially result in a syscall.  (This is of particular note on | 
|  | * the KZM/ARM1136, for which even reading the cycle counter must be done in | 
|  | * kernel mode.) | 
|  | * | 
|  | * It also goes out of its way to ensure that there's always a cycle counter | 
|  | * available for use.  `sel4bench_init()` will start this running, and | 
|  | * `sel4bench_destroy()` will tear it down, if necessary. | 
|  | * | 
|  | * Notes: | 
|  | * - Overflow is completely ignored, even on processors that only support | 
|  | *   32-bit counters (and thus where there is space to overflow into).  If you | 
|  | *   are doing something that might overflow a counter, it's up to you to deal | 
|  | *   with that possibility. | 
|  | * - Everything is zero-indexed. | 
|  | */ | 
|  |  | 
|  | /* | 
|  | * CPP constants for events that are common to all architecture variants. | 
|  | * | 
|  | * Additional events are architecture- (and potentially processor-) specific. | 
|  | * These may be defined in architecture or processor header files. | 
|  | */ | 
|  | static UNUSED event_id_t GENERIC_EVENTS[] = { | 
|  | SEL4BENCH_EVENT_CACHE_L1I_MISS, | 
|  | SEL4BENCH_EVENT_CACHE_L1D_MISS, | 
|  | SEL4BENCH_EVENT_TLB_L1I_MISS, | 
|  | SEL4BENCH_EVENT_TLB_L1D_MISS, | 
|  | SEL4BENCH_EVENT_EXECUTE_INSTRUCTION, | 
|  | SEL4BENCH_EVENT_BRANCH_MISPREDICT, | 
|  | SEL4BENCH_EVENT_MEMORY_ACCESS, | 
|  | }; | 
|  |  | 
|  | static UNUSED char *GENERIC_EVENT_NAMES[] = { | 
|  | "L1 i-cache misses", | 
|  | "L1 d-cache misses", | 
|  | "L1 i-tlb misses", | 
|  | "L1 d-tlb misses", | 
|  | "Instructions", | 
|  | "Branch mispredictions", | 
|  | "Memory accesses", | 
|  | }; | 
|  |  | 
|  | static_assert(ARRAY_SIZE(GENERIC_EVENTS) == ARRAY_SIZE(GENERIC_EVENT_NAMES), | 
|  | "event names same length as counters"); | 
|  |  | 
|  | /* Number of generic counters */ | 
|  | #define SEL4BENCH_NUM_GENERIC_EVENTS ARRAY_SIZE(GENERIC_EVENTS) | 
|  |  | 
|  | /** | 
|  | * Initialise the sel4bench library.  Nothing else is guaranteed to work, and | 
|  | * may produce strange failures, if you don't do this first. | 
|  | * | 
|  | * Starts the cycle counter, which is guaranteed to run until | 
|  | * `sel4bench_destroy()` is called. | 
|  | */ | 
|  | static UNUSED void sel4bench_init(); | 
|  |  | 
|  | /** | 
|  | * Tear down the sel4bench library.  Nothing else is guaranteed to work, and may | 
|  | * produce strange failures, after you do this. | 
|  | */ | 
|  | static UNUSED void sel4bench_destroy(); | 
|  |  | 
|  | /** | 
|  | * Query the cycle counter.  If said counter needs starting, `sel4bench_init()` | 
|  | * will have taken care of it. | 
|  | * | 
|  | * The returned cycle count might be since `sel4bench_init()`, if the cycle | 
|  | * counter needs explicit starting, or since bootup, if it freewheels. | 
|  | * | 
|  | * @return current cycle count | 
|  | */ | 
|  | static UNUSED ccnt_t sel4bench_get_cycle_count(); | 
|  |  | 
|  | /** | 
|  | * Query how many performance counters are supported on this CPU, excluding the | 
|  | * cycle counter. | 
|  | * | 
|  | * Note that the return value is of type `seL4_Word`; consequently, this library | 
|  | * supports a number of counters less than or equal to the machine word size in | 
|  | * bits. | 
|  |  | 
|  | * @return quantity of counters on this CPU | 
|  | */ | 
|  | static UNUSED seL4_Word sel4bench_get_num_counters(); | 
|  |  | 
|  | /** | 
|  | * Query the description of a counter. | 
|  | * | 
|  | * Unlike the other declarations in this header, this one is not `static`; | 
|  | * presumably it is defined out of line in a separate translation unit | 
|  | * (TODO confirm against the library sources). | 
|  | * | 
|  | * @param counter counter to query (zero-indexed, like everything else here) | 
|  | * | 
|  | * @return ASCII string representation of counter's description; `NULL` if | 
|  | * counter does not exist | 
|  | */ | 
|  | const char *sel4bench_get_counter_description(counter_t counter); | 
|  |  | 
|  | /** | 
|  | * Query the value of a single performance counter. | 
|  | * | 
|  | * As stated in the notes at the top of this file, counter overflow is | 
|  | * ignored; callers that might overflow a counter must handle it themselves. | 
|  | * | 
|  | * @param counter counter to query (zero-indexed) | 
|  | * | 
|  | * @return counter value | 
|  | */ | 
|  | static UNUSED ccnt_t sel4bench_get_counter(counter_t counter); | 
|  |  | 
|  | /** | 
|  | * Query the value of a set of counters. | 
|  | * | 
|  | * `values` is indexed by counter number, so it must point to an array with at | 
|  | * least (highest counter index to be read + 1) elements (to a maximum of | 
|  | * `sel4bench_get_num_counters()`).  Each counter to be read will be written | 
|  | * to its corresponding index in this array. | 
|  | * | 
|  | * @param counters bitfield indicating which counter(s) in `values` to query | 
|  | * @param values   array of counters | 
|  | * | 
|  | * @return current cycle count as in `sel4bench_get_cycle_count()` | 
|  | */ | 
|  | static UNUSED ccnt_t sel4bench_get_counters(counter_bitfield_t counters, | 
|  | ccnt_t *values); | 
|  |  | 
|  | /** | 
|  | * Assign a counter to track a specific event.  Events are processor-specific, | 
|  | * though some common ones are listed in `GENERIC_EVENTS`. | 
|  | * | 
|  | * @param counter counter to configure (zero-indexed) | 
|  | * @param id      identifier of the event to track | 
|  | */ | 
|  | static UNUSED void sel4bench_set_count_event(counter_t counter, event_id_t id); | 
|  |  | 
|  | /** | 
|  | * Start counting events on a set of performance counters. | 
|  | * | 
|  | * `sel4bench_enable_counters()` builds the bitfield it passes here by setting | 
|  | * bit i for counter i. | 
|  | * | 
|  | * @param counters bitfield indicating which counter(s) to start | 
|  | */ | 
|  | static UNUSED void sel4bench_start_counters(counter_bitfield_t counters); | 
|  |  | 
|  | /** | 
|  | * Stop counting events on a set of performance counters. | 
|  | * | 
|  | * Note: Some processors (notably, the KZM/ARM1136) may not support this | 
|  | * operation. | 
|  | * | 
|  | * `sel4bench_read_and_stop_counters()` calls this with the same bitfield it | 
|  | * has just read the counters with. | 
|  | * | 
|  | * @param counters bitfield indicating which counter(s) to stop | 
|  | */ | 
|  | static UNUSED void sel4bench_stop_counters(counter_bitfield_t counters); | 
|  |  | 
|  | /** | 
|  | * Reset all performance counters to zero.  Note that the cycle counter is not a | 
|  | * performance counter, and is not reset. | 
|  | * | 
|  | * `sel4bench_enable_counters()` calls this immediately before starting the | 
|  | * counters it has configured. | 
|  | */ | 
|  | static UNUSED void sel4bench_reset_counters(void); | 
|  |  | 
|  | /** | 
|  | * Query the number of benchmark loops required to read a given number of | 
|  | * events. | 
|  | * | 
|  | * @param n_counters number of counters available | 
|  | * @param n_events   number of events of interest | 
|  | * | 
|  | * @return number of benchmark loops required | 
|  | */ | 
|  | static inline int sel4bench_get_num_counter_chunks(seL4_Word n_counters, | 
|  | seL4_Word n_events) | 
|  | { | 
|  | return DIV_ROUND_UP(n_events, n_counters); | 
|  | } | 
|  |  | 
|  | /** | 
|  | * Enable a chunk of the event counters passed in. | 
|  | * | 
|  | * A "chunk" is a quantity of events not larger than the number of performance | 
|  | * counters available.  Because we can be interested in more events than there | 
|  | * are counters, the events are broken into numbered chunks (zero-indexed).  The | 
|  | * quantity of chunks is ceil(n_events / n_counters). | 
|  | * | 
|  | * Imagine we had 10 events to track but n_counters was only 8 (i.e., an 8-bit | 
|  | * machine). | 
|  | * | 
|  | *     +--chunk 1-+--chunk 0-+ | 
|  | *     | xxxxxxxx | xxxxxxxx | | 
|  | *     +---------------------+ | 
|  | * | 
|  | * sel4bench_enable_counters(10, events, 0, 8) would return 255: | 
|  | * | 
|  | *     +--chunk 1-+--chunk 0-+ | 
|  | *     | 00000000 | 11111111 | | 
|  | *     +---------------------+ | 
|  | * | 
|  | * sel4bench_enable_counters(10, events, 1, 8) would return 3: | 
|  | * | 
|  | *     +--chunk 1-+--chunk 0-+ | 
|  | *     | 00000011 | 00000000 | | 
|  | *     +---------------------+ | 
|  | * | 
|  | * `n_counters` is a parameter because calling `sel4bench_get_num_counters()` | 
|  | * can be expensive, but it should be the same as the function's return value. | 
|  | * | 
|  | * @param n_events   number of events of interest | 
|  | * @param event      events to track | 
|  | * @param chunk      chunk number to enable | 
|  | * @param n_counters number of counters available | 
|  | * | 
|  | * @return mask usable to manipulate the counters enabled | 
|  | */ | 
|  | static inline | 
|  | counter_bitfield_t sel4bench_enable_counters(seL4_Word n_events, | 
|  | event_id_t *events, | 
|  | seL4_Word chunk, | 
|  | seL4_Word n_counters) | 
|  | { | 
|  | assert(chunk < sel4bench_get_num_counter_chunks(n_counters, n_events)); | 
|  | assert(n_counters == sel4bench_get_num_counters()); | 
|  | counter_bitfield_t mask = 0; | 
|  |  | 
|  | for (seL4_Word i = 0; i < n_counters; i++) { | 
|  | seL4_Word counter = chunk * n_counters + i; | 
|  | if (counter >= n_events) { | 
|  | break; | 
|  | } | 
|  | sel4bench_set_count_event(i, events[counter]); | 
|  | mask |= BIT(i); | 
|  | } | 
|  |  | 
|  | sel4bench_reset_counters(); | 
|  | sel4bench_start_counters(mask); | 
|  | return mask; | 
|  | } | 
|  |  | 
|  | /** | 
|  | * Read and stop the counters set in `mask`. | 
|  | * | 
|  | * `n_counters` is a parameter because calling `sel4bench_get_num_counters()` | 
|  | * can be expensive, but it should be the same as the function's return value. | 
|  | * | 
|  | * `results` must point to an array the size of n_events, as passed to | 
|  | * `sel4bench_enable_counters()`. | 
|  | * | 
|  | * @param mask       as returned by `sel4bench_enable_counters()` | 
|  | * @param chunk      as passed to `sel4bench_enable_counters()` | 
|  | * @param n_counters number of counters available | 
|  | * @param results    array of counter results | 
|  | */ | 
|  | static inline void sel4bench_read_and_stop_counters(counter_bitfield_t mask, | 
|  | seL4_Word chunk, | 
|  | seL4_Word n_counters, | 
|  | ccnt_t results[]) | 
|  | { | 
|  | sel4bench_get_counters(mask, &results[chunk * n_counters]); | 
|  | sel4bench_stop_counters(mask); | 
|  | } | 
|  |  | 
|  | /** | 
|  | * Call `sel4bench_enable_counters()` on the `GENERIC_EVENTS` supplied for all | 
|  | * platforms by this library. | 
|  | * | 
|  | * See `sel4bench_enable_counters()` for parameters and return value. | 
|  | */ | 
|  | static inline counter_bitfield_t sel4bench_enable_generic_counters( | 
|  | seL4_Word chunk, seL4_Word n_counters) | 
|  | { | 
|  | return sel4bench_enable_counters(SEL4BENCH_NUM_GENERIC_EVENTS, | 
|  | GENERIC_EVENTS, chunk, n_counters); | 
|  | } | 
|  |  | 
|  | /** | 
|  | * Call `sel4bench_get_num_counter_chunks()` for the `GENERIC_EVENTS` supplied | 
|  | * for all platforms by this library. | 
|  | * | 
|  | * See `sel4bench_get_num_counter_chunks()` for parameters and return value. | 
|  | */ | 
|  | static inline int sel4bench_get_num_generic_counter_chunks(seL4_Word n_counters) | 
|  | { | 
|  | return sel4bench_get_num_counter_chunks(n_counters, | 
|  | SEL4BENCH_NUM_GENERIC_EVENTS); | 
|  | } |