blob: f6eac71d77d753d196f1e9642dad83c33a9d2648 [file] [log] [blame]
/*
* Copyright 2017, Data61
* Commonwealth Scientific and Industrial Research Organisation (CSIRO)
* ABN 41 687 119 230.
*
* This software may be distributed and modified according to the terms of
* the BSD 2-Clause license. Note that NO WARRANTY is provided.
* See "LICENSE_BSD2.txt" for details.
*
* @TAG(DATA61_BSD)
*/
#pragma once
#include <stddef.h>
#include <stdint.h>
#include <sel4/sel4.h>
#include <sel4bench/types.h>
#include <sel4bench/arch/sel4bench.h>
#include <utils/attribute.h>
#include <utils/arith.h>
/**
* @file
*
* libsel4bench is a library designed to abstract over the performance
* monitoring counters (PMCs) in modern IA-32 and ARM processors, so that you
* can measure the performance of your software. It will also work out whether
* certain operations need to be done in kernel mode, and perform kernel code
* injection calls to make them happen. As a result, expect that any library
* call could potentially result in a syscall. (This is of particular note on
* the KZM/ARM1136, for which even reading the cycle counter must be done in
* kernel mode.)
*
* It also goes out of its way to ensure that there's always a cycle counter
* available for use. `sel4bench_init()` will start this running, and
* `sel4bench_destroy()` will tear it down, if necessary.
*
* Notes:
* - Overflow is completely ignored, even on processors that only support
* 32-bit counters (and thus where there is space to overflow into). If you
* are doing something that might overflow a counter, it's up to you to deal
* with that possibility.
* - Everything is zero-indexed.
*/
/*
* CPP constants for events that are common to all architecture variants.
*
* Additional events are architecture- (and potentially processor-) specific.
* These may be defined in architecture or processor header files.
*/
static UNUSED event_id_t GENERIC_EVENTS[] = {
SEL4BENCH_EVENT_CACHE_L1I_MISS,
SEL4BENCH_EVENT_CACHE_L1D_MISS,
SEL4BENCH_EVENT_TLB_L1I_MISS,
SEL4BENCH_EVENT_TLB_L1D_MISS,
SEL4BENCH_EVENT_EXECUTE_INSTRUCTION,
SEL4BENCH_EVENT_BRANCH_MISPREDICT,
SEL4BENCH_EVENT_MEMORY_ACCESS,
};
static UNUSED char *GENERIC_EVENT_NAMES[] = {
"L1 i-cache misses",
"L1 d-cache misses",
"L1 i-tlb misses",
"L1 d-tlb misses",
"Instructions",
"Branch mispredictions",
"Memory accesses",
};
static_assert(ARRAY_SIZE(GENERIC_EVENTS) == ARRAY_SIZE(GENERIC_EVENT_NAMES),
"event names same length as counters");
/* Number of generic counters */
#define SEL4BENCH_NUM_GENERIC_EVENTS ARRAY_SIZE(GENERIC_EVENTS)
/**
* Initialise the sel4bench library. Nothing else is guaranteed to work, and
* may produce strange failures, if you don't do this first.
*
* Starts the cycle counter, which is guaranteed to run until
* `sel4bench_destroy()` is called.
*/
static UNUSED void sel4bench_init();
/**
* Tear down the sel4bench library. Nothing else is guaranteed to work, and may
* produce strange failures, after you do this.
*/
static UNUSED void sel4bench_destroy();
/**
* Query the cycle counter. If said counter needs starting, `sel4bench_init()`
* will have taken care of it.
*
* The returned cycle count might be since `sel4bench_init()`, if the cycle
* counter needs explicit starting, or since bootup, if it freewheels.
*
* @return current cycle count
*/
static UNUSED ccnt_t sel4bench_get_cycle_count();
/**
* Query how many performance counters are supported on this CPU, excluding the
* cycle counter.
*
* Note that the return value is of type `seL4_Word`; consequently, this library
* supports a number of counters less than or equal to the machine word size in
* bits.
* @return quantity of counters on this CPU
*/
static UNUSED seL4_Word sel4bench_get_num_counters();
/**
* Query the description of a counter.
*
* @param counter counter to query
*
* @return ASCII string representation of counter's description; `NULL` if
* counter does not exist
*/
const char *sel4bench_get_counter_description(counter_t counter);
/**
* Query the value of a counter.
*
* @param counter counter to query
*
* @return counter value
*/
static UNUSED ccnt_t sel4bench_get_counter(counter_t counter);
/**
* Query the value of a set of counters.
*
* `values` must point to an array of a length at least equal to the highest
* counter index to be read (to a maximum of `sel4bench_get_num_counters()`).
* Each counter to be read will be written to its corresponding index in this
* array.
*
* @param counters bitfield indicating which counter(s) in `values` to query
* @param values array of counters
*
* @return current cycle count as in `sel4bench_get_cycle_count()`
*/
static UNUSED ccnt_t sel4bench_get_counters(counter_bitfield_t counters,
ccnt_t *values);
/**
* Assign a counter to track a specific event. Events are processor-specific,
* though some common ones might be exposed through preprocessor constants.
*
* @param counter counter to configure
* @param event event to track
*/
static UNUSED void sel4bench_set_count_event(counter_t counter, event_id_t id);
/**
* Start counting events on a set of performance counters.
*
* @param counters bitfield indicating which counter(s) to start
*/
static UNUSED void sel4bench_start_counters(counter_bitfield_t counters);
/**
* Stop counting events on a set of performance counters.
*
* Note: Some processors (notably, the KZM/ARM1136) may not support this
* operation.
*
* @param counters bitfield indicating which counter(s) to stop
*/
static UNUSED void sel4bench_stop_counters(counter_bitfield_t counters);
/**
* Reset all performance counters to zero. Note that the cycle counter is not a
* performance counter, and is not reset.
*
*/
static UNUSED void sel4bench_reset_counters(void);
/**
* Query the number of benchmark loops required to read a given number of
* events.
*
* @param n_counters number of counters available
* @param n_events number of events of interest
*
* @return number of benchmark loops required
*/
static inline int sel4bench_get_num_counter_chunks(seL4_Word n_counters,
seL4_Word n_events)
{
return DIV_ROUND_UP(n_events, n_counters);
}
/**
* Enable a chunk of the event counters passed in.
*
* A "chunk" is a quantity of events not larger than the number of performance
* counters available. Because we can be interested in more events than there
* are counters, the events are broken into numbered chunks (zero-indexed). The
* quantity of chunks is ceil(n_events / n_counters).
*
* Imagine we had 10 events to track but n_counters was only 8 (i.e., an 8-bit
* machine).
*
* +--chunk 1-+--chunk 0-+
* | xxxxxxxx | xxxxxxxx |
* +---------------------+
*
* sel4bench_enable_counters(10, events, 0, 8) would return 255:
*
* +--chunk 1-+--chunk 0-+
* | 00000000 | 11111111 |
* +---------------------+
*
* sel4bench_enable_counters(10, events, 1, 8) would return 3:
*
* +--chunk 1-+--chunk 0-+
* | 00000011 | 00000000 |
* +---------------------+
*
* `n_counters` is a parameter because calling `sel4bench_get_num_counters()`
* can be expensive, but it should be the same as the function's return value.
*
* @param n_events number of events of interest
* @param event events to track
* @param chunk chunk number to enable
* @param n_counters number of counters available
*
* @return mask usable to manipulate the counters enabled
*/
static inline
counter_bitfield_t sel4bench_enable_counters(seL4_Word n_events,
event_id_t *events,
seL4_Word chunk,
seL4_Word n_counters)
{
assert(chunk < sel4bench_get_num_counter_chunks(n_counters, n_events));
assert(n_counters == sel4bench_get_num_counters());
counter_bitfield_t mask = 0;
for (seL4_Word i = 0; i < n_counters; i++) {
seL4_Word counter = chunk * n_counters + i;
if (counter >= n_events) {
break;
}
sel4bench_set_count_event(i, events[counter]);
mask |= BIT(i);
}
sel4bench_reset_counters();
sel4bench_start_counters(mask);
return mask;
}
/**
* Read and stop the counters set in `mask`.
*
* `n_counters` is a parameter because calling `sel4bench_get_num_counters()`
* can be expensive, but it should be the same as the function's return value.
*
* `results` must point to an array the size of n_events, as passed to
* `sel4bench_enable_counters()`.
*
* @param mask as returned by `sel4bench_enable_counters()`
* @param chunk as passed to `sel4bench_enable_counters()`
* @param n_counters number of counters available
* @param results array of counter results
*/
static inline void sel4bench_read_and_stop_counters(counter_bitfield_t mask,
seL4_Word chunk,
seL4_Word n_counters,
ccnt_t results[])
{
sel4bench_get_counters(mask, &results[chunk * n_counters]);
sel4bench_stop_counters(mask);
}
/**
* Call `sel4bench_enable_counters()` on the `GENERIC_EVENTS` supplied for all
* platforms by this library.
*
* See `sel4bench_enable_counters()` for parameters and return value.
*/
static inline counter_bitfield_t sel4bench_enable_generic_counters(
seL4_Word chunk, seL4_Word n_counters)
{
return sel4bench_enable_counters(SEL4BENCH_NUM_GENERIC_EVENTS,
GENERIC_EVENTS, chunk, n_counters);
}
/**
* Call `sel4bench_get_num_counter_chunks()` for the `GENERIC_EVENTS` supplied
* for all platforms by this library.
*
* See `sel4bench_get_num_counter_chunks()` for parameters and return value.
*/
static inline int sel4bench_get_num_generic_counter_chunks(seL4_Word n_counters)
{
return sel4bench_get_num_counter_chunks(n_counters,
SEL4BENCH_NUM_GENERIC_EVENTS);
}