sel4bench.h: update documentation and style * Expand documentation of `sel4bench_enable_counters()`. * Document `sel4bench_get_num_counter_chunks()`. * Add missing @param and @return notations. * Consistently use plurals in the strings for generic event names. * Use Markdown backticks for literals (e.g., symbol names). * Keep @param and @return descriptions terse (they should be noun phrases appropriate for tool tips). Elaborations go in the function description. * Don't abbreviate `sel4bench_init()` and `sel4bench_destroy()` as `_init()` and `_destroy()`, respectively. Use the full names. * Move documentation of constraint on number of counters to `sel4bench_get_num_counters()`, instead of duplicating it in two other functions. * Emphasise that the cycle counter is not a performance counter. * Clarify reference to ARM1136 platform, a.k.a. "kzm". Flag open questions: * Should some of these prototypes and constant objects be moved to a "private" header file? Minor style issues: * Perform minor wordsmithing. * Use hyphens more idiomatically. * Wrap lines at 80 columns. * Use two spaces between sentences for readability.
diff --git a/libsel4bench/include/sel4bench/sel4bench.h b/libsel4bench/include/sel4bench/sel4bench.h index 7c8e02d..f6eac71 100644 --- a/libsel4bench/include/sel4bench/sel4bench.h +++ b/libsel4bench/include/sel4bench/sel4bench.h
@@ -27,27 +27,27 @@ * monitoring counters (PMCs) in modern IA-32 and ARM processors, so that you * can measure the performance of your software. It will also work out whether * certain operations need to be done in kernel mode, and perform kernel code - * injection calls to make them happen. As a result, expect that any library - * call could potentially result in a syscall. (This is of particular note on - * the ARM1136, for which even reading the cycle counter must be done in kernel - * mode.) + * injection calls to make them happen. As a result, expect that any library + * call could potentially result in a syscall. (This is of particular note on + * the KZM/ARM1136, for which even reading the cycle counter must be done in + * kernel mode.) * * It also goes out of its way to ensure that there's always a cycle counter - * available for use. _init() will start this running, and _destroy() will tear - * it down, if necessary. + * available for use. `sel4bench_init()` will start this running, and + * `sel4bench_destroy()` will tear it down, if necessary. * * Notes: * - Overflow is completely ignored, even on processors that only support - * 32-bit counters (and thus where there is space to overflow into). If you + * 32-bit counters (and thus where there is space to overflow into). If you * are doing something that might overflow a counter, it's up to you to deal * with that possibility. * - Everything is zero-indexed. */ /* - * CPP constants for events that are common to all architecture - * variants. - * Additional events are architecture (and potentially processor) specific. + * CPP constants for events that are common to all architecture variants. + * + * Additional events are architecture- (and potentially processor-) specific. * These may be defined in architecture or processor header files. */ static UNUSED event_id_t GENERIC_EVENTS[] = { @@ -66,8 +66,8 @@ "L1 i-tlb misses", "L1 d-tlb misses", "Instructions", - "Branch mispredict", - "Memory access", + "Branch mispredictions", + "Memory accesses", }; static_assert(ARRAY_SIZE(GENERIC_EVENTS) == ARRAY_SIZE(GENERIC_EVENT_NAMES), @@ -77,25 +77,28 @@ #define SEL4BENCH_NUM_GENERIC_EVENTS ARRAY_SIZE(GENERIC_EVENTS) /** - * Initialise the sel4bench library. Nothing else is guaranteed to work, and + * Initialise the sel4bench library. Nothing else is guaranteed to work, and * may produce strange failures, if you don't do this first. - * Starts the cycle counter, which is guaranteed to run until _destroy() is - * called. + * + * Starts the cycle counter, which is guaranteed to run until + * `sel4bench_destroy()` is called. */ static UNUSED void sel4bench_init(); /** - * Tear down the sel4bench library. Nothing else is guaranteed to work, and may + * Tear down the sel4bench library. Nothing else is guaranteed to work, and may * produce strange failures, after you do this. */ static UNUSED void sel4bench_destroy(); /** - * Query the cycle counter. If said counter needs starting, _init() will have - * taken care of it. + * Query the cycle counter. If said counter needs starting, `sel4bench_init()` + * will have taken care of it. * - * @return The current cycle count. This might be since _init(), if the cycle - * counter needs explicit starting, or since bootup, if it freewheels. + * The returned cycle count might be since `sel4bench_init()`, if the cycle + * counter needs explicit starting, or since bootup, if it freewheels. + * + * @return current cycle count */ static UNUSED ccnt_t sel4bench_get_cycle_count(); @@ -103,98 +106,141 @@ * Query how many performance counters are supported on this CPU, excluding the * cycle counter. * - * @return Processor's available counters. + * Note that the return value is of type `seL4_Word`; consequently, this library + * supports a number of counters less than or equal to the machine word size in + * bits. + + * @return quantity of counters on this CPU */ static UNUSED seL4_Word sel4bench_get_num_counters(); /** - * Query the description of a counter - * @param counter The counter to query - * @return An ASCII string prepresentation of the counters description, or NULL - * if the counter does not exist. + * Query the description of a counter. + * + * @param counter counter to query + * + * @return ASCII string representation of counter's description; `NULL` if + * counter does not exist */ const char *sel4bench_get_counter_description(counter_t counter); /** * Query the value of a counter. * - * @param counter The counter to query. - * @return The value of the counter. + * @param counter counter to query + * + * @return counter value */ static UNUSED ccnt_t sel4bench_get_counter(counter_t counter); /** * Query the value of a set of counters. * - * @param counters A bitfield indicating which counter(s) to query. - * @param values An array of ccnt_t, of length equal to the - * highest counter index to be read (to a maximum of - * sel4bench_get_num_counters()). Each counter to be read - * will be written to its corresponding index in this array. - * @return The current cycle count, as sel4bench_get_cycle_count() + * `values` must point to an array of a length at least equal to the highest + * counter index to be read (to a maximum of `sel4bench_get_num_counters()`). + * Each counter to be read will be written to its corresponding index in this + * array. + * + * @param counters bitfield indicating which counter(s) in `values` to query + * @param values array of counters + * + * @return current cycle count as in `sel4bench_get_cycle_count()` */ static UNUSED ccnt_t sel4bench_get_counters(counter_bitfield_t counters, ccnt_t *values); /** - * Assign a counter to track a specific event. Events are processor-specific, + * Assign a counter to track a specific event. Events are processor-specific, * though some common ones might be exposed through preprocessor constants. * - * @param counter The counter to configure. - * @param event The event to track. + * @param counter counter to configure + * @param event event to track */ static UNUSED void sel4bench_set_count_event(counter_t counter, event_id_t id); /** - * Start counting events on a set of performance counters. The argument is a - * bitfield detailing that set. (Note that this means the library supports a - * number of counters less than or equal to the machine word size in bits.) + * Start counting events on a set of performance counters. * - * @param counters A bitfield indicating which counter(s) to start. + * @param counters bitfield indicating which counter(s) to start */ static UNUSED void sel4bench_start_counters(counter_bitfield_t counters); /** - * Stop counting events on a set of performance counters. The argument is a - * bitfield detailing that set. (Note that this means the library supports a - * number of counters less than or equal to the machine word size in bits.) + * Stop counting events on a set of performance counters. * - * Some processors (notably, the ARM1136) may not support this operation. + * Note: Some processors (notably, the KZM/ARM1136) may not support this + * operation. * - * @param counters A bitfield indicating which counter(s) to stop. + * @param counters bitfield indicating which counter(s) to stop */ static UNUSED void sel4bench_stop_counters(counter_bitfield_t counters); /** - * Reset all performance counters to zero (not including the cycle counter). + * Reset all performance counters to zero. Note that the cycle counter is not a + * performance counter, and is not reset. * */ static UNUSED void sel4bench_reset_counters(void); -/* - * @return the number benchmark loops required to read a number of events +/** + * Query the number of benchmark loops required to read a given number of + * events. + * + * @param n_counters number of counters available + * @param n_events number of events of interest + * + * @return number of benchmark loops required */ -static inline int sel4bench_get_num_counter_chunks(seL4_Word n_counters, seL4_Word n_events) +static inline int sel4bench_get_num_counter_chunks(seL4_Word n_counters, + seL4_Word n_events) { return DIV_ROUND_UP(n_events, n_counters); } -/* +/** * Enable a chunk of the event counters passed in. * - * @param chunk events is divided into sel4bench_get_num_counter_chunks, determined by - * ceiling(n_events / n_counters). - * @param n_counters value returned by sel4bench_get_num_counters(), passed in to avoid - * expensive operations multiple times. - * @return a mask used to manipulate the counters enabled. + * A "chunk" is a quantity of events not larger than the number of performance + * counters available. Because we can be interested in more events than there + * are counters, the events are broken into numbered chunks (zero-indexed). The + * quantity of chunks is ceil(n_events / n_counters). + * + * Imagine we had 10 events to track but n_counters was only 8 (i.e., an 8-bit + * machine). + * + * +--chunk 1-+--chunk 0-+ + * | xxxxxxxx | xxxxxxxx | + * +---------------------+ + * + * sel4bench_enable_counters(10, events, 0, 8) would return 255: + * + * +--chunk 1-+--chunk 0-+ + * | 00000000 | 11111111 | + * +---------------------+ + * + * sel4bench_enable_counters(10, events, 1, 8) would return 3: + * + * +--chunk 1-+--chunk 0-+ + * | 00000011 | 00000000 | + * +---------------------+ + * + * `n_counters` is a parameter because calling `sel4bench_get_num_counters()` + * can be expensive, but it should be the same as the function's return value. + * + * @param n_events number of events of interest + * @param event events to track + * @param chunk chunk number to enable + * @param n_counters number of counters available + * + * @return mask usable to manipulate the counters enabled */ -static inline counter_bitfield_t sel4bench_enable_counters(seL4_Word n_events, event_id_t events[n_events], - seL4_Word chunk, - seL4_Word n_counters) +static inline +counter_bitfield_t sel4bench_enable_counters(seL4_Word n_events, + event_id_t *events, + seL4_Word chunk, + seL4_Word n_counters) { assert(chunk < sel4bench_get_num_counter_chunks(n_counters, n_events)); - /* This value is passed in as it can be expensive to call, but - * should not be different to the returned function result */ assert(n_counters == sel4bench_get_num_counters()); counter_bitfield_t mask = 0; @@ -212,33 +258,50 @@ return mask; } -/* - * Read and stop the counters set in mask. +/** + * Read and stop the counters set in `mask`. * - * @param chunk as passed to sel4bench_enable_counters. - * @param n_counters value returned by sel4bench_get_num_counters(), passed in to avoid - * expensive operations multiple times. - * @param mask returned by sel4bench_enable_counters. - * @param results array of counter results. Must match the size of n_events as passed to - * sel4bench_enable_counters. + * `n_counters` is a parameter because calling `sel4bench_get_num_counters()` + * can be expensive, but it should be the same as the function's return value. + * + * `results` must point to an array the size of n_events, as passed to + * `sel4bench_enable_counters()`. + * + * @param mask as returned by `sel4bench_enable_counters()` + * @param chunk as passed to `sel4bench_enable_counters()` + * @param n_counters number of counters available + * @param results array of counter results */ -static inline void sel4bench_read_and_stop_counters(counter_bitfield_t mask, seL4_Word chunk, seL4_Word n_counters, +static inline void sel4bench_read_and_stop_counters(counter_bitfield_t mask, + seL4_Word chunk, + seL4_Word n_counters, ccnt_t results[]) { sel4bench_get_counters(mask, &results[chunk * n_counters]); sel4bench_stop_counters(mask); } -/* shortcut for calling sel4bench_enable_counters on the GENERIC_EVENTS supplied by all platforms in - * libsel4bench */ -static inline counter_bitfield_t sel4bench_enable_generic_counters(seL4_Word chunk, seL4_Word n_counters) +/** + * Call `sel4bench_enable_counters()` on the `GENERIC_EVENTS` supplied for all + * platforms by this library. + * + * See `sel4bench_enable_counters()` for parameters and return value. + */ +static inline counter_bitfield_t sel4bench_enable_generic_counters( + seL4_Word chunk, seL4_Word n_counters) { - return sel4bench_enable_counters(SEL4BENCH_NUM_GENERIC_EVENTS, GENERIC_EVENTS, chunk, n_counters); + return sel4bench_enable_counters(SEL4BENCH_NUM_GENERIC_EVENTS, + GENERIC_EVENTS, chunk, n_counters); } -/* shortcut for calling sel4bench_enable_counters on the GENERIC_EVENTS supplied by all platforms in - * libsel4bench */ +/** + * Call `sel4bench_get_num_counter_chunks()` for the `GENERIC_EVENTS` supplied + * for all platforms by this library. + * + * See `sel4bench_get_num_counter_chunks()` for parameters and return value. + */ static inline int sel4bench_get_num_generic_counter_chunks(seL4_Word n_counters) { - return sel4bench_get_num_counter_chunks(n_counters, SEL4BENCH_NUM_GENERIC_EVENTS); + return sel4bench_get_num_counter_chunks(n_counters, + SEL4BENCH_NUM_GENERIC_EVENTS); }