sel4bench.h: update documentation and style

* Expand documentation of `sel4bench_enable_counters()`.
* Document `sel4bench_get_num_counter_chunks()`.
* Add missing @param and @return notations.
* Consistently use plurals in the strings for generic event names.
* Use Markdown backticks for literals (e.g., symbol names).
* Keep @param and @return descriptions terse (they should be noun
  phrases appropriate for tool tips).  Elaborations go in the function
  description.
* Don't abbreviate `sel4bench_init()` and `sel4bench_destroy()` as
  `_init()` and `_destroy()`, respectively.  Use the full names.
* Move documentation of constraint on number of counters to
  `sel4bench_get_num_counters()`, instead of duplicating it in two other
  functions.
* Emphasise that the cycle counter is not a performance counter.
* Clarify reference to ARM1136 platform, a.k.a. "kzm".

Flag open questions:
* Should some of these prototypes and constant objects be moved to a
  "private" header file?

Minor style issues:
* Perform minor wordsmithing.
* Use hyphens more idiomatically.
* Wrap lines at 80 columns.
* Use two spaces between sentences for readability.
diff --git a/libsel4bench/include/sel4bench/sel4bench.h b/libsel4bench/include/sel4bench/sel4bench.h
index 7c8e02d..f6eac71 100644
--- a/libsel4bench/include/sel4bench/sel4bench.h
+++ b/libsel4bench/include/sel4bench/sel4bench.h
@@ -27,27 +27,27 @@
  * monitoring counters (PMCs) in modern IA-32 and ARM processors, so that you
  * can measure the performance of your software.  It will also work out whether
  * certain operations need to be done in kernel mode, and perform kernel code
- * injection calls to make them happen. As a result, expect that any library
- * call could potentially result in a syscall. (This is of particular note on
- * the ARM1136, for which even reading the cycle counter must be done in kernel
- * mode.)
+ * injection calls to make them happen.  As a result, expect that any library
+ * call could potentially result in a syscall.  (This is of particular note on
+ * the KZM/ARM1136, for which even reading the cycle counter must be done in
+ * kernel mode.)
  *
  * It also goes out of its way to ensure that there's always a cycle counter
- * available for use. _init() will start this running, and _destroy() will tear
- * it down, if necessary.
+ * available for use.  `sel4bench_init()` will start this running, and
+ * `sel4bench_destroy()` will tear it down, if necessary.
  *
  * Notes:
  * - Overflow is completely ignored, even on processors that only support
- *   32-bit counters (and thus where there is space to overflow into). If you
+ *   32-bit counters (and thus where there is space to overflow into).  If you
  *   are doing something that might overflow a counter, it's up to you to deal
  *   with that possibility.
  * - Everything is zero-indexed.
  */
 
 /*
- * CPP constants for events that are common to all architecture
- * variants.
- * Additional events are architecture (and potentially processor) specific.
+ * CPP constants for events that are common to all architecture variants.
+ *
+ * Additional events are architecture- (and potentially processor-) specific.
  * These may be defined in architecture or processor header files.
  */
 static UNUSED event_id_t GENERIC_EVENTS[] = {
@@ -66,8 +66,8 @@
     "L1 i-tlb misses",
     "L1 d-tlb misses",
     "Instructions",
-    "Branch mispredict",
-    "Memory access",
+    "Branch mispredictions",
+    "Memory accesses",
 };
 
 static_assert(ARRAY_SIZE(GENERIC_EVENTS) == ARRAY_SIZE(GENERIC_EVENT_NAMES),
@@ -77,25 +77,28 @@
 #define SEL4BENCH_NUM_GENERIC_EVENTS ARRAY_SIZE(GENERIC_EVENTS)
 
 /**
- * Initialise the sel4bench library. Nothing else is guaranteed to work, and
+ * Initialise the sel4bench library.  Nothing else is guaranteed to work, and
  * may produce strange failures, if you don't do this first.
- * Starts the cycle counter, which is guaranteed to run until _destroy() is
- * called.
+ *
+ * Starts the cycle counter, which is guaranteed to run until
+ * `sel4bench_destroy()` is called.
  */
 static UNUSED void sel4bench_init();
 
 /**
- * Tear down the sel4bench library. Nothing else is guaranteed to work, and may
+ * Tear down the sel4bench library.  Nothing else is guaranteed to work, and may
  * produce strange failures, after you do this.
  */
 static UNUSED void sel4bench_destroy();
 
 /**
- * Query the cycle counter. If said counter needs starting, _init() will have
- * taken care of it.
+ * Query the cycle counter.  If said counter needs starting, `sel4bench_init()`
+ * will have taken care of it.
  *
- * @return The current cycle count. This might be since _init(), if the cycle
- *         counter needs explicit starting, or since bootup, if it freewheels.
+ * The returned cycle count might be since `sel4bench_init()`, if the cycle
+ * counter needs explicit starting, or since bootup, if it freewheels.
+ *
+ * @return current cycle count
  */
 static UNUSED ccnt_t sel4bench_get_cycle_count();
 
@@ -103,98 +106,141 @@
  * Query how many performance counters are supported on this CPU, excluding the
  * cycle counter.
  *
- * @return Processor's available counters.
+ * Note that the return value is of type `seL4_Word`; consequently, this library
+ * supports a number of counters less than or equal to the machine word size in
+ * bits.
+
+ * @return quantity of counters on this CPU
  */
 static UNUSED seL4_Word sel4bench_get_num_counters();
 
 /**
- * Query the description of a counter
- * @param counter The counter to query
- * @return An ASCII string prepresentation of the counters description, or NULL
- *         if the counter does not exist.
+ * Query the description of a counter.
+ *
+ * @param counter counter to query
+ *
+ * @return ASCII string representation of counter's description; `NULL` if
+ * counter does not exist
  */
 const char *sel4bench_get_counter_description(counter_t counter);
 
 /**
  * Query the value of a counter.
  *
- * @param counter The counter to query.
- * @return The value of the counter.
+ * @param counter counter to query
+ *
+ * @return counter value
  */
 static UNUSED ccnt_t sel4bench_get_counter(counter_t counter);
 
 /**
  * Query the value of a set of counters.
  *
- * @param counters A bitfield indicating which counter(s) to query.
- * @param values   An array of ccnt_t, of length equal to the
- *                 highest counter index to be read (to a maximum of
- *                 sel4bench_get_num_counters()). Each counter to be read
- *                 will be written to its corresponding index in this array.
- * @return The current cycle count, as sel4bench_get_cycle_count()
+ * `values` must point to an array of a length at least equal to the highest
+ * counter index to be read (to a maximum of `sel4bench_get_num_counters()`).
+ * Each counter to be read will be written to its corresponding index in this
+ * array.
+ *
+ * @param counters bitfield indicating which counter(s) in `values` to query
+ * @param values   array of counters
+ *
+ * @return current cycle count as in `sel4bench_get_cycle_count()`
  */
 static UNUSED ccnt_t sel4bench_get_counters(counter_bitfield_t counters,
                                             ccnt_t *values);
 
 /**
- * Assign a counter to track a specific event. Events are processor-specific,
+ * Assign a counter to track a specific event.  Events are processor-specific,
  * though some common ones might be exposed through preprocessor constants.
  *
- * @param counter The counter to configure.
- * @param event The event to track.
+ * @param counter counter to configure
+ * @param event   event to track
  */
 static UNUSED void sel4bench_set_count_event(counter_t counter, event_id_t id);
 
 /**
- * Start counting events on a set of performance counters. The argument is a
- * bitfield detailing that set. (Note that this means the library supports a
- * number of counters less than or equal to the machine word size in bits.)
+ * Start counting events on a set of performance counters.
  *
- * @param counters A bitfield indicating which counter(s) to start.
+ * @param counters bitfield indicating which counter(s) to start
  */
 static UNUSED void sel4bench_start_counters(counter_bitfield_t counters);
 
 /**
- * Stop counting events on a set of performance counters. The argument is a
- * bitfield detailing that set. (Note that this means the library supports a
- * number of counters less than or equal to the machine word size in bits.)
+ * Stop counting events on a set of performance counters.
  *
- * Some processors (notably, the ARM1136) may not support this operation.
+ * Note: Some processors (notably, the KZM/ARM1136) may not support this
+ * operation.
  *
- * @param counters A bitfield indicating which counter(s) to stop.
+ * @param counters bitfield indicating which counter(s) to stop
  */
 static UNUSED void sel4bench_stop_counters(counter_bitfield_t counters);
 
 /**
- * Reset all performance counters to zero (not including the cycle counter).
+ * Reset all performance counters to zero.  Note that the cycle counter is not a
+ * performance counter, and is not reset.
  *
  */
 static UNUSED void sel4bench_reset_counters(void);
 
-/*
- * @return the number benchmark loops required to read a number of events
+/**
+ * Query the number of benchmark loops required to read a given number of
+ * events.
+ *
+ * @param n_counters number of counters available
+ * @param n_events   number of events of interest
+ *
+ * @return number of benchmark loops required
  */
-static inline int sel4bench_get_num_counter_chunks(seL4_Word n_counters, seL4_Word n_events)
+static inline int sel4bench_get_num_counter_chunks(seL4_Word n_counters,
+                                                   seL4_Word n_events)
 {
     return DIV_ROUND_UP(n_events, n_counters);
 }
 
-/*
+/**
  * Enable a chunk of the event counters passed in.
  *
- * @param chunk      events is divided into sel4bench_get_num_counter_chunks, determined by
- *                   ceiling(n_events / n_counters).
- * @param n_counters value returned by sel4bench_get_num_counters(), passed in to avoid
- *                         expensive operations multiple times.
- * @return a mask    used to manipulate the counters enabled.
+ * A "chunk" is a quantity of events not larger than the number of performance
+ * counters available.  Because we can be interested in more events than there
+ * are counters, the events are broken into numbered chunks (zero-indexed).  The
+ * quantity of chunks is ceil(n_events / n_counters).
+ *
+ * Imagine we had 10 events to track but n_counters was only 8 (i.e., an 8-bit
+ * machine).
+ *
+ *     +--chunk 1-+--chunk 0-+
+ *     | xxxxxxxx | xxxxxxxx |
+ *     +---------------------+
+ *
+ * sel4bench_enable_counters(10, events, 0, 8) would return 255:
+ *
+ *     +--chunk 1-+--chunk 0-+
+ *     | 00000000 | 11111111 |
+ *     +---------------------+
+ *
+ * sel4bench_enable_counters(10, events, 1, 8) would return 3:
+ *
+ *     +--chunk 1-+--chunk 0-+
+ *     | 00000011 | 00000000 |
+ *     +---------------------+
+ *
+ * `n_counters` is a parameter because calling `sel4bench_get_num_counters()`
+ * can be expensive, but it should be the same as the function's return value.
+ *
+ * @param n_events   number of events of interest
+ * @param event      events to track
+ * @param chunk      chunk number to enable
+ * @param n_counters number of counters available
+ *
+ * @return mask usable to manipulate the counters enabled
  */
-static inline counter_bitfield_t sel4bench_enable_counters(seL4_Word n_events, event_id_t events[n_events],
-                                                           seL4_Word chunk,
-                                                           seL4_Word n_counters)
+static inline
+counter_bitfield_t sel4bench_enable_counters(seL4_Word n_events,
+                                             event_id_t *events,
+                                             seL4_Word chunk,
+                                             seL4_Word n_counters)
 {
     assert(chunk < sel4bench_get_num_counter_chunks(n_counters, n_events));
-    /* This value is passed in as it can be expensive to call, but
-     * should not be different to the returned function result */
     assert(n_counters == sel4bench_get_num_counters());
     counter_bitfield_t mask = 0;
 
@@ -212,33 +258,50 @@
     return mask;
 }
 
-/*
- * Read and stop the counters set in mask.
+/**
+ * Read and stop the counters set in `mask`.
  *
- * @param chunk      as passed to sel4bench_enable_counters.
- * @param n_counters value returned by sel4bench_get_num_counters(), passed in to avoid
- *                         expensive operations multiple times.
- * @param mask       returned by sel4bench_enable_counters.
- * @param results    array of counter results. Must match the size of n_events as passed to
- *                   sel4bench_enable_counters.
+ * `n_counters` is a parameter because calling `sel4bench_get_num_counters()`
+ * can be expensive, but it should be the same as the function's return value.
+ *
+ * `results` must point to an array the size of n_events, as passed to
+ * `sel4bench_enable_counters()`.
+ *
+ * @param mask       as returned by `sel4bench_enable_counters()`
+ * @param chunk      as passed to `sel4bench_enable_counters()`
+ * @param n_counters number of counters available
+ * @param results    array of counter results
  */
-static inline void sel4bench_read_and_stop_counters(counter_bitfield_t mask, seL4_Word chunk, seL4_Word n_counters,
+static inline void sel4bench_read_and_stop_counters(counter_bitfield_t mask,
+                                                    seL4_Word chunk,
+                                                    seL4_Word n_counters,
                                                     ccnt_t results[])
 {
     sel4bench_get_counters(mask, &results[chunk * n_counters]);
     sel4bench_stop_counters(mask);
 }
 
-/* shortcut for calling sel4bench_enable_counters on the GENERIC_EVENTS supplied by all platforms in
- * libsel4bench */
-static inline counter_bitfield_t sel4bench_enable_generic_counters(seL4_Word chunk, seL4_Word n_counters)
+/**
+ * Call `sel4bench_enable_counters()` on the `GENERIC_EVENTS` supplied for all
+ * platforms by this library.
+ *
+ * See `sel4bench_enable_counters()` for parameters and return value.
+ */
+static inline counter_bitfield_t sel4bench_enable_generic_counters(
+    seL4_Word chunk, seL4_Word n_counters)
 {
-    return sel4bench_enable_counters(SEL4BENCH_NUM_GENERIC_EVENTS, GENERIC_EVENTS, chunk, n_counters);
+    return sel4bench_enable_counters(SEL4BENCH_NUM_GENERIC_EVENTS,
+                                     GENERIC_EVENTS, chunk, n_counters);
 }
 
-/* shortcut for calling sel4bench_enable_counters on the GENERIC_EVENTS supplied by all platforms in
- * libsel4bench */
+/**
+ * Call `sel4bench_get_num_counter_chunks()` for the `GENERIC_EVENTS` supplied
+ * for all platforms by this library.
+ *
+ * See `sel4bench_get_num_counter_chunks()` for parameters and return value.
+ */
 static inline int sel4bench_get_num_generic_counter_chunks(seL4_Word n_counters)
 {
-    return sel4bench_get_num_counter_chunks(n_counters, SEL4BENCH_NUM_GENERIC_EVENTS);
+    return sel4bench_get_num_counter_chunks(n_counters,
+                                            SEL4BENCH_NUM_GENERIC_EVENTS);
 }