[otbnsim] Collect statistics about basic blocks

Calculate the length of basic blocks and extended basic blocks and
present the findings as histogram when asked to dump statistics about
the execution.

Also include unit tests to check the counting behavior on small
examples.

Example:

```
$ hw/ip/otbn/dv/otbnsim/standalone.py build-out/sw/otbn/rsa_1024_enc_test.elf --dump-stats=-
[some output omitted]

Basic block statistics
----------------------

Number of instructions within a basic block

  # of instr.    frequency
-------------  -----------
            1         3250
            2         2367
            3         6274
            4           40
            5         6720
            6         1381
            7         1362
            8            3
            9           84
           10         2049
           12            1
           13          255
           17          757

Number of instructions within an extended basic block

  # of instr.    frequency
-------------  -----------
            1           67
            2         1090
            3         6150
            4            5
            5         6150
            6         1374
            7            2
            8         1028
            9           18
           10            1
           11            1
           12         2048
           14            1
           15           84
           19            1
           48          168
          122            3
          124           60
          125           20
          127            1
         3354            1
```

Signed-off-by: Philipp Wagner <phw@lowrisc.org>
diff --git a/hw/ip/otbn/dv/otbnsim/sim/stats.py b/hw/ip/otbn/dv/otbnsim/sim/stats.py
index 23c1948..10be8e1 100644
--- a/hw/ip/otbn/dv/otbnsim/sim/stats.py
+++ b/hw/ip/otbn/dv/otbnsim/sim/stats.py
@@ -11,7 +11,7 @@
 from elftools.elf.sections import SymbolTableSection  # type: ignore
 from tabulate import tabulate
 
-from .insn import JAL, JALR, LOOP, LOOPI
+from .insn import BEQ, BNE, ECALL, JAL, JALR, LOOP, LOOPI
 from .isa import OTBNInsn
 from .state import OTBNState
 
@@ -26,8 +26,14 @@
         self.func_calls = []  # type: List[Dict[str, int]]
         self.loops = []  # type: List[Dict[str, int]]
 
-    @property
-    def insn_count(self) -> int:
+        # Histogram indexed by the length of the (extended) basic block.
+        self.basic_block_histo = Counter()  # type: typing.Counter[int]
+        self.ext_basic_block_histo = Counter()  # type: typing.Counter[int]
+
+        self._current_basic_block_len = 0
+        self._current_ext_basic_block_len = 0
+
+    def get_insn_count(self) -> int:
         '''Get the number of executed instructions.'''
         return sum(self.insn_histo.values())
 
@@ -35,6 +41,13 @@
         '''Record a single stall cycle.'''
         self.stall_count += 1
 
+    def _insn_at_addr(self, addr: int) -> Optional[OTBNInsn]:
+        '''Get the instruction at a given address.'''
+        assert addr % 4 == 0
+        assert addr >= 0
+        word_addr = addr >> 2
+        return self.program[word_addr]
+
     def record_insn(self,
                     insn: OTBNInsn,
                     state_bc: OTBNState) -> None:
@@ -47,6 +60,7 @@
         pc = state_bc.pc
 
         is_jump = isinstance(insn, JAL) or isinstance(insn, JALR)
+        is_branch = isinstance(insn, BEQ) or isinstance(insn, BNE)
 
         # Instruction histogram
         self.insn_histo[insn.insn.mnemonic] += 1
@@ -77,6 +91,39 @@
                 'iterations': iterations,
             })
 
+        is_last_insn_in_loop_body = state_bc.loop_stack.is_last_insn_in_loop_body(pc)
+
+        # Basic blocks
+        # A basic block is a linear sequence of code ending with an instruction
+        # that can potentially change the control flow: a jump, a branch, the
+        # last instruction in a loop (LOOP or LOOPI) body, or an ECALL. The
+        # length of the basic block equals the number of instructions within the
+        # basic block.
+        self._current_basic_block_len += 1
+        if (is_jump or is_branch or is_last_insn_in_loop_body or
+                isinstance(insn, ECALL)):
+
+            self.basic_block_histo[self._current_basic_block_len] += 1
+            self._current_basic_block_len = 0
+
+        # Extended basic blocks
+        # An extended basic block is a sequence of one or more basic blocks
+        # which can be statically determined at compile time.
+        # Extended basic blocks end with a branch, the last instruction in a
+        # LOOP body, or an ECALL instruction.
+
+        # Determine if the current instruction is the last instruction in a LOOP
+        # body (only LOOP, not LOOPI!).
+        finishing_loop = False
+        if is_last_insn_in_loop_body:
+            loop_insn_addr = state_bc.loop_stack.stack[-1].get_loop_insn_addr()
+            finishing_loop = isinstance(self._insn_at_addr(loop_insn_addr), LOOP)
+
+        self._current_ext_basic_block_len += 1
+        if is_branch or finishing_loop or isinstance(insn, ECALL):
+            self.ext_basic_block_histo[self._current_ext_basic_block_len] += 1
+            self._current_ext_basic_block_len = 0
+
 
 def _dwarf_decode_file_line(dwarf_info: DWARFInfo,
                             address: int) -> Optional[Tuple[str, int]]:
@@ -162,6 +209,10 @@
         out += "-----------------------\n"
         out += self._dump_insn_histo()
         out += "\n\n"
+        out += "Basic block statistics\n"
+        out += "----------------------\n"
+        out += self._dump_basic_block_stats()
+        out += "\n\n"
         out += "Function call statistics\n"
         out += "------------------------\n"
         out += self._dump_function_call_stats()
@@ -174,7 +225,7 @@
         return out
 
     def _dump_execution_time(self) -> str:
-        insn_count = self._stats.insn_count
+        insn_count = self._stats.get_insn_count()
         stall_count = self._stats.stall_count
         stall_percent = stall_count / insn_count * 100
         cycles = insn_count + stall_count
@@ -193,6 +244,17 @@
         return tabulate(self._stats.insn_histo.most_common(),
                         headers=['instruction', 'count']) + "\n"
 
+    def _dump_basic_block_stats(self) -> str:
+        out = []
+        out += ["", "Number of instructions within a basic block", ""]
+        out += [tabulate(sorted(self._stats.basic_block_histo.items()),
+                         headers=['# of instr.', 'frequency'])]
+        out += ['', '']
+        out += ["Number of instructions within an extended basic block", ""]
+        out.append(tabulate(sorted(self._stats.ext_basic_block_histo.items()),
+                   headers=['# of instr.', 'frequency']))
+        return '\n'.join(out)
+
     def _dump_function_call_stats(self) -> str:
         '''Dump function call statistics'''
 
diff --git a/hw/ip/otbn/dv/otbnsim/test/stats_test.py b/hw/ip/otbn/dv/otbnsim/test/stats_test.py
index 435e237..7941196 100644
--- a/hw/ip/otbn/dv/otbnsim/test/stats_test.py
+++ b/hw/ip/otbn/dv/otbnsim/test/stats_test.py
@@ -32,6 +32,102 @@
     return _run_sim_for_stats(sim)
 
 
+def test_basic_block_stats(tmpdir: py.path.local) -> None:
+    '''Check if statistics for basic blocks are calculated correctly.'''
+
+    asm = """
+    jump:
+      /* A basic block of 4 instructions, ending with a jump. */
+      addi x7, x7, 1
+      addi x7, x7, 2
+      addi x7, x7, 3
+      jal x0, branch1
+
+    nop /* Should never be executed. */
+
+    branch1:
+      /* A basic block of 3 instructions, ending with a branch. */
+      addi x7, x7, 4
+      addi x7, x7, -10
+      beq x7, x0, branch2
+
+    branch2:
+      /* A basic block of 3 instructions, ending with a branch. */
+      addi x7, x7, 4
+      addi x7, x7, -4
+      beq x7, x0, exit
+
+    nop /* Should never be executed. */
+
+    exit:
+      ecall
+    """
+
+    stats = _simulate_asm_str(asm, tmpdir)
+
+    assert stats.get_insn_count() == 11
+    assert sorted(stats.basic_block_histo) == sorted({
+        4: 1,  # 1 basic block with 4 instructions (jump)
+        3: 2,  # 2 basic blocks with 3 instructions (branch1 and branch2)
+        1: 1   # 1 basic block with 1 instruction (exit)
+    })
+
+    assert sorted(stats.ext_basic_block_histo) == sorted({
+        7: 1,  # 1 ext. basic block with 7 instructions (jump + branch1)
+        3: 2,  # 1 ext. basic block with 3 instructions (branch2)
+        1: 1   # 1 ext. basic block with 1 instruction (exit)
+    })
+
+
+def test_basic_block_stats_loop(tmpdir: py.path.local) -> None:
+    '''Check if statistics for basic blocks LOOPs are calculated properly.'''
+
+    asm = """
+    /* Loop x7 == 3 times over a body of 2 instructions. */
+    addi x7, x0, 3
+    loop x7, 2
+      nop
+      nop
+    ecall
+    """
+
+    stats = _simulate_asm_str(asm, tmpdir)
+
+    assert stats.get_insn_count() == 9
+    assert sorted(stats.basic_block_histo) == sorted({
+        4: 1,  # 1 basic block with 4 instructions (addi + loop + 2x nop)
+        2: 1,  # 1 basic block with 2 instructions (loop body on second iter.)
+        1: 1   # 1 basic block with 1 instruction (ecall)
+    })
+
+    assert sorted(stats.ext_basic_block_histo) == sorted(stats.basic_block_histo)
+
+
+def test_basic_block_stats_loopi(tmpdir: py.path.local) -> None:
+    '''Check if statistics for basic blocks in LOOPIs are calculated properly'''
+
+    asm = """
+    /* Loop 3 times over a body of 2 instructions. */
+    loopi 3, 2
+      nop
+      nop
+    ecall
+    """
+
+    stats = _simulate_asm_str(asm, tmpdir)
+
+    assert stats.get_insn_count() == 8
+    assert sorted(stats.basic_block_histo) == sorted({
+        3: 1,  # 1 basic block with 4 instructions (addi + loop + 2x nop)
+        2: 1,  # 1 basic block with 2 instructions (loop body on second iter.)
+        1: 1   # 1 basic block with 1 instruction (ecall)
+    })
+
+    assert sorted(stats.ext_basic_block_histo) == sorted({
+        8: 1  # All instructions are statically determined.
+    })
+
+
 def test_general_and_loop(tmpdir: py.path.local) -> None:
     '''Test the collection of general statistics as well as loop stats.'''
 
@@ -41,7 +137,7 @@
 
     # General statistics
     assert stats.stall_count == 2
-    assert stats.insn_count == 28
+    assert stats.get_insn_count() == 28
     assert stats.insn_histo == {'addi': 22, 'loop': 4, 'loopi': 1, 'ecall': 1}
     assert stats.func_calls == []