[otbn] Split encoding schemes into their own YAML file

No functional change otherwise. This is just because insns.yml was
getting enormous, and it's a first stab at making things a bit more
modular.

Signed-off-by: Rupert Swarbrick <rswarbrick@lowrisc.org>
diff --git a/hw/ip/otbn/data/enc-schemes.yml b/hw/ip/otbn/data/enc-schemes.yml
new file mode 100644
index 0000000..18e5147
--- /dev/null
+++ b/hw/ip/otbn/data/enc-schemes.yml
@@ -0,0 +1,366 @@
+# Copyright lowRISC contributors.
+# Licensed under the Apache License, Version 2.0, see LICENSE for details.
+# SPDX-License-Identifier: Apache-2.0
+
+# Instruction encoding schemes for OTBN
+#
+# These are used by the instructions in insns.yml. A scheme defines the mapping
+# between instruction operands and bits in the encoding. A scheme names zero or
+# more named fields. It can also inherit from zero or more other schemes.
+#
+# The direct fields of a scheme are defined as a dictionary, mapping a field
+# name (which will be matched up with instruction operands) to a value. In
+# general, this value is itself a dictionary with the following keys:
+#
+#  bits: A list of ranges of bits. A range is written <msb>-<lsb>, where both
+#        are integers (and msb >= lsb). Multiple ranges can be separated by
+#        commas. A degenerate range (with msb == lsb) can be written as a bare
+#        integer. Required.
+#
+#  value: Optional. If specified, this should be a binary string for a fixed
+#         value for this field, prefixed with a "b" (to avoid the YAML parser
+#         reading it as a decimal number). Underscores in the string are
+#         ignored (to make it easier to show grouping) and 'x' means don't
+#         care.
+#
+#  shift: Optional. If specified, this is the number of bits to shift the
+#         encoded value left to get the logical value.
+#
+# For brevity, if value and shift have their default values, the bits string
+# can be used as the value for the field.
+#
+# A scheme can inherit from other schemes by listing their names in a 'parents'
+# attribute. If the child scheme needs to set the value of a parent's field to
+# something fixed, it can do so with the following syntax:
+#
+#     parent_name(field_name=b11101, field_name2=b111)
+#
+# The fields of a scheme are recursively defined to be its direct fields plus
+# the fields of all its ancestors.
+#
+# A scheme is called complete if its fields cover the entire range of bits
+# (0-31) and partial otherwise.
+
+# A partial scheme that sets the bottom two bits to 2'b11 (as for all RISC-V
+# uncompressed instructions) and defines an 'opcode' field for bits 6-2
+# (standard for RV32I instructions)
+rv:
+  fields:
+    opcode: 6-2
+    uncomp:
+      bits: 1-0
+      value: b11
+
+# A partial scheme defining a funct3 field in bits 14-12 (used in most RV32I
+# instructions, and most BN.* custom instructions)
+funct3:
+  fields:
+    funct3: 14-12
+
+# RISC-V "R-type" encoding (reg <- fun(reg, reg))
+R:
+  parents:
+    - rv
+    - funct3
+  fields:
+    funct7: 31-25
+    rs2: 24-20
+    rs1: 19-15
+    rd: 11-7
+
+# RISC-V "I-type" encoding (reg <- fun(imm, reg))
+I:
+  parents:
+    - rv
+    - funct3
+  fields:
+    imm: 31-20
+    rs1: 19-15
+    rd: 11-7
+
+# RISC-V "I-type" encoding sub-type for shifts (reg <- fun(imm, reg))
+Is:
+  parents:
+    - rv
+    - funct3
+  fields:
+    arithmetic: 30
+    shamt: 24-20
+    rs1: 19-15
+    rd: 11-7
+    unused:
+      bits: 31,29-25
+      value: b000000
+
+# RISC-V "S-type" encoding (_ <- fun(reg, imm))
+S:
+  parents:
+    - rv
+    - funct3
+  fields:
+    imm: 31-25,11-7
+    rs2: 24-20
+    rs1: 19-15
+
+# RISC-V "B-type" encoding (like S, but different immediate layout; used for
+# branches)
+B:
+  parents:
+    - rv
+    - funct3
+  fields:
+    imm:
+      bits: 31,7,30-25,11-8
+      shift: 1
+    rs2: 24-20
+    rs1: 19-15
+
+# RISC-V "U-type" encoding (reg <- fun(imm))
+U:
+  parents:
+    - rv
+  fields:
+    imm:
+      bits: 31-12
+      shift: 12
+    rd: 11-7
+
+# RISC-V "J-type" encoding (like U, but different immediate layout; used for
+# jumps)
+J:
+  parents:
+    - rv
+  fields:
+    imm:
+      bits: 31,19-12,20,30-21
+      shift: 1
+    rd: 11-7
+
+# A partial scheme for custom instructions with opcode b00010
+custom0:
+  parents:
+    - rv(opcode=b00010)
+
+# A partial scheme for custom instructions with opcode b01010
+custom1:
+  parents:
+    - rv(opcode=b01010)
+
+# A partial scheme for custom instructions with opcode b01110
+custom2:
+  parents:
+    - rv(opcode=b01110)
+
+# A partial scheme for custom instructions with opcode b11110
+custom3:
+  parents:
+    - rv(opcode=b11110)
+
+# A partial scheme for instructions that produce a dest WDR.
+wrd:
+  fields:
+    wrd: 11-7
+
+# A partial scheme for instructions that take two source WDRs and produce a
+# dest WDR.
+wdr3:
+  parents:
+    - wrd
+  fields:
+    wrs2: 24-20
+    wrs1: 19-15
+
+# A partial scheme that defines the 'fg' field (for <flag_group> operands)
+fg:
+  fields:
+    fg: 31
+
+# A partial scheme that defines the shift fields (type and bytes)
+shift:
+  fields:
+    shift_type: 30
+    shift_bytes: 29-25
+
+# A partial scheme that defines a function field at bit 31 for OTBN logical
+# operations
+funct31:
+  fields:
+    funct31: 31
+
+# A partial scheme for a specialized 2-bit function field. It occupies only
+# the lower two bits of funct3, because the RSHI immediate spills over into
+# the top bit of funct3 (bit 14).
+funct2:
+  fields:
+    funct2: 13-12
+
+# A specialised encoding for the loop instruction (only one source, no
+# destination)
+loop:
+  parents:
+    - custom3
+    - funct2(funct2=b00)
+  fields:
+    bodysize: 31-20
+    grs: 19-15
+    fixed:
+      bits: 14,11-7
+      value: bxxxxxx
+
+# A specialised encoding for the loopi instruction (which, unusually, has 2
+# immediates)
+loopi:
+  parents:
+    - custom3
+    - funct2(funct2=b01)
+  fields:
+    bodysize: 31-20
+    iterations: 19-15,11-7
+    fixed:
+      bits: 14
+      value: bx
+
+# Used for wide logical operations (bn.and, bn.or, bn.xor).
+bna:
+  parents:
+    - custom1
+    - wdr3
+    - funct3
+    - shift
+    - funct31
+
+# Used for bn.not (no second source reg).
+bnan:
+  parents:
+    - custom1
+    - shift
+    - funct31
+    - wrd
+  fields:
+    wrs1: 24-20
+    fixed:
+      bits: 19-15
+      value: bxxxxx
+
+# Used for the wide reg/reg ALU instructions.
+bnaf:
+  parents:
+    - custom1
+    - wdr3
+    - funct3
+    - shift
+    - fg
+
+# Used for the wide bn.addi and bn.subi instructions.
+bnai:
+  parents:
+    - custom1
+    - wrd
+    - funct3
+    - fg
+  fields:
+    sub: 30
+    imm: 29-20
+    wrs: 19-15
+
+# Used for bn.addm, bn.subm
+bnam:
+  parents:
+    - custom1
+    - wdr3
+    - funct3
+  fields:
+    sub: 30
+    fixed:
+      bits: 31,29-25
+      value: bxxxxxx
+
+# Used for bn.mulqacc
+bnaq:
+  parents:
+    - custom2
+    - wdr3
+  fields:
+    wb: 31-30
+    dh: 29
+    qs2: 28-27
+    qs1: 26-25
+    acc:
+      bits: 14-13
+      shift: 6
+    z: 12
+
+# Unusual scheme used for bn.rshi (the immediate bleeds into the usual funct3
+# field)
+bnr:
+  parents:
+    - custom3
+    - wdr3
+  fields:
+    imm: 31-25,14
+    funct2: 13-12
+
+# Used by bn.sel.
+bns:
+  parents:
+    - custom0
+    - wdr3
+    - funct3(funct3=b000)
+    - fg
+  fields:
+    fixed:
+      bits: 30-27
+      value: bxxxx
+    flag: 26-25
+
+# Used by bn.cmp and bn.cmpb
+bnc:
+  parents:
+    - custom0
+    - wdr3(wrd=bxxxxx)
+    - funct3
+    - shift
+    - fg
+
+# Used by bn.lid and bn.sid
+bnxid:
+  parents:
+    - custom0
+    - funct3
+  fields:
+    imm:
+      bits: 24-22,31-25
+      shift: 4
+    spp: 21
+    dpp: 20
+    rs: 19-15
+    rd: 11-7
+
+# Used by bn.mov and bn.movr
+bnmov:
+  parents:
+    - custom0
+    - funct3(funct3=b110)
+  fields:
+    indirect: 31
+    fixed_top:
+      bits: 30-22
+      value: bxxxxxxxxx
+    spp: 21
+    dpp: 20
+    src: 19-15
+    dst: 11-7
+
+# Used by bn.wsrrs and bn.wsrrw
+wcsr:
+  parents:
+    - custom0
+    - funct3(funct3=b111)
+  fields:
+    write: 31
+    wcsr: 27-20
+    wrs: 19-15
+    wrd: 11-7
+    fixed:
+      bits: 30-28
+      value: bxxx
diff --git a/hw/ip/otbn/data/insns.yml b/hw/ip/otbn/data/insns.yml
index 97082e2..959076e 100644
--- a/hw/ip/otbn/data/insns.yml
+++ b/hw/ip/otbn/data/insns.yml
@@ -29,369 +29,9 @@
     doc: |
       All Big Number (BN) instructions operate on the Wide Data Registers (WDRs).
 
-# Instruction encoding schemes
-#
-# These define the mapping between instruction operands and bits in the
-# encoding. A scheme names zero or more named fields. It can also inherit from
-# zero or more other schemes.
-#
-# The direct fields of a scheme are defined as a dictionary, mapping a field
-# name (which will be matched up with instruction operands) to a value. In
-# general, this value is itself a dictionary with the following keys:
-#
-#  bits: A list of ranges of bits. A range is written <msb>-<lsb>, where both
-#        are integers (and msb >= lsb). Multiple ranges can be separated by
-#        commas. A degenerate range (with msb == lsb) can be written as a bare
-#        integer. Required.
-#
-#  value: Optional. If specified, this should be a binary string for a fixed
-#         value for this field, prefixed with a "b" (to avoid the YAML parser
-#         reading it as a decimal number). Underscores in the string are
-#         ignored (to make it easier to show grouping) and 'x' means don't
-#         care.
-#
-#  shift: Optional. If specified, this is the number of bits to shift the
-#         encoded value left to get the logical value.
-#
-# For brevity, if value and shift have their default values, the bits string
-# can be used as the value for the field.
-#
-# A scheme can inherit from other schemes by listing their names in a 'parents'
-# attribute. If the child scheme needs to set the value of a parents' field to
-# something fixed, it can do so with the following syntax:
-#
-#     parent_name(field_name=b11101, field_name2=b111)
-#
-# The fields of a scheme are recursively defined to be its direct fields plus
-# the fields all its ancestors.
-#
-# A scheme is called complete if its fields cover the entire range of bits
-# (0-31) and partial otherwise.
-
-encoding-schemes:
-  # A partial scheme that sets the bottom two bits to 2'b11 (as for all RISC-V
-  # uncompressed instructions) and defines an 'opcode' field for bits 6-2
-  # (standard for RV32I instructions)
-  rv:
-    fields:
-      opcode: 6-2
-      uncomp:
-        bits: 1-0
-        value: b11
-
-  # A partial scheme defining a funct3 field in bits 14-12 (used in most RV32I
-  # instructions, and most BN.* custom instructions)
-  funct3:
-    fields:
-      funct3: 14-12
-
-  # RISC-V "R-type" encoding (reg <- fun(reg, reg))
-  R:
-    parents:
-      - rv
-      - funct3
-    fields:
-      funct7: 31-25
-      rs2: 24-20
-      rs1: 19-15
-      rd: 11-7
-
-  # RISC-V "I-type" encoding (reg <- fun(imm, reg))
-  I:
-    parents:
-      - rv
-      - funct3
-    fields:
-      imm: 31-20
-      rs1: 19-15
-      rd: 11-7
-
-  # RISC-V "I-type" encoding sub-type for shifts (reg <- fun(imm, reg))
-  Is:
-    parents:
-      - rv
-      - funct3
-    fields:
-      arithmetic: 30
-      shamt: 24-20
-      rs1: 19-15
-      rd: 11-7
-      unused:
-        bits: 31,29-25
-        value: b000000
-
-  # RISC-V "S-type" encoding (_ <- fun(reg, imm))
-  S:
-    parents:
-      - rv
-      - funct3
-    fields:
-      imm: 31-25,11-7
-      rs2: 24-20
-      rs1: 19-15
-
-  # RISC-V "B-type" encoding (like S, but different immediate layout; used for
-  # branches)
-  B:
-    parents:
-      - rv
-      - funct3
-    fields:
-      imm:
-        bits: 31,7,30-25,11-8
-        shift: 1
-      rs2: 24-20
-      rs1: 19-15
-
-  # RISC-V "U-type" encoding (reg <- fun(imm))
-  U:
-    parents:
-      - rv
-    fields:
-      imm:
-        bits: 31-12
-        shift: 12
-      rd: 11-7
-
-  # RISC-V "J-type" encoding (like U, but different immediate layout; used for
-  # jumps)
-  J:
-    parents:
-      - rv
-    fields:
-      imm:
-        bits: 31,19-12,20,30-21
-        shift: 1
-      rd: 11-7
-
-  # A partial scheme for custom instructions with opcode b00010
-  custom0:
-    parents:
-      - rv(opcode=b00010)
-
-  # A partial scheme for custom instructions with opcode b01010
-  custom1:
-    parents:
-      - rv(opcode=b01010)
-
-  # A partial scheme for custom instructions with opcode b01110
-  custom2:
-    parents:
-      - rv(opcode=b01110)
-
-  # A partial scheme for custom instructions with opcode b11110
-  custom3:
-    parents:
-      - rv(opcode=b11110)
-
-  # A partial scheme for instructions that produce a dest WDR.
-  wrd:
-    fields:
-      wrd: 11-7
-
-  # A partial scheme for instructions that take two source WDRs and produce a
-  # dest WDR.
-  wdr3:
-    parents:
-      - wrd
-    fields:
-      wrs2: 24-20
-      wrs1: 19-15
-
-  # A partial scheme that defines the 'fg' field (for <flag_group> operands)
-  fg:
-    fields:
-      fg: 31
-
-  # A partial scheme that defines the shift fields (type and bytes)
-  shift:
-    fields:
-      shift_type: 30
-      shift_bytes: 29-25
-
-  # A partial scheme that defines a function field at bit 31 for OTBN logical
-  # operations
-  funct31:
-    fields:
-      funct31: 31
-
-  # A partial scheme for specialized 2 bit function field, we need a reduced
-  # size in the lower two bits of funct3 as RSHI spills over 1 bit from its
-  # immediate
-  funct2:
-    fields:
-      funct2: 13-12
-
-  # A specialised encoding for the loop instruction (only one source, no
-  # destination)
-  loop:
-    parents:
-      - custom3
-      - funct2(funct2=b00)
-    fields:
-      bodysize: 31-20
-      grs: 19-15
-      fixed:
-        bits: 14,11-7
-        value: bxxxxxx
-
-  # A specialised encoding for the loopi instruction (which, unusually, has 2
-  # immediates)
-  loopi:
-    parents:
-      - custom3
-      - funct2(funct2=b01)
-    fields:
-      bodysize: 31-20
-      iterations: 19-15,11-7
-      fixed:
-        bits: 14
-        value: bx
-
-  # Used wide logical operations (bn.and, bn.or, bn.xor).
-  bna:
-    parents:
-      - custom1
-      - wdr3
-      - funct3
-      - shift
-      - funct31
-
-  # Used for bn.not (no second source reg).
-  bnan:
-    parents:
-      - custom1
-      - shift
-      - funct31
-      - wrd
-    fields:
-      wrs1: 24-20
-      fixed:
-        bits: 19-15
-        value: bxxxxx
-
-  # Used for the wide reg/reg ALU instructions.
-  bnaf:
-    parents:
-      - custom1
-      - wdr3
-      - funct3
-      - shift
-      - fg
-
-  # Used for the wide bn.addi and bn.subi instructions.
-  bnai:
-    parents:
-      - custom1
-      - wrd
-      - funct3
-      - fg
-    fields:
-      sub: 30
-      imm: 29-20
-      wrs: 19-15
-
-  # Used for bn.addm, bn.subm
-  bnam:
-    parents:
-      - custom1
-      - wdr3
-      - funct3
-    fields:
-      sub: 30
-      fixed:
-        bits: 31,29-25
-        value: bxxxxxx
-
-  # Used for bn.mulqacc
-  bnaq:
-    parents:
-      - custom2
-      - wdr3
-    fields:
-      wb: 31-30
-      dh: 29
-      qs2: 28-27
-      qs1: 26-25
-      acc:
-        bits: 14-13
-        shift: 6
-      z: 12
-
-  # Unusual scheme used for bn.rshi (the immediate bleeds into the usual funct3
-  # field)
-  bnr:
-    parents:
-      - custom3
-      - wdr3
-    fields:
-      imm: 31-25,14
-      funct2: 13-12
-
-  # Used by bn.sel.
-  bns:
-    parents:
-      - custom0
-      - wdr3
-      - funct3(funct3=b000)
-      - fg
-    fields:
-      fixed:
-        bits: 30-27
-        value: bxxxx
-      flag: 26-25
-
-  # Used by bn.cmp and bn.cmpb
-  bnc:
-    parents:
-      - custom0
-      - wdr3(wrd=bxxxxx)
-      - funct3
-      - shift
-      - fg
-
-  # Used by bn.lid and bn.sid
-  bnxid:
-    parents:
-      - custom0
-      - funct3
-    fields:
-      imm:
-        bits: 24-22,31-25
-        shift: 4
-      spp: 21
-      dpp: 20
-      rs: 19-15
-      rd: 11-7
-
-  # Used by bn.mov and bn.movr
-  bnmov:
-    parents:
-      - custom0
-      - funct3(funct3=b110)
-    fields:
-      indirect: 31
-      fixed_top:
-        bits: 30-22
-        value: bxxxxxxxxx
-      spp: 21
-      dpp: 20
-      src: 19-15
-      dst: 11-7
-
-  # Used by bn.wsrrs and bn.wsrrw
-  wcsr:
-    parents:
-      - custom0
-      - funct3(funct3=b111)
-    fields:
-      write: 31
-      wcsr: 27-20
-      wrs: 19-15
-      wrd: 11-7
-      fixed:
-        bits: 30-28
-        value: bxxx
+# The path to a YAML file defining the instruction encoding schemes. A
+# relative path is resolved against the directory containing this file.
+encoding-schemes: enc-schemes.yml
 
 # The instructions. Instructions are listed in the given order within
 # each instruction group. There are the following fields:
diff --git a/hw/ip/otbn/util/shared/insn_yaml.py b/hw/ip/otbn/util/shared/insn_yaml.py
index 20d9244..0065567 100644
--- a/hw/ip/otbn/util/shared/insn_yaml.py
+++ b/hw/ip/otbn/util/shared/insn_yaml.py
@@ -5,18 +5,18 @@
 '''Support code for reading the instruction database in insns.yml'''
 
 import itertools
+import os
 import re
 from typing import Dict, List, Optional, Tuple, cast
 
-import yaml
-
 from .encoding import Encoding
 from .encoding_scheme import EncSchemes
 from .lsu_desc import LSUDesc
 from .operand import Operand
 from .syntax import InsnSyntax
 from .yaml_parse_helpers import (check_keys, check_str, check_bool,
-                                 check_list, index_list, get_optional_str)
+                                 check_list, index_list, get_optional_str,
+                                 load_yaml)
 
 
 class InsnGroup:
@@ -46,7 +46,7 @@
     def __init__(self,
                  yml: object,
                  groups: InsnGroups,
-                 encoding_schemes: EncSchemes) -> None:
+                 encoding_schemes: Optional[EncSchemes]) -> None:
         yd = check_keys(yml, 'instruction',
                         ['mnemonic', 'operands'],
                         ['group', 'rv32i', 'synopsis',
@@ -109,6 +109,11 @@
         encoding_yml = yd.get('encoding')
         self.encoding = None
         if encoding_yml is not None:
+            if encoding_schemes is None:
+                raise ValueError('{} specifies an encoding, but the file '
+                                 'didn\'t specify any encoding schemes.'
+                                 .format(what))
+
             self.encoding = Encoding(encoding_yml, encoding_schemes,
                                      self.name_to_operand, self.mnemonic)
             self._update_widths_from_encoding(self.encoding)
@@ -203,13 +208,26 @@
 
 
 class InsnsFile:
-    def __init__(self, yml: object) -> None:
+    def __init__(self, path: str, yml: object) -> None:
         yd = check_keys(yml, 'top-level',
-                        ['insn-groups', 'encoding-schemes', 'insns'],
-                        [])
+                        ['insn-groups', 'insns'],
+                        ['encoding-schemes'])
 
         self.groups = InsnGroups(yd['insn-groups'])
-        self.encoding_schemes = EncSchemes(yd['encoding-schemes'])
+
+        enc_scheme_path = get_optional_str(yd, 'encoding-schemes', 'top-level')
+        if enc_scheme_path is None:
+            self.encoding_schemes = None
+        else:
+            src_dir = os.path.dirname(path)
+            es_path = os.path.normpath(os.path.join(src_dir, enc_scheme_path))
+            es_yaml = load_yaml(es_path, 'encoding schemes')
+            try:
+                self.encoding_schemes = EncSchemes(es_yaml)
+            except ValueError as err:
+                raise RuntimeError('Invalid schema in YAML file at {!r}: {}'
+                                   .format(es_path, err)) from None
+
         self.insns = [Insn(i, self.groups, self.encoding_schemes)
                       for i in check_list(yd['insns'], 'insns')]
         self.mnemonic_to_insn = index_list('insns', self.insns,
@@ -252,14 +270,7 @@
 
     '''
     try:
-        with open(path, 'r') as handle:
-            return InsnsFile(yaml.load(handle, Loader=yaml.SafeLoader))
-    except FileNotFoundError:
-        raise RuntimeError('Cannot find YAML file at {!r}.'
-                           .format(path)) from None
-    except yaml.YAMLError as err:
-        raise RuntimeError('Failed to parse YAML file at {!r}: {}'
-                           .format(path, err)) from None
+        return InsnsFile(path, load_yaml(path, None))
     except ValueError as err:
         raise RuntimeError('Invalid schema in YAML file at {!r}: {}'
                            .format(path, err)) from None
diff --git a/hw/ip/otbn/util/shared/yaml_parse_helpers.py b/hw/ip/otbn/util/shared/yaml_parse_helpers.py
index 2607fee..6adf793 100644
--- a/hw/ip/otbn/util/shared/yaml_parse_helpers.py
+++ b/hw/ip/otbn/util/shared/yaml_parse_helpers.py
@@ -6,6 +6,7 @@
 
 from typing import Callable, Dict, List, Optional, Sequence, TypeVar
 
+import yaml
 
 T = TypeVar('T')
 
@@ -130,3 +131,22 @@
 def get_optional_str(data: Dict[str, object],
                      key: str, what: str) -> Optional[str]:
     return check_optional_str(data.get(key), '{} field for {}'.format(key, what))
+
+
+def load_yaml(path: str, what: Optional[str]) -> object:
+    '''Load and parse the YAML file at path, returning the parsed object.
+
+    Raises RuntimeError if the file does not exist or is not well-formed YAML.
+    If what is not None, it is included in the message as " for {what}".
+
+    '''
+    for_msg = ' for ' + what if what is not None else ''
+    try:
+        with open(path, 'r') as handle:
+            return yaml.load(handle, Loader=yaml.SafeLoader)
+    except FileNotFoundError:
+        raise RuntimeError('Cannot find YAML file{} at {!r}.'
+                           .format(for_msg, path)) from None
+    except yaml.YAMLError as err:
+        raise RuntimeError('Failed to parse YAML file{} at {!r}: {}'
+                           .format(for_msg, path, err)) from None