TBM: The TBM executable, without the actual model

The boilerplate stuff to get TBM started. The actual models will come in the
following commits.

This commit also includes a uArch configuration.

Change-Id: I4d1054a46ee2c98aa277b3eeace76fb6ddf893ff
diff --git a/config/rvv-simple.yaml b/config/rvv-simple.yaml
new file mode 100644
index 0000000..2ed2e8b
--- /dev/null
+++ b/config/rvv-simple.yaml
@@ -0,0 +1,491 @@
+{
+  "description": "TBM configuration for Springbok",
+
+  # This file is also used as an example of a TBM configuration, hence all the
+  # comments.
+
+  # To experiment with different configurations based on this one, create a new
+  # configuration file with only the properties you want to change, and pass
+  # the new file to TBM with the '--extend' argument (this file should still be
+  # passed to TBM with the '--uarch' argument). You can use the '--extend'
+  # argument multiple times to pass multiple files. Normally, properties in
+  # 'extend' files overwrite (or add) properties. If you want to replace an
+  # entire object (i.e. not just overwrite the mentioned properties, also
+  # remove all the other properties), include the property 'replace : true' in
+  # the object.
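+
+  # For example, a hypothetical extend file that doubles the fetch rate and
+  # completely replaces the 'S' issue queue (instead of merging into it) could
+  # contain:
+  #
+  #   {
+  #     "config" : { "fetch_rate" : 8 },
+  #     "issue_queues" : { "S" : { "replace" : true, "size" : 8 } }
+  #   }
+  #
+  # Individual properties can also be overridden on the command line with the
+  # '--set' argument, e.g. "--set config.fetch_rate=8".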
+
+  # config: object, required. General parameters of the microarchitecture.
+  "config" : {
+    # branch_prediction: enum, required. Possible values:
+    #   'none' - no branch prediction, fetch is stalled until the target is
+    #            computed.
+    #   'perfect' - branching doesn't cause fetch stalls (the branch target is
+    #               taken from the input trace).
+    "branch_prediction" : "perfect",
+
+    # fetch_rate: positive integer, required. The number of instructions that
+    # are fetched in a cycle. If the fetch queue doesn't have enough space for
+    # all the instructions, none are fetched.
+    "fetch_rate" : 4,
+
+    # fetch_queue_size: positive integer. The size of the fetch queue. If
+    # omitted, the queue is unrestricted (infinite).
+    "fetch_queue_size" : 4,
+
+    # decode_rate: positive integer. The number of instructions that can be
+    # decoded and moved from the fetch queue to the dispatch queues in a cycle.
+    # If omitted, as many instructions as possible are decoded and moved.
+    "decode_rate" : 4,
+
+    # vector_slices: positive integer, required. The number of slices each
+    # vector register is composed of.
+    "vector_slices" : 2
+  },
+
+  # register_files: map, required. Currently the register file names X, F, V,
+  # and MISC are hard-coded in TBM. In the future, the 'regs' property will be
+  # used to determine which register file a register belongs to. Any register
+  # that does not belong to X, F, or V is MISC.
+  "register_files" : {
+    "X" : {
+      # type: enum, required. Possible values:
+      #   'scalar' - scalar registers.
+      #   'vector' - vector registers.
+      "type" : "scalar",
+
+      # regs: array, currently ignored.
+      "regs" : [
+               "x1",  "x2",  "x3",  "x4",  "x5",  "x6",  "x7",
+        "x8",  "x9",  "x10", "x11", "x12", "x13", "x14", "x15",
+        "x16", "x17", "x18", "x19", "x20", "x21", "x22", "x23",
+        "x24", "x25", "x26", "x27", "x28", "x29", "x30", "x31"
+      ],
+
+      # read_ports: positive integer. The number of register reads that can be
+      # done in a cycle, excluding registers listed in the
+      # 'dedicated_read_ports' property. If omitted, any number of registers
+      # can be read in a cycle.
+      "read_ports" : 2,
+
+      # write_ports: positive integer. Similar to 'read_ports'.
+      "write_ports" : 1
+    },
+
+    "F" : {
+      "type" : "scalar",
+      "regs" : [
+        "f0",  "f1",  "f2",  "f3",  "f4",  "f5",  "f6",  "f7",
+        "f8",  "f9",  "f10", "f11", "f12", "f13", "f14", "f15",
+        "f16", "f17", "f18", "f19", "f20", "f21", "f22", "f23",
+        "f24", "f25", "f26", "f27", "f28", "f29", "f30", "f31"
+      ],
+      "read_ports" : 2,
+      "write_ports" : 1
+    },
+
+    "V" : {
+      "type" : "vector",
+      "regs" : [
+        "v0",  "v1",  "v2",  "v3",  "v4",  "v5",  "v6",  "v7",
+        "v8",  "v9",  "v10", "v11", "v12", "v13", "v14", "v15",
+        "v16", "v17", "v18", "v19", "v20", "v21", "v22", "v23",
+        "v24", "v25", "v26", "v27", "v28", "v29", "v30", "v31"
+      ],
+      "read_ports" : 2,
+      "write_ports" : 1,
+
+      # dedicated_read_ports: array. This property is only valid when the
+      # register file 'type' is 'vector'. See 'read_ports' for more
+      # information.
+      "dedicated_read_ports" : [ "v0" ]
+
+      # dedicated_write_ports: array. Similar to 'dedicated_read_ports'.
+    },
+
+    "MISC" : {
+      "type" : "scalar"
+    }
+  },
+
+  # issue_queues: map, required. Currently the names S and V are hard-coded in
+  # TBM.
+  "issue_queues": {
+    "S" : {
+      # size: positive integer. The number of instructions the issue/dispatch
+      # queue can hold. If omitted, the queue is unrestricted (infinite).
+      "size" : 4
+    },
+
+    "V" : {
+      "size" : 4
+    }
+  },
+
+  # functional_units: map, required.
+  "functional_units" : {
+    "lsu" : {
+      # count: positive integer. The number of copies of this unit available.
+      # If omitted, one copy will be available.
+      "count" : 1,
+
+      # type: enum, required. Possible values:
+      #   'scalar' - scalar datapath.
+      #   'vector' - vector datapath (see slices).
+      "type" : "scalar",
+
+      # issue_queue: string, required. The issue queue that feeds this unit.
+      "issue_queue" : "S",
+
+      # eiq_size: positive integer. The size of the unit's EIQ. If omitted,
+      # the queue is unrestricted (infinite).
+      "eiq_size" : 4,
+
+      # can_skip_eiq: boolean, required. When true, instructions can move from
+      # the issue queue directly to the first pipeline stage. Otherwise,
+      # instructions must spend at least one cycle in the EIQ before they move
+      # to the first pipeline stage.
+      "can_skip_eiq" : true,
+
+      # depth: positive integer, required. The number of stages in the
+      # pipeline.
+      "depth" : 3,
+
+      # pipelined: boolean, required. When true, each pipeline stage can be
+      # populated with a different instruction. Otherwise, only one instruction
+      # can be in any stage of the pipeline.
+      "pipelined": true,
+
+      # load_stage: non-negative integer. For units that read from memory, this
+      # is the pipeline stage (zero-based) in which the memory access is
+      # initiated.
+      "load_stage": 1,
+
+      # fixed_load_latency: non-negative integer, required when 'load_stage' is
+      # specified. A load instruction stalls the pipeline only if it reaches
+      # pipeline stage 'load_stage + fixed_load_latency' and the memory value
+      # is not yet available.
+      "fixed_load_latency": 1,
+
+      # store_stage: non-negative integer. Similar to 'load_stage'.
+      "store_stage": 1,
+
+      # fixed_store_latency: non-negative integer, required when 'store_stage'
+      # is specified. Similar to 'fixed_load_latency'.
+      "fixed_store_latency": 1,
+
+      # memory_interface: string, required when 'load_stage' or 'store_stage'
+      # is specified. This should be one of the cache levels (see
+      # 'memory_system'), or 'main'. For units that access memory, this is the
+      # memory module the unit interacts with.
+      "memory_interface" : "L1D",
+
+      # writeback_buff_size: positive integer. The size of the register
+      # writeback buffer. If omitted, the buffer is unrestricted (infinite).
+      "writeback_buff_size": 2
+    },
+
+    "alu" : {
+      "type" : "scalar",
+      "issue_queue" : "S",
+      "eiq_size" : 4, "can_skip_eiq" : true,
+      "depth" : 1, "pipelined": true,
+      "writeback_buff_size": 2
+    },
+
+    "div" : {
+      "type" : "scalar",
+      "issue_queue" : "S",
+      "eiq_size" : 4, "can_skip_eiq" : true,
+      "depth" : 4, "pipelined": true,
+      "writeback_buff_size": 2
+    },
+
+    "mul" : {
+      "type" : "scalar",
+      "issue_queue" : "S",
+      "eiq_size" : 4, "can_skip_eiq" : true,
+      "depth" : 1, "pipelined": true,
+      "writeback_buff_size": 2
+    },
+
+    "fpu" : {
+      "type" : "scalar",
+      "issue_queue" : "S",
+      "eiq_size" : 4, "can_skip_eiq" : true,
+      "depth" : 1, "pipelined": true,
+      "writeback_buff_size": 2
+    },
+
+    "branch" : {
+      "description" : "TODO(sflur): not sure how to handle these instruction, so added this unit for now.",
+      "type" : "scalar",
+      "issue_queue" : "S",
+      "eiq_size" : 1, "can_skip_eiq" : true,
+      "depth" : 1, "pipelined": false,
+      "writeback_buff_size": 1
+    },
+
+    "csr" : {
+      "description" : "TODO(sflur): not sure how to handle these instructions, so added this unit for now.",
+      "type" : "scalar",
+      "issue_queue" : "S",
+      "eiq_size" : 1, "can_skip_eiq" : true,
+      "depth" : 1, "pipelined": false,
+      "writeback_buff_size": 2
+    },
+
+    "vctrl" : {
+      "description" : "TODO(sflur): not sure how to handle these instructions, so added this unit for now.",
+      "type" : "scalar",
+      "issue_queue" : "S",
+      "eiq_size" : 1, "can_skip_eiq" : true,
+      "depth" : 1, "pipelined": false,
+      "writeback_buff_size": 1
+    },
+
+    "vlsu" : {
+      "type" : "vector",
+      "issue_queue" : "V",
+      "eiq_size" : 4, "can_skip_eiq" : true,
+      "depth" : 3, "pipelined": true,
+      "load_stage": 1, "fixed_load_latency": 1,
+      "store_stage": 1, "fixed_store_latency": 1,
+      "memory_interface" : "L1D",
+      "writeback_buff_size": 2
+    },
+
+    "valu" : {
+      "type" : "vector",
+      "issue_queue" : "V",
+      "eiq_size" : 4, "can_skip_eiq" : true,
+      "depth" : 2, "pipelined": true,
+      "writeback_buff_size": 2
+    },
+
+    "vmac" : {
+      "type" : "vector",
+      "issue_queue" : "V",
+      "eiq_size" : 4, "can_skip_eiq" : true,
+      "depth" : 3, "pipelined": true,
+      "writeback_buff_size": 2
+    },
+
+    "vdiv" : {
+      "type" : "vector",
+      "issue_queue" : "V",
+      "eiq_size" : 4, "can_skip_eiq" : true,
+      "depth" : 3, "pipelined": true,
+      "writeback_buff_size": 2
+    },
+
+    "vperm" : {
+      "type" : "vector",
+      "issue_queue" : "V",
+      "eiq_size" : 4, "can_skip_eiq" : true,
+      "depth" : 3, "pipelined": true,
+      "writeback_buff_size": 2
+    },
+
+    "vred" : {
+      "type" : "vector",
+      "issue_queue" : "V",
+      "eiq_size" : 4, "can_skip_eiq" : true,
+      "depth" : 2, "pipelined": true,
+      "writeback_buff_size": 2
+    },
+
+    "vmsk" : {
+      "description" : "From the spec doc, this look more special, it is used in conjunction with other units.",
+      "type" : "vector",
+      "issue_queue" : "V",
+      "eiq_size" : 4, "can_skip_eiq" : true,
+      "depth" : 2, "pipelined": true,
+      "writeback_buff_size": 2
+    },
+
+    "vmv" : {
+      "description" : "This unit is not in the spec doc, but I suspect it might be added later.",
+      "type" : "vector",
+      "issue_queue" : "V",
+      "eiq_size" : 4, "can_skip_eiq" : true,
+      "depth" : 1, "pipelined": true,
+      "writeback_buff_size": 2
+    },
+
+    "vfspecial" : {
+      "description" : "Not in the spec doc! copied from old config",
+      "type" : "vector",
+      "issue_queue" : "V",
+      "eiq_size" : 4, "can_skip_eiq" : true,
+      "depth" : 2, "pipelined": true,
+      "writeback_buff_size": 2
+    },
+
+    "vnimp" : {
+      "description" : "For instructions we don't care to implement",
+      "type" : "vector",
+      "issue_queue" : "V",
+      "eiq_size" : 4, "can_skip_eiq" : true,
+      "depth" : 1, "pipelined": false,
+      "writeback_buff_size": 2
+    }
+  },
+
+  # pipe_maps: array, required. A list of files specifying the mapping from
+  # instructions to functional units.
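+  # Each pipe map is a JSON object mapping an instruction to the name of a
+  # functional unit defined above; entries mapped to "UNKNOWN" are ignored.
+  # For example (hypothetical entries):
+  #   { "add" : "alu", "vadd.vv" : "valu" }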
+  "pipe_maps" : [
+    "pipe_maps/riscv/missing.json",
+    "pipe_maps/riscv/custom.json",
+    "pipe_maps/riscv/rv32a.json",
+    "pipe_maps/riscv/rv32b.json",
+    "pipe_maps/riscv/rv32d.json",
+    "pipe_maps/riscv/rv32f.json",
+    "pipe_maps/riscv/rv32h.json",
+    "pipe_maps/riscv/rv32i.json",
+    "pipe_maps/riscv/rv32m.json",
+    "pipe_maps/riscv/rv32q.json",
+    "pipe_maps/riscv/rv32zfh.json",
+    "pipe_maps/riscv/rv64a.json",
+    "pipe_maps/riscv/rv64d.json",
+    "pipe_maps/riscv/rv64f.json",
+    "pipe_maps/riscv/rv64h.json",
+    "pipe_maps/riscv/rv64i.json",
+    "pipe_maps/riscv/rv64m.json",
+    "pipe_maps/riscv/rv64q.json",
+    "pipe_maps/riscv/rvp.json",
+    "pipe_maps/riscv/rvv.json",
+    "pipe_maps/riscv/rvv-pseudo.json",
+    "pipe_maps/riscv/springbok.json",
+    "pipe_maps/riscv/system.json"
+  ],
+
+  "__comment__ additional pipe_maps" : [
+    "pipe_maps/riscv/pseudo.json",
+    "pipe_maps/riscv/rv32c.json",
+    "pipe_maps/riscv/rv32d-zfh.json",
+    "pipe_maps/riscv/rv32k.json",
+    "pipe_maps/riscv/rv32q-zfh.json",
+    "pipe_maps/riscv/rv64b.json",
+    "pipe_maps/riscv/rv64c.json",
+    "pipe_maps/riscv/rv64k.json",
+    "pipe_maps/riscv/rv64zfh.json",
+    "pipe_maps/riscv/rvc.json",
+    "pipe_maps/riscv/rvk.json",
+    "pipe_maps/riscv/svinval.json"
+  ],
+
+  # memory_system: object, required. Description of the memory hierarchy.
+  "memory_system" : {
+    # latencies: object, required.
+    "latencies" : {
+      # fetch_read: positive integer, required when levels is not empty. The
+      # number of cycles required for handling a fetch read request coming from
+      # higher levels, not including the handling of the request by lower
+      # levels.
+      "fetch_read" : 1,
+
+      # fetch_write: positive integer, required when levels is not empty.
+      # Similar to fetch_read.
+      "fetch_write" : 1,
+
+      # write: positive integer, required (TODO(sflur): maybe this shouldn't be
+      # required for instruction cache?). The number of cycles required for
+      # handling a write request coming from a functional unit, not including
+      # the handling of the request by lower levels.
+      "write" : 1
+
+      # read: positive integer, required when this level is used as a
+      # memory_interface. The number of cycles required for handling a read
+      # request coming from a functional unit, not including the handling of
+      # the request by lower levels.
+    },
+
+    # levels: map. Description of the lower cache levels.
+    "levels" : {
+      "L3" : {
+        # type: enum, required. Possible values:
+        #   'dcache' - data cache, used for serving load/store instructions.
+        #   'icache' - instruction cache, used for fetching instructions.
+        #   'unified' - used for both instructions and data.
+        "type" : "unified",
+
+        # placement: object, required. Description of the placement policy.
+        "placement" : {
+          # type: enum, required. Possible values:
+          #   'direct_map'
+          #   'set_assoc'
+          "type" : "set_assoc",
+
+          # set_size: positive integer, required when 'type' is 'set_assoc'.
+          "set_size" : 4,
+
+          # replacement: enum, required when 'type' is 'set_assoc'. Possible values:
+          #   'LRU'
+          "replacement" : "LRU"
+        },
+
+        # write_policy: enum, required. Possible values:
+        #   'write_back'
+        #   'write_through'
+        "write_policy" : "write_back",
+
+        # inclusion: enum, required when this level has nested 'levels'.
+        # Possible values:
+        #   'exclusive'
+        #   'inclusive'
+        "inclusion" : "exclusive",
+
+        # line_size: positive integer, required. Cache line size in bits.
+        "line_size" : 128,
+
+        # size: bytes, required. Can be a positive integer, or a string with
+        # one of the suffixes b, kb, mb, gb, tb, lowercase or uppercase (e.g.
+        # "4MB").
+        "size" : "4MB",
+
+        "latencies" : {
+          "fetch_read" : 1,
+          "fetch_write" : 1,
+          "write" : 1
+        },
+        "levels" : {
+          "L2" : {
+            "type" : "unified",
+            "placement" : { "type" : "set_assoc",
+                            "set_size" : 4,
+                            "replacement" : "LRU"
+                          },
+            "write_policy" : "write_back",
+            "inclusion" : "exclusive",
+            "line_size" : 128,
+            "size" : "256KB",
+            "latencies" : { "fetch_read" : 1, "fetch_write" : 1, "write" : 1 },
+            "levels" : {
+              "L1D" : {
+                "type" : "dcache",
+                "placement" : { "type" : "set_assoc",
+                                "set_size" : 4,
+                                "replacement" : "LRU"
+                              },
+                "write_policy" : "write_back",
+                "line_size" : 128,
+                "size" : "32KB",
+                "latencies" : { "read" : 1, "write" : 1 }
+              },
+              "L1I" : {
+                "type" : "icache",
+                "placement" : { "type" : "set_assoc",
+                                "set_size" : 4,
+                                "replacement" : "LRU"
+                              },
+                "write_policy" : "write_back",
+                "line_size" : 128,
+                "size" : "32KB",
+                "latencies" : { "read" : 1, "write" : 1 }
+              }
+            }
+          }
+        }
+      }
+    }
+  }
+}
diff --git a/config/uarch.schema.json b/config/uarch.schema.json
new file mode 100644
index 0000000..0148606
--- /dev/null
+++ b/config/uarch.schema.json
@@ -0,0 +1,266 @@
+{
+  "$schema": "https://json-schema.org/draft/2020-12/schema",
+  "$id" : "uarch.schema.json",
+
+  "title": "Microarchitecture Configuration",
+  "description": "TBM configuration for a microarchitecture",
+
+  "$ref" : "#/$defs/object",
+  "properties" : {
+    "config" : {
+      "$ref" : "#/$defs/object",
+      "properties" : {
+        "branch_prediction" : { "enum" : ["none", "perfect"] },
+        "fetch_rate" : { "$ref" : "#/$defs/positive_integer" },
+        "fetch_queue_size" : {
+          "description" : "Infinite queue if omitted",
+          "$ref" : "#/$defs/positive_integer"
+        },
+        "decode_rate" : {
+          "description" : "Unrestricted if omitted",
+          "$ref" : "#/$defs/positive_integer"
+        },
+        "vector_slices" : {
+          "description" : "The number of slices each vector register is composed of",
+          "$ref" : "#/$defs/positive_integer"
+        }
+      },
+      "unevaluatedProperties" : false,
+      "required" : ["branch_prediction", "fetch_rate", "vector_slices"]
+    },
+
+    "register_files" : {
+      "$ref" : "#/$defs/object",
+      "unevaluatedProperties" : {
+        "$ref" : "#/$defs/object",
+        "properties" : {
+          "type" : { "$ref" : "#/$defs/data_type" },
+          "regs" : {
+            "$comment" : "TODO(sflur): TBM currently ignores `regs`",
+            "type" : "array",
+            "items" : { "type": "string" },
+            "minItems" : 1,
+            "uniqueItems" : true
+          },
+          "read_ports" : { "$ref" : "#/$defs/positive_integer" },
+          "write_ports" : { "$ref" : "#/$defs/positive_integer" }
+        },
+        "if" : {
+          "properties" : { "type" : { "const" : "vector" } }
+        }, "then" : {
+          "properties" : {
+            "dedicated_read_ports" : {
+              "description" : "Reading from the listed registers doesn't use a read port.",
+              "type" : "array",
+              "items" : {
+                "type" : "string",
+                "$comment" : "items should be from `regs`"
+              },
+              "minItems" : 1,
+              "uniqueItems" : true
+            },
+            "dedicated_write_ports" : {
+              "description" : "Writing to the listed registers doesn't use a write port.",
+              "type" : "array",
+              "items" : {
+                "type" : "string",
+                "$comment" : "items should be from `regs`"
+              },
+              "minItems" : 1,
+              "uniqueItems" : true
+            }
+          }
+        },
+        "unevaluatedProperties" : false,
+        "required" : ["type"]
+      }
+    },
+
+    "issue_queues" : {
+      "$ref" : "#/$defs/object",
+      "unevaluatedProperties" : {
+        "$ref" : "#/$defs/object",
+        "properties" : {
+          "size" : { "$ref" : "#/$defs/positive_integer" }
+        },
+        "unevaluatedProperties" : false
+      },
+      "minProperties": 1
+    },
+
+    "functional_units" : {
+      "$ref" : "#/$defs/object",
+      "unevaluatedProperties" : {
+        "$ref" : "#/$defs/object",
+        "properties" : {
+          "count" : { "$ref" : "#/$defs/positive_integer" },
+          "type" : { "$ref" : "#/$defs/data_type" },
+          "issue_queue" : {
+            "type" : "string",
+            "$comment" : "should be one of the `issue_queues`"
+          },
+          "eiq_size" : {
+            "description" : "Unrestricted if omitted",
+            "$ref" : "#/$defs/positive_integer"
+          },
+          "can_skip_eiq" : { "type" : "boolean" },
+          "depth" : { "$ref" : "#/$defs/positive_integer" },
+          "pipelined": { "type" : "boolean" },
+          "load_stage": { "$ref" : "#/$defs/non_negative_integer" },
+          "store_stage": { "$ref" : "#/$defs/non_negative_integer" },
+          "writeback_buff_size": {
+            "description" : "Unrestricted if omitted",
+            "$ref" : "#/$defs/positive_integer"
+          }
+        },
+        "dependentSchemas" : {
+          "load_stage": {
+            "properties" : {
+              "fixed_load_latency": { "$ref" : "#/$defs/non_negative_integer" },
+              "memory_interface" : {
+                "type" : "string",
+                "$comment" : "should be one of the cache levels, or `'main'`"
+              }
+            },
+            "required" : ["fixed_load_latency", "memory_interface"]
+          },
+          "store_stage": {
+            "properties" : {
+              "fixed_store_latency": { "$ref" : "#/$defs/non_negative_integer" },
+              "memory_interface" : {
+                "type" : "string",
+                "$comment" : "should be one of the cache levels, or `'main'`"
+              }
+            },
+            "required" : ["fixed_store_latency", "memory_interface"]
+          }
+        },
+        "unevaluatedProperties" : false,
+        "required" : ["type", "issue_queue", "can_skip_eiq", "depth",
+                      "pipelined"]
+      }
+    },
+
+    "pipe_maps" : {
+      "type" : "array",
+      "items" : {
+        "description" : "File (.json) path, that maps instructions to functional units",
+        "type": "string"
+      },
+      "minItems" : 1,
+      "uniqueItems" : true
+    },
+
+    "memory_system" : {
+      "$ref" : "#/$defs/object",
+      "properties" : {
+        "latencies" : { "$ref" : "#/$defs/mem_latencies" },
+        "levels" : { "$ref" : "#/$defs/cache_levels" }
+      },
+      "unevaluatedProperties" : false,
+      "required" : ["latencies"]
+    }
+  },
+  "unevaluatedProperties" : false,
+  "required" : ["config", "register_files", "issue_queues", "functional_units",
+                "pipe_maps", "memory_system"],
+
+  "$defs" : {
+    "object" : {
+      "description" : "an object type that allows string `description` and `__comment__` prefix",
+      "type" : "object",
+      "properties" : {
+        "description" : { "type" : "string" }
+      },
+      "patternProperties" : { "^__comment__" : true },
+      "$comment" : "TBM also accepts the boolean 'replace' property in files passed with the '--extend' argument, but those are not validated separately, only the merged result is validated."
+    },
+
+    "positive_integer" : {
+      "type" : "integer",
+      "minimum" : 1
+    },
+
+    "non_negative_integer" : {
+      "type" : "integer",
+      "minimum" : 0
+    },
+
+    "data_type" : { "enum" : ["scalar", "vector"] },
+
+    "bits" : {
+      "type" : "integer"
+    },
+
+    "bytes" : {
+      "type" : ["string", "integer"],
+      "pattern" : "^\\d+(b|B|kb|KB|mb|MB|gb|GB|tb|TB)?$"
+    },
+
+    "mem_latencies" : {
+      "$ref" : "#/$defs/object",
+      "properties" : {
+        "fetch_read" : { "$ref" : "#/$defs/positive_integer" },
+        "fetch_write" : { "$ref" : "#/$defs/positive_integer" },
+        "read" : { "$ref" : "#/$defs/positive_integer" },
+        "write" : { "$ref" : "#/$defs/positive_integer" }
+      },
+      "unevaluatedProperties" : false
+    },
+
+    "cache_levels" : {
+      "$ref" : "#/$defs/object",
+      "unevaluatedProperties" : {
+        "$ref" : "#/$defs/object",
+        "properties" : {
+          "type" : { "enum" : ["unified", "dcache", "icache"] },
+          "placement" : {
+            "$ref" : "#/$defs/object",
+            "properties" : {
+              "type" : { "enum" : ["direct_map", "set_assoc"] }
+            },
+            "if" : {
+              "properties" : { "type" : { "constant" : "set_assoc" } }
+            }, "then" : {
+              "properties" : {
+                "set_size" : {
+                  "description" : "the number of lines in the set; must be a power of 2",
+                  "$ref" : "#/$defs/positive_integer"
+                },
+                "replacement" : { "enum" : ["LRU"] }
+              },
+              "required" : ["set_size", "replacement"]
+            },
+            "unevaluatedProperties" : false,
+            "required" : ["type"]
+          },
+          "write_policy" : { "enum" : ["write_back", "write_through"] },
+          "line_size" : {
+            "description" : "the number of bits in a cache line; must be equal to `2^(3*n)`, for some `n`",
+            "$ref" : "#/$defs/bits",
+            "$ref" : "#/$defs/positive_integer"
+          },
+          "size" : {
+            "description" : "the total capacity of the cache in bytes; must be a power of 2",
+            "$ref" : "#/$defs/bytes",
+            "minimum" : 1,
+            "pattern" : "^[1-9]"
+          },
+          "latencies" : { "$ref" : "#/$defs/mem_latencies" },
+          "levels" : { "$ref" : "#/$defs/cache_levels" }
+        },
+        "dependentSchemas" : {
+          "levels" : {
+            "properties" : {
+              "inclusion" : { "enum" : ["exclusive", "inclusive"] }
+            },
+            "required" : ["inclusion"]
+          }
+        },
+        "unevaluatedProperties" : false,
+        "required" : ["type", "placement", "write_policy", "line_size", "size",
+                      "latencies"]
+      }
+    }
+  }
+}
diff --git a/tbm.py b/tbm.py
new file mode 100755
index 0000000..956d21c
--- /dev/null
+++ b/tbm.py
@@ -0,0 +1,287 @@
+#! /usr/bin/env python3
+
+"""Trace based model.
+
+Models the microarchitectural behavior of a processor
+based on a trace obtained from a functional simulator.
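+
+Example invocation (assuming a JSON functional trace in trace.json):
+
+    ./tbm.py --uarch config/rvv-simple.yaml --json-trace trace.json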
+"""
+
+import json
+import logging
+from typing import Any, Dict, Sequence
+import sys
+
+import jsonschema
+import yaml
+
+from cpu import CPU
+from functional_trace import FunctionalTrace
+from memory_system import MemorySystem
+from scalar_pipe import ScalarPipe
+import scoreboard
+import tbm_options
+import utilities
+from vector_pipe import VectorPipe
+
+
+logger = logging.getLogger("tbm")
+
+
+def schema_validator() -> jsonschema.protocols.Validator:
+    # TODO(b/261619078): use importlib instead of relative path
+    schema_file_name = "config/uarch.schema.json"
+
+    with open(schema_file_name, "r", encoding="ascii") as schema_file:
+        uarch_schema = json.load(schema_file)
+
+    # Check that the schema is valid
+    vcls = jsonschema.validators.validator_for(uarch_schema)
+    try:
+        vcls.check_schema(uarch_schema)
+    except jsonschema.exceptions.SchemaError as e:
+        logger.error("in '%s':\n%s", schema_file_name, e.message)
+        sys.exit(1)
+
+    return vcls(uarch_schema)
+
+
+def validate_uarch(validator: jsonschema.protocols.Validator,
+                   uarch: Dict[str, Any]):
+    errors = sorted(validator.iter_errors(uarch), key=lambda e: e.path)
+    if errors:
+        errs = []
+        for err in errors:
+            if err.path:
+                errs.append("/".join(str(p) for p in err.path) + ": "
+                            + err.message)
+            else:
+                errs.append(err.message)
+        logger.error("Found %d errors in the microarchitecture"
+                     " configuration:\n%s",
+                     len(errors), "\n".join(errs))
+        sys.exit(1)
+
+
+def load_config_file(name: str) -> Dict[str, Any]:
+    with open(name, "r", encoding="ascii") as file:
+        if name.endswith(".json"):
+            return json.load(file)
+
+        if not name.endswith(".yaml"):
+            logger.warning("The file '%s' has an unrecognized suffix (expected"
+                           " .json or .yaml). Trying to load it as YAML.",
+                           name)
+
+        return yaml.safe_load(file)
+
+
+def load_uarch() -> Dict[str, Any]:
+    """Read micro-architecture description."""
+
+    # Read in the micro-architecture description.
+    uarch_desc = load_config_file(tbm_options.args.uarch)
+
+    # Read in the micro-architecture description schema.
+    validator = schema_validator()
+
+    # Check that the original (un-patched) uarch is valid.
+    validate_uarch(validator, uarch_desc)
+
+    # Read additional modifications from files.
+    for fl in tbm_options.args.extend:
+        logger.info("Applying modifications from file '%s'", fl)
+
+        data = load_config_file(fl)
+
+        merge_config(uarch_desc, data)
+
+    # Apply command line modifications.
+    for cl_set in tbm_options.args.set:
+        logger.info("Applying setting '%s'", cl_set)
+
+        # Split only on the first '=' so that values may contain '='.
+        path, value = cl_set.split("=", 1)
+        path = path.split(".")
+
+        apply_setting(uarch_desc, path, json.loads(value))
+
+    # Check that the patched uarch is valid
+    if tbm_options.args.extend or tbm_options.args.set:
+        validate_uarch(validator, uarch_desc)
+
+    remove_comments(uarch_desc)
+
+    if not tbm_options.args.report_dont_include_cfg:
+        if tbm_options.args.report:
+            # Save to file
+            with open(tbm_options.args.report, "w", encoding="ascii") as out:
+                print("Configuration:", file=out)
+                print(json.dumps(uarch_desc, indent=2), file=out)
+                print(file=out)
+        else:
+            # Or print to stdout
+            print("Configuration:")
+            print(json.dumps(uarch_desc, indent=2))
+            print()
+
+    return uarch_desc
+
+
+def apply_setting(uarch: Dict[str, Any], path: Sequence[str],
+                  value: Any) -> None:
+    """Modify an element of micro-architectural description.
+
+    Args:
+      uarch: uArch configuration to be modified
+      path: path through tree of dictionaries
+      value: value to be set
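+
+    For example, the command-line option '--set config.fetch_rate=8' is parsed
+    by the caller into path=["config", "fetch_rate"] and value=8, resulting in
+    uarch["config"]["fetch_rate"] = 8.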
+    """
+    # We expect path to be non-empty
+    assert path
+
+    for idx, seg in enumerate(path):
+        if seg not in uarch:
+            logger.error("attempt to override non-existent element: %s",
+                         ".".join(path[:idx+1]))
+            sys.exit(1)
+
+        # Traverse down `path` to update uarch until the last element.
+        if idx < len(path) - 1:
+            uarch = uarch[seg]
+        else:
+            # Last element
+            logger.info("Changing '%s' to '%s'", seg, value)
+            uarch[seg] = value
+
+
+def merge_config(uarch: Dict[str, Any],
+                 modification: Dict[str, Any]) -> None:
+    """Merge modification tree into uarch description.
+
+    Modification is performed by recursing down to leaves
+    replacing old entries with entries from modification.
+
+    Args:
+      uarch: uArch configuration to be modified
+      modification: configuration to be merged into uarch
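+
+    For example, merging {"config": {"fetch_rate": 8}} into a uarch description
+    changes only 'fetch_rate' inside 'config', while merging
+    {"config": {"replace": True, "fetch_rate": 8}} replaces the entire 'config'
+    object with {"fetch_rate": 8}.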
+    """
+
+    for key, val in modification.items():
+        if (key in uarch and isinstance(val, dict)
+                and not val.pop("replace", False)):
+            merge_config(uarch[key], val)
+        else:
+            logger.info("  Replacing '%s' with '%s'", key, val)
+            uarch[key] = val
+
+
+def remove_comments(desc: Dict[str, Any]) -> None:
+    comments = [
+        k for k in desc if k == "description" or k.startswith("__comment__")
+    ]
+
+    for k in comments:
+        del desc[k]
+
+    for _, val in desc.items():
+        if isinstance(val, dict):
+            remove_comments(val)
+
+
+def create_scoreboard(uid: str, desc: Dict[str, Any],
+                      config_desc: Dict[str, Any]):
+    if desc["type"] == "scalar":
+        return scoreboard.Preemptive(uid, desc)
+
+    if desc["type"] == "vector":
+        return scoreboard.VecPreemptive(uid, desc, config_desc["vector_slices"])
+
+    assert False
+
+
+def create_cpu(uarch_desc: Dict[str, Any], in_trace: FunctionalTrace) -> CPU:
+    """Create CPU."""
+
+    # Read in the pipe maps.
+    pipe_map = {}
+    pipe_map_keys = pipe_map.keys()  # This is a dynamic view
+    for pm_file in uarch_desc["pipe_maps"]:
+        with open(pm_file, "r", encoding="ascii") as pm_io:
+            pm = json.load(pm_io)
+
+        pm.pop("__comment__", None)
+
+        if pipe_map_keys & pm.keys():
+            logger.error("instruction(s) with multiple mappings: %s",
+                   ", ".join(pipe_map.keys() & pm.keys()))
+            sys.exit(1)
+
+        pipe_map.update(pm)
+
+    pipe_map = {k: v for k, v in pipe_map.items() if v != "UNKNOWN"}
+
+    rf_scoreboards = {
+        uid: create_scoreboard(uid, rf_desc, uarch_desc["config"])
+        for uid, rf_desc in uarch_desc["register_files"].items()
+    }
+
+    mem_sys = MemorySystem(uarch_desc["memory_system"])
+
+    cpu = CPU(pipe_map, rf_scoreboards, mem_sys, uarch_desc["config"], in_trace)
+
+    for uid, iq_desc in uarch_desc["issue_queues"].items():
+        cpu.sched_unit.add_queue(uid, iq_desc)
+
+    for kind, fu_desc in uarch_desc["functional_units"].items():
+        if fu_desc["type"] == "scalar":
+            cpu.exec_unit.add_pipe(kind,
+                [ScalarPipe(f"{kind}{i}", kind, fu_desc, cpu.mem_sys,
+                            rf_scoreboards)
+                 for i in range(fu_desc.get("count", 1))])
+        else:
+            assert fu_desc["type"] == "vector"
+
+            cpu.exec_unit.add_pipe(kind,
+                [VectorPipe(f"{kind}{i}", kind, fu_desc,
+                            uarch_desc["config"]["vector_slices"], cpu.mem_sys,
+                            rf_scoreboards)
+                 for i in range(fu_desc.get("count", 1))])
+
+    return cpu
+
+
+def main(argv: Sequence[str]) -> int:
+    tbm_options.parse_args(argv, description=__doc__)
+    # This assert convinces pytype that args is not None.
+    assert tbm_options.args is not None
+
+    log_level = logging.WARNING
+    if tbm_options.args.verbose > 0:
+        log_level = logging.INFO
+
+    utilities.logging_config(log_level)
+
+    uarch = load_uarch()
+
+    if tbm_options.args.trace is None:
+        tr = FunctionalTrace.from_json(sys.stdin, tbm_options.args.instructions)
+        cpu = create_cpu(uarch, tr)
+        cpu.simulate()
+        return 0
+
+    if tbm_options.args.json_trace:
+        with open(tbm_options.args.trace, "r", encoding="ascii") as in_trace:
+            tr = FunctionalTrace.from_json(in_trace,
+                                           tbm_options.args.instructions)
+            cpu = create_cpu(uarch, tr)
+            cpu.simulate()
+            return 0
+
+    with open(tbm_options.args.trace, "rb") as in_trace:
+        tr = FunctionalTrace.from_fb(in_trace, tbm_options.args.instructions)
+        cpu = create_cpu(uarch, tr)
+        cpu.simulate()
+        return 0
+
+
+if __name__ == '__main__':
+    sys.exit(main(sys.argv[1:]))
diff --git a/tbm_options.py b/tbm_options.py
new file mode 100644
index 0000000..159e8a3
--- /dev/null
+++ b/tbm_options.py
@@ -0,0 +1,104 @@
+""" Store command-line options for global access. """
+
+import argparse
+from typing import Sequence
+
+args = None
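+
+# Typical usage (a minimal sketch): call parse_args() once at start-up, then
+# read the parsed options through the module-level `args`, e.g.:
+#
+#   tbm_options.parse_args(sys.argv[1:], description="TBM")
+#   uarch_file = tbm_options.args.uarch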
+
+
+def parse_args(argv: Sequence[str], description: str) -> None:
+    parser = argparse.ArgumentParser(
+        description=description,
+        formatter_class=argparse.ArgumentDefaultsHelpFormatter)
+
+    parser.add_argument("-u",
+                        "--uarch",
+                        required=True,
+                        help="Microarchitecture configuration file",
+                        metavar="JSON")
+
+    parser.add_argument("-e",
+                        "--extend",
+                        action="append",
+                        default=[],
+                        help="Extension used to modify microarchitecture. This"
+                        " option can be used multiple times.",
+                        metavar="JSON")
+
+    parser.add_argument("-s",
+                        "--set",
+                        action="append",
+                        default=[],
+                        help="Modify individual parts of the microarchitecture."
+                        " This option can be used multiple times.",
+                        metavar="PATH=VALUE")
+
+    parser.add_argument("-r",
+                        "--report",
+                        help="Print report to FILE (otherwise report is printed"
+                        " to stdout)",
+                        metavar="FILE")
+
+    parser.add_argument("--report-dont-include-cfg",
+                        action='store_true',
+                        help="Don't include the configuration with the report.",
+                        dest="report_dont_include_cfg")
+
+    parser.add_argument("--save-counters",
+                        help="Save counters to FILE for later processing",
+                        metavar="FILE",
+                        dest="save_counters")
+
+    parser.add_argument("-t",
+                        "--print-trace",
+                        choices=["detailed", "three-valued"],
+                        help="Print cycle-by-cycle trace",
+                        dest="print_trace")
+
+    parser.add_argument("--print-from-cycle",
+                        default=0,
+                        type=int,
+                        help="Start printing only from cycle N",
+                        metavar="N",
+                        dest="print_from_cycle")
+
+    parser.add_argument("--cycles",
+                        type=int,
+                        help="Stop running after N cycles",
+                        metavar="N",
+                        dest="print_cycles")
+
+    parser.add_argument("--instructions",
+                        default="0:",
+                        help="Restrict the run to the instructions between N"
+                        " and M",
+                        metavar="N:[M]")
+
+    parser.add_argument("--json-trace",
+                        action='store_true',
+                        help="Read the input trace as JSON",
+                        dest="json_trace")
+
+    parser.add_argument("--json-trace-buffer-size",
+                        type=int,
+                        default=100000,
+                        help="For efficiency, read N instructions at a time.",
+                        metavar="N",
+                        dest="json_trace_buffer_size")
+
+    # The -v flag is set up so that verbose holds the number of times the flag
+    # was used. This is the standard way to use -v, even though at the moment
+    # we have only two levels of verbosity: warning (the default, with no -v),
+    # and info.
+    parser.add_argument("-v",
+                        "--verbose",
+                        default=0,
+                        action="count",
+                        help="Increase the verbosity level. By default only"
+                        " errors and warnings will show. Use '-v' to also show"
+                        " information messages.")
+
+    parser.add_argument("trace", nargs="?", help="Input trace", metavar="FILE")
+
+    global args  # pylint: disable=global-statement
+    args = parser.parse_args(argv)