feature: Make files for building and running tbm

Also updates README.md. See there for details about each of the make files.

Change-Id: I7ed9f4ef29305c980641bdf1e9d432d1c4c73672
diff --git a/Makefile b/Makefile
new file mode 100644
index 0000000..cc7af4d
--- /dev/null
+++ b/Makefile
@@ -0,0 +1,60 @@
+include common.mk
+
+.DEFAULT_GOAL := all
+all: $(OUT_PACKAGES)/FBInstruction/Instruction.py
+
+# flatc actually generates two files, Instruction.py and Instructions.py. make
+# 4.3 supports `&:` for multiple targets, but it's probably not wise to depend
+# on such a new version of make, so instead we use Instruction.py as a sentinel
+# for both files.
+$(OUT_PACKAGES)/FBInstruction/Instruction.py: config/instruction.fbs | $(OUT_PACKAGES)
+	$(FLATC) -o $(OUT_PACKAGES) --python $<
+
+$(OUT_PACKAGES):
+	mkdir -p $@
+
+# Regenerate the pipe-maps for RISC-V, based on the opcodes from the
+# riscv-opcodes repo. The associated pipes are copied from the old json file.
+RISCV_EXTS := $(shell find $(ROOTDIR)/toolchain/riscv-opcodes -maxdepth 1 -name 'opcodes-*' -printf '%f\n' | cut -d- -f2-)
+riscv_pipe_maps: $(RISCV_EXTS:%=pipe_maps/riscv/%.json)
+.PHONY: riscv_pipe_maps
+
+pipe_maps/riscv/%.json: $(ROOTDIR)/toolchain/riscv-opcodes/opcodes-%
+	$(PYTHON) $(IMPORT_RISCV_OPCODES) $(if $(wildcard $@),-m $@) -n $@.new $<
+	mv $@.new $@
+
+requirements.txt:
+	pip list --format=freeze --local --not-required --exclude=pip --exclude=setuptools > $@
+.PHONY: requirements.txt
+
+lint:
+	$(PYLINT) tbm/*.py
+.PHONY:lint
+
+type-check:
+	$(PYTYPE) tbm/tbm.py tbm/gentrace-spike.py tbm/merge-counters.py
+.PHONY: type-check
+
+# After running pytype you can merge the inferred types into the .py files.
+# `make merge-pyi` will merge to all files.
+# `make merge-pyi-<module>` will merge only to module.
+define merge-pyi
+merge-pyi-$(1):
+	$(MERGE_PYI) -i $(2) $(3)
+.PHONY: merge-pyi-$(1)
+merge-pyi: merge-pyi-$(1)
+endef
+.PHONY: merge-pyi
+
+$(foreach py,$(wildcard tbm/*.py),\
+	$(eval name = $(basename $(notdir $(py))))\
+	$(eval pyi = .pytype/pyi/$(name).pyi)\
+	$(if $(wildcard $(pyi)),\
+		$(eval $(call merge-pyi,$(name),$(py),$(pyi)))))
+
+clean:
+	$(RM) $(OUT_PACKAGES)/FBInstruction/Instruction.py
+	$(RM) $(OUT_PACKAGES)/FBInstruction/Instructions.py
+	$(RM) -r __pycache__
+	$(RM) -r .pytype
+.PHONY: clean
diff --git a/README.md b/README.md
index a1690b6..7f8b003 100644
--- a/README.md
+++ b/README.md
@@ -1,6 +1,6 @@
 # Trace Based Model (TBM)
 
-## Code structure
+## Project structure
 
 ### Executables:
 
@@ -27,9 +27,6 @@
   ScalarPipe, VectorPipe, scoreboard.Preemptive and scoreboard.VecPreemptive).
 - tbm/fetch_unit.py - defines FetchUnit.
 - tbm/functional_trace.py - reads a trace (as generated by gentrace-*.py).
-- tbm/instruction.fbs - FlatBuffer schema for the Instruction data class (used for
-  saving elaborated traces). The FBInstruction.Instruction module is generated
-  from this file.
 - tbm/instruction.py - defines Instruction, a data class representing a single
   instruction instance in the trace.
 - tbm/interfaces.py - defines the internal API. This will be more important when we
@@ -42,8 +39,51 @@
 - tbm/utilities.py - general purpose constructs.
 - tbm/vector_pipe.py - defines VectorPipe, a vector functional unit model.
 
-### Other files:
+### TBM configuration files:
 
-- config/uarch.schema.json - JSON schema for uArch configuration files.
+- config/instruction.fbs - FlatBuffer schema for the Instruction data class (used for
+  saving elaborated traces). The FBInstruction.Instruction module is generated
+  from this file.
 - config/rvv-simple.yaml - a uArch configuration example.
+- config/uarch.schema.json - JSON schema for uArch configuration files.
 - pipe_maps/riscv/*.json - pipe-maps, mapping RISC-V opcodes to functional units.
+
+### Build files:
+
+- common.mk - variables shared by all the make files (tool names, script paths and output directories).
+- integration-tests.mk - runs tbm on some ML models.
+- Makefile - builds things that are needed for tbm to run.
+- riscv_tests.mk riscv_tests_isa.mk - runs tbm on tests from `$OUT/springbok/riscv-tests`.
+- rvv_tests.mk - runs tbm on tests from `$OUT/springbok/rvv_for_tbm/tests`.
+- tbm.mk - rules for running tbm.
+
+
+## How to use the make files
+
+### Building TBM:
+
+Before running any of the TBM tools you must run `make all`.
+
+To update the RISC-V pipe-maps in `pipe_maps/riscv` run `make riscv_pipe_maps`.
+This will import missing opcodes from `$ROOTDIR/toolchain/riscv-opcodes`, and
+will remove spurious ones. New opcodes are mapped to "UNKNOWN". You can also
+update individual RISC-V pipe-maps like this: `make pipe_maps/riscv/rv32a.json`.
+
+To run a linter on all the Python files in `tbm/` run `make lint`.
+
+To type-check all the Python scripts run `make type-check`. After running the
+type checker you can merge inferred types back to the `.py` files by running
+`make merge-pyi` to merge into all `.py` files, or `make merge-pyi-MOD` to
+merge into `tbm/MOD.py`.
+
+### Running tests from shodan
+
+- `make -f integration-tests.mk integration_tests` - most of these tests are
+actually not very good for integration testing as they take far too long to
+complete.
+- `make -f riscv_tests.mk riscv_tests_isa` - run some of the tests from `$ROOTDIR/out/springbok/riscv-tests/isa`.
+- `make -f riscv_tests.mk riscv_tests_benchmarks` - run the tests from `$ROOTDIR/out/springbok/riscv-tests/benchmarks`.
+- `make -f riscv_tests.mk riscv_tests` - run the two previous targets.
+- `make -f riscv_tests.mk benchmarks` - runs the above benchmarks and generates
+  the file `benchmarks.md` with all the results.
+- `make -f rvv_tests.mk rvv_tests` - run the tests from `$ROOTDIR/out/springbok/rvv_for_tbm/tests/`.
diff --git a/common.mk b/common.mk
new file mode 100644
index 0000000..da14440
--- /dev/null
+++ b/common.mk
@@ -0,0 +1,22 @@
+ifndef ROOTDIR
+$(error "ROOTDIR not defined! Did you forget to run 'source build/setup.sh' in the repo root?")
+endif
+
+OUT_PACKAGES := $(OUT)/tbm/packages
+export PYTHONPATH := $(OUT_PACKAGES):$(PYTHONPATH)
+
+OUT_TRACES := $(OUT)/tbm/traces
+
+GENTRACE := tbm/gentrace-spike.py
+IMPORT_RISCV_OPCODES := tbm/import-riscv-opcodes.py
+MERGE_COUNTERS := tbm/merge-counters.py
+TBM := tbm/tbm.py
+
+FLATC := flatc
+MERGE_PYI := merge-pyi
+PYLINT := pylint
+PYTHON := python3
+PYTYPE := pytype
+SPIKE := $(OUT)/host/spike/bin/spike
+
+UARCH := config/rvv-simple.yaml
diff --git a/tbm/instruction.fbs b/config/instruction.fbs
similarity index 100%
rename from tbm/instruction.fbs
rename to config/instruction.fbs
diff --git a/integration-tests.mk b/integration-tests.mk
new file mode 100644
index 0000000..6c9ef69
--- /dev/null
+++ b/integration-tests.mk
@@ -0,0 +1,54 @@
+.DELETE_ON_ERROR:
+
+include tbm.mk
+
+.PHONY: integration_tests
+
+clean_integration_tests:
+	$(RM) -r $(OUT_TRACES)/integration
+.PHONY: clean_integration_tests
+clean: clean_integration_tests
+
+$(OUT_TRACES)/integration:
+	mkdir -p $@
+
+###############################################################################
+# To build mobilenet_v1_bytecode_static: `m iree`
+
+$(OUT_TRACES)/integration/quant_mobilenet_v1_bytecode_static.spike: $(OUT)/springbok_iree/quant_models/mobilenet_v1_bytecode_static | $(OUT_TRACES)/integration
+$(OUT_TRACES)/integration/quant_mobilenet_v1_bytecode_static.spike: SPIKE_ENTRY := 0x80000000
+$(OUT_TRACES)/integration/quant_mobilenet_v1_bytecode_static.spike: SPIKE_MEM := 0x80000000:0x100000,0x81000000:0x6000000
+$(OUT_TRACES)/integration/quant_mobilenet_v1_bytecode_static.tbm_log: $(UARCH)
+$(eval $(call tbm_merge_log,$(OUT_TRACES)/integration/quant_mobilenet_v1_bytecode_static,10000000,11,$(UARCH)))
+
+integration_tests: $(OUT_TRACES)/integration/quant_mobilenet_v1_bytecode_static.tbm_log
+
+###############################################################################
+# To build mobilenet_v1_bytecode_static: `m iree_no_wmmu`
+
+$(OUT_TRACES)/integration/float_mobilenet_v1_bytecode_static.spike: $(OUT)/springbok_iree_no_wmmu/float_models/mobilenet_v1_bytecode_static | $(OUT_TRACES)/integration
+$(OUT_TRACES)/integration/float_mobilenet_v1_bytecode_static.spike: SPIKE_ENTRY := 0x34000000
+$(OUT_TRACES)/integration/float_mobilenet_v1_bytecode_static.spike: SPIKE_MEM := 0x34000000:0x1000000
+$(OUT_TRACES)/integration/float_mobilenet_v1_bytecode_static.tbm_log: $(UARCH)
+$(eval $(call tbm_merge_log,$(OUT_TRACES)/integration/float_mobilenet_v1_bytecode_static,10000000,11,$(UARCH)))
+
+integration_tests: $(OUT_TRACES)/integration/float_mobilenet_v1_bytecode_static.tbm_log
+
+###############################################################################
+# To build mnist_bytecode_static: `m iree_no_wmmu`
+
+$(OUT_TRACES)/integration/float_mnist_bytecode_static.spike: $(OUT)/springbok_iree_no_wmmu/float_models/mnist_bytecode_static | $(OUT_TRACES)/integration
+$(OUT_TRACES)/integration/float_mnist_bytecode_static.spike: SPIKE_ENTRY := 0x34000000
+$(OUT_TRACES)/integration/float_mnist_bytecode_static.spike: SPIKE_MEM := 0x34000000:0x1000000
+$(OUT_TRACES)/integration/float_mnist_bytecode_static.tbm_log: $(UARCH)
+
+integration_tests: $(OUT_TRACES)/integration/float_mnist_bytecode_static.tbm_log
+
+###############################################################################
+# To build vector_executive.elf: `m springbok`
+
+$(OUT_TRACES)/integration/vector_executive.spike: $(OUT)/springbok/rvv/vector_executive/vector_executive.elf | $(OUT_TRACES)/integration
+$(OUT_TRACES)/integration/vector_executive.tbm_log: $(SPRINGBOK_UARCH)
+$(OUT_TRACES)/integration/vector_executive.tbm_log: $(UARCH)
+
+integration_tests: $(OUT_TRACES)/integration/vector_executive.tbm_log
diff --git a/riscv_tests.mk b/riscv_tests.mk
new file mode 100644
index 0000000..b6faca5
--- /dev/null
+++ b/riscv_tests.mk
@@ -0,0 +1,114 @@
+# Run tbm on the riscv-tests.
+# First, build the elf files `m springbok_riscv_tests`
+
+# This must be the first thing in the file:
+THIS_MAKEFILE := $(lastword $(MAKEFILE_LIST))
+
+.DELETE_ON_ERROR:
+
+include tbm.mk
+
+.PHONY: riscv_tests clean_riscv_tests
+clean: clean_riscv_tests
+
+RISCV_TESTS_DIR = $(OUT)/springbok/riscv-tests
+
+###############################################################################
+## Run tbm on the ELF files from $(RISCV_TESTS_DIR)/isa
+
+# Defines RISCV_TESTS_ISA
+include riscv_tests_isa.mk
+
+# TODO(sflur): is there a better way to list only the elf files?
+riscv_tests_isa.mk: $(wildcard $(RISCV_TESTS_DIR)/isa/*)
+	{ echo "## AUTO GENERATED FILE, DO NOT EDIT!";\
+	  echo "$$(cd $(RISCV_TESTS_DIR)/isa && file * |\
+	        sed -n 's/\([^:]\+\):[[:space:]]*ELF.*/RISCV_TESTS_ISA += \1/p')";\
+	} > $@
+
+# We only support user-level:
+RISCV_TESTS_ISA := $(filter rv32u%,$(RISCV_TESTS_ISA))
+# We don't support the A-ext:
+RISCV_TESTS_ISA := $(filter-out rv32ua-%,$(RISCV_TESTS_ISA))
+# We don't support the C-ext:
+RISCV_TESTS_ISA := $(filter-out rv32uc-%,$(RISCV_TESTS_ISA))
+# We don't support the D-ext:
+RISCV_TESTS_ISA := $(filter-out rv32ud-%,$(RISCV_TESTS_ISA))
+
+
+define isa_rules
+$$(OUT_TRACES)/riscv-tests/isa/$1.spike: $$(RISCV_TESTS_DIR)/isa/$1 | $$(OUT_TRACES)/riscv-tests/isa
+# The tests don't terminate nicely, so we have to use CYCLES
+$$(OUT_TRACES)/riscv-tests/isa/$1.spike: CYCLES = 10000
+$$(OUT_TRACES)/riscv-tests/isa/$1.tbm_log: $$(UARCH)
+
+riscv_tests_isa: $$(OUT_TRACES)/riscv-tests/isa/$1.tbm_log
+endef
+
+$(foreach t,$(RISCV_TESTS_ISA),$(eval $(call isa_rules,$(t))))
+
+$(OUT_TRACES)/riscv-tests/isa:
+	mkdir -p $@
+
+.PHONY: riscv_tests_isa
+riscv_tests: riscv_tests_isa
+
+clean_riscv_tests_isa:
+	$(RM) riscv_tests_isa.mk
+	$(RM) -r $(OUT_TRACES)/riscv-tests/isa
+.PHONY: clean_riscv_tests_isa
+clean_riscv_tests: clean_riscv_tests_isa
+
+###############################################################################
+## Run tbm on the ELF files from $(RISCV_TESTS_DIR)/benchmarks
+
+RISCV_TESTS_BENCHMARKS := $(wildcard $(RISCV_TESTS_DIR)/benchmarks/*.riscv)
+
+define benchmarks_rules
+$$(OUT_TRACES)/riscv-tests/benchmarks/$1.spike: $$(RISCV_TESTS_DIR)/benchmarks/$1 | $$(OUT_TRACES)/riscv-tests/benchmarks
+$$(OUT_TRACES)/riscv-tests/benchmarks/$1.spike: SPIKE_MEM := 0x80000000:0x100000
+$$(OUT_TRACES)/riscv-tests/benchmarks/$1.spike: SPIKE_ENTRY := 0x80000000
+$$(OUT_TRACES)/riscv-tests/benchmarks/$1.tbm_log: $$(UARCH)
+
+riscv_tests_benchmarks: $$(OUT_TRACES)/riscv-tests/benchmarks/$1.tbm_log
+
+RISCV_TESTS_BENCHMARKS_LOGS += $$(OUT_TRACES)/riscv-tests/benchmarks/$1.tbm_log
+endef
+
+$(foreach t,$(RISCV_TESTS_BENCHMARKS),$(eval $(call benchmarks_rules,$(notdir $(t)))))
+
+$(OUT_TRACES)/riscv-tests/benchmarks:
+	mkdir -p $@
+
+.PHONY: riscv_tests_benchmarks
+riscv_tests: riscv_tests_benchmarks
+
+# We use -B to make sure everything is up to date
+benchmarks:
+	TBM_OPTS=--report-dont-include-cfg $(MAKE) -f $(THIS_MAKEFILE) -B $(OUT_TRACES)/riscv-tests/benchmarks/benchmarks.md
+.PHONY: benchmarks
+
+$(OUT_TRACES)/riscv-tests/benchmarks/benchmarks.md: $(RISCV_TESTS_BENCHMARKS_LOGS)
+	@{ echo "# Benchmark Results #";\
+	  echo;\
+	  echo "Date: $$(date --iso-8601=seconds --utc)";\
+	  echo "uArch config ($(UARCH)):";\
+	  echo '```';\
+	  cat "$(UARCH)";\
+	  echo '```';\
+	  $(foreach test,$(RISCV_TESTS_BENCHMARKS),\
+	    echo;\
+	    echo "### Test: $(notdir $(test)) ###";\
+	    echo;\
+	    echo "ELF file: $(test:$(ROOTDIR)/%=%)";\
+	    echo;\
+	    echo '```';\
+	    cat "$(test:$(RISCV_TESTS_DIR)/%=$(OUT_TRACES)/riscv-tests/%.tbm_log)";\
+	    echo '```';\
+	  )\
+	} > $@
+
+clean_riscv_tests_benchmarks:
+	$(RM) -r $(OUT_TRACES)/riscv-tests/benchmarks
+.PHONY: clean_riscv_tests_benchmarks
+clean_riscv_tests: clean_riscv_tests_benchmarks
diff --git a/rvv_tests.mk b/rvv_tests.mk
new file mode 100644
index 0000000..1ee200a
--- /dev/null
+++ b/rvv_tests.mk
@@ -0,0 +1,27 @@
+# First, build the elf files `m springbok_for_tbm`
+
+.DELETE_ON_ERROR:
+
+include tbm.mk
+
+RVV_TESTS_DIR = $(OUT)/springbok/rvv_for_tbm/tests/
+RVV_TESTS := $(wildcard $(RVV_TESTS_DIR)/*.elf)
+
+define rvv_rules
+$$(OUT_TRACES)/rvv/tests/$1.spike: $(RVV_TESTS_DIR)/$1.elf | $(OUT_TRACES)/rvv/tests
+$$(OUT_TRACES)/rvv/tests/$1.tbm_log: $(UARCH)
+
+rvv_tests: $$(OUT_TRACES)/rvv/tests/$1.tbm_log
+endef
+
+$(foreach t,$(RVV_TESTS),$(eval $(call rvv_rules,$(basename $(notdir $(t))))))
+
+$(OUT_TRACES)/rvv/tests:
+	mkdir -p $@
+
+.PHONY: rvv_tests
+
+clean_rvv_tests:
+	$(RM) -r $(OUT_TRACES)/rvv/tests
+.PHONY: clean_rvv_tests
+clean: clean_rvv_tests
diff --git a/tbm.mk b/tbm.mk
new file mode 100644
index 0000000..3899f3e
--- /dev/null
+++ b/tbm.mk
@@ -0,0 +1,97 @@
+# Make rules for running TBM (and related tools)
+
+include common.mk
+
+FORCE:
+.PHONY: FORCE
+
+###############################################################################
+## Run Spike to generate .spike functional traces
+
+# The memory regions, and entry point can be found like this:
+# <toolchain>/bin/riscv32-unknown-elf-readelf -l <elf-file>
+SPIKE_MEM := 0x34000000:0x1000000
+SPIKE_ENTRY := 0x34000000
+SPIKE_OPTS += $(if $(SPIKE_MEM),-m$(SPIKE_MEM))
+SPIKE_OPTS += $(if $(SPIKE_ENTRY),--pc=$(SPIKE_ENTRY))
+SPIKE_OPTS += --varch=vlen:512,elen:32
+SPIKE_OPTS += -l --log-commits
+
+# The first prerequisite must be the ELF file.
+# CYCLES can be a number, in which case the trace will terminate after that
+# many instructions have been executed.
+%.spike:
+	$(RM) $@ $@.tmp
+	{ echo "run" $(CYCLES); echo "quit"; } > $@.cmd
+	$(SPIKE) $(SPIKE_OPTS) -d --debug-cmd=$@.cmd --log=$@.tmp $<
+	mv $@.tmp $@
+# It would be more appropriate to use `.SECONDARY`, instead of `.PRECIOUS`, but
+# only `.PRECIOUS` supports the `%` wildcard. To compensate for the difference
+# we first write to a .tmp file and then mv it to the real target.
+.PRECIOUS: %.spike
+
+###############################################################################
+## Run gentrace-spike.py to generate .trace elaborated functional traces
+
+%.trace.json: GENTRACE_OPTS += --json
+%.trace %.trace.json: %.spike
+	$(RM) $@ $@.tmp
+	$(PYTHON) $(GENTRACE) $(GENTRACE_OPTS) --outfile $@.tmp $<
+	mv $@.tmp $@
+# It would be more appropriate to use `.SECONDARY`, instead of `.PRECIOUS`, but
+# only `.PRECIOUS` supports the `%` wildcard. To compensate for the difference
+# we first write to a .tmp file and then mv it to the real target.
+.PRECIOUS: %.trace
+
+###############################################################################
+## Run tbm.py to generate .tbm_log reports
+
+# This is a small hack to let you run tbm.py with --print-trace instead of
+# --report. The trace is printed to stdout and the .tbm_log file is not
+# touched. See 'tbm.py -h' for the possible values of TRACE (i.e. the values
+# --print-trace accepts).
+ifeq "$(TRACE)" ""
+  REPORT += --report $@
+else
+  REPORT += --print-trace $(TRACE)
+endif
+
+# Use this target when you expect tbm to be able to handle the whole trace.
+%.tbm_log: %.trace $(if $(TRACE),FORCE)
+	$(PYTHON) $(TBM) --uarch $(UARCH) $(TBM_OPTS) $(REPORT) $<
+
+# Use this macro to run tbm in multiple concurrent instances, each one handling
+# a different segment of the trace. The result is less accurate than running a
+# single instance, but can be much faster with 'make -j $(nproc)'.
+# Usage: $(eval $(call tbm_merge_log,<OUT>,<I>,<N>,<UARCH>))
+# this will generate a rule for <OUT>.tbm_merge_log, and rules for <N>
+# .tbm_counters files, each one covering a range of <I> instructions (last one
+# is open-ended).
+# Example: $(eval $(call tbm_merge_log,$(OUT)/test,400,3,uarch.json))
+define tbm_merge_log
+_RANGE := $$(shell i=0; while [ "$$$$i" -lt "$$$$(( $(3) - 1 ))" ]; do echo "$$$$(( i * $(2) )):$$$$(( (i + 1) * $(2) ))"; i=$$$$(( i + 1 )); done; echo "$$$$(( i * $(2) )):")
+$$(foreach r,$$(_RANGE),$$(eval $$(call _tbm_merge_log,$(1),$$(r),$(4))))
+endef
+
+define _tbm_merge_log
+_RANGE := $$(subst :, ,$(2))
+_START := $$(word 1,$$(_RANGE))
+_END := $$(word 2,$$(_RANGE))
+$$(eval $$(call tbm_counters,$(1),$$(_START),$$(_END),$(3)))
+endef
+
+# Don't call directly, see the tbm_merge_log macro above.
+%.tbm_merge_log:
+	$(PYTHON) $(MERGE_COUNTERS) --report $@ $^
+
+# Don't call directly, see the tbm_merge_log macro above.
+# Usage: $(eval $(call tbm_counters,<OUT>,<N>,<M>,<UARCH>)).
+# this will generate a rule for <OUT>.<N>_<M>.tbm_counters, which covers the
+# instruction range <N> to <M>.
+# Example: $(eval $(call tbm_counters,$(OUT)/test,0,10000,default_arch.json))
+define tbm_counters
+$(1).$(2)_$(3).tbm_counters: $(1).trace $4
+	$$(PYTHON) $$(TBM) --uarch $(4) --save-counters $$@ --instructions "$(2):$(3)" $$(TBM_OPTS) $$<
+
+$(1).tbm_merge_log: $(1).$(2)_$(3).tbm_counters
+endef