Add Conv1x1 microbenchmark

A simple example of deploying the tosa.conv2d 1x1 model.

The input is filled with the conv2d input zero point (-128), i.e. it is
effectively zero, so the expected output is just the rescaled bias.

Change-Id: Ied4f02a387aff5c180af938a06602b5103c50844
diff --git a/samples/microbenchmarks/CMakeLists.txt b/samples/microbenchmarks/CMakeLists.txt
new file mode 100644
index 0000000..df7230b
--- /dev/null
+++ b/samples/microbenchmarks/CMakeLists.txt
@@ -0,0 +1,34 @@
+# -------------------------------------------------------------------------------
+# Build the mlir bytecode modules with iree-compile. Note the last two flags
+# are for RVV support.
+# https://github.com/llvm/llvm-project/blob/0eeab8b/llvm/lib/Target/RISCV/RISCVSubtarget.cpp#L30-L51
+# -------------------------------------------------------------------------------
+
+springbok_c_module(
+  NAME
+    conv1x1_test
+  SRC
+    "conv1x1_test.mlir"
+  FLAGS
+    "-iree-input-type=tosa"  # conv1x1_test.mlir is written with tosa.* ops
+    "-riscv-v-vector-bits-min=512"  # RVV flag (see the header comment above)
+    "-riscv-v-fixed-length-vector-lmul-max=8"  # RVV flag
+  PUBLIC
+)
+
+springbok_test(
+  NAME
+    conv1x1_test_emitc_static
+  SRCS
+    "conv1x1_test.c"
+  DEPS
+    ::conv1x1_test_c  # presumably produced by springbok_c_module above; confirm
+    ::conv1x1_test_emitc  # presumably produced by springbok_c_module; confirm
+    model_util::util  # presumably provides model_util/util.h; confirm
+  LINKOPTS
+    "LINKER:--defsym=__stack_size__=64k"  # defines linker symbol __stack_size__
+  COPTS
+    "-DBUILD_EMITC_STATIC"  # defines the BUILD_EMITC_STATIC macro for the SRCS
+  TESTFILES
+    "conv1x1_test.run"  # holds the RUN line that launches this test binary
+)
diff --git a/samples/microbenchmarks/conv1x1_test.c b/samples/microbenchmarks/conv1x1_test.c
new file mode 100644
index 0000000..68932bc
--- /dev/null
+++ b/samples/microbenchmarks/conv1x1_test.c
@@ -0,0 +1,65 @@
+// conv 1x1 tosa op test.
+
+#include "iree/base/api.h"
+#include "iree/hal/api.h"
+#include "model_util/util.h"
+#include "samples/microbenchmarks/conv1x1_test_c.h"
+#include "samples/microbenchmarks/conv1x1_test_emitc.h"
+
+const MlModel kModel = {
+    .model_name = "conv1x1_quant",
+    .entry_func = "module.main",
+    .hal_element_type = IREE_HAL_ELEMENT_TYPE_SINT_8,
+    .num_input = 1,
+    .num_input_dim = {4},  // number of entries in input_shape[0]
+    .input_shape = {{1, 112, 112, 8}},
+    .input_length = {1 * 112 * 112 * 8},  // element count of input_shape[0]
+    .input_size_bytes = {sizeof(int8_t)},
+    .num_output = 1,
+    .output_length = {1 * 112 * 112 * 16},  // elements of the 1x112x112x16 result
+    .output_size_bytes = sizeof(int8_t),
+};
+// Forwards to module_create() with the system allocator; returns its status.
+iree_status_t create_module(iree_vm_instance_t *instance,
+                            iree_vm_module_t **module) {
+  return module_create(instance, iree_allocator_system(), module);
+}
+// Returns the executable library query function linked in for conv1x1_test.
+iree_hal_executable_library_query_fn_t library_query(void) {
+  return conv1x1_test_linked_llvm_cpu_library_query;
+}
+
+iree_status_t load_input_data(const MlModel *model, void **buffer,
+                              iree_const_byte_span_t **byte_span) {
+  IREE_RETURN_IF_ERROR(alloc_input_buffer(model, buffer));  // no buffer: stop
+  const size_t num_bytes = model->input_length[0] * model->input_size_bytes[0];
+  memset(buffer[0], -128, num_bytes);  // -128 == input_zp, so output == bias
+  byte_span[0] = malloc(sizeof(*byte_span[0]));
+  if (byte_span[0] == NULL) {
+    return iree_make_status(IREE_STATUS_RESOURCE_EXHAUSTED, "byte_span alloc");
+  }
+  *byte_span[0] = iree_make_const_byte_span(buffer[0], num_bytes);
+  return iree_ok_status();
+}
+
+iree_status_t process_output(const MlModel *model,
+                             iree_hal_buffer_mapping_t *buffers,
+                             MlOutput *output) {
+  // Input == input_zp, so conv2d yields the bias and every pixel repeats the 16
+  // values (bias * multiplier >> shift) + output_zp, saturated to int8.
+  static const int8_t kExpectedOutput[] = {-128, -128, -125, -128, -128, 127,
+                                           -107, -128, -128, -128, -128, -105,
+                                           74,   127,  -128, 69};
+  const size_t num_channels =
+      sizeof(kExpectedOutput) / sizeof(kExpectedOutput[0]);
+  const int8_t *actual = (const int8_t *)buffers[0].contents.data;
+  const size_t num_pixels = model->output_length[0] / num_channels;
+  for (size_t i = 0; i < num_pixels * num_channels; ++i) {
+    if (actual[i] != kExpectedOutput[i % num_channels]) {
+      // Return at the first mismatch so only one failure status is ever
+      // created; creating another would overwrite (and leak) the previous one.
+      return iree_make_status(IREE_STATUS_UNKNOWN, "result mismatches");
+    }
+  }
+  return iree_ok_status();
+}
diff --git a/samples/microbenchmarks/conv1x1_test.mlir b/samples/microbenchmarks/conv1x1_test.mlir
new file mode 100644
index 0000000..587f24a
--- /dev/null
+++ b/samples/microbenchmarks/conv1x1_test.mlir
@@ -0,0 +1,9 @@
+module {
+  func.func @main(%arg0: tensor<1x112x112x8xi8>) -> tensor<1x112x112x16xi8> {
+    %0 = "tosa.const"() {value = dense<[-918, -4433, 87, -234, -21393, 7738, 529, -8835, -16817, -375, -199, 572, 5082, 15569, -186, 4955]> : tensor<16xi32>} : () -> tensor<16xi32>  // %0: 16 i32 values, the third (bias) operand of the conv2d below
+    %1 = "tosa.const"() {value = dense<"0xE2E2E2E2E2EBF6E206E2E2E8EBE6DEE2E0E2E2FDDCE7E1E2E2E2E2FDDFE6E0E3EBE2E2DCEFAC213DE2E2E2E3E2D3C9E4E2E2E2E2E3E1E1FA3EE2E2E9E9E5E3E47FE2E2E4EDE2E2E3E0E2E2E4DFE9DCFEE3E2E2E2E2E2E2E2F9E2E2E6EBEADCE4E2E2E2CEE5E1E0E181E2E2D9D3DEE1DDE2E2E2E2E2E2E2E2C5E2E2E2DBE1E3E1"> : tensor<16x1x1x8xi8>} : () -> tensor<16x1x1x8xi8>  // %1: 16x1x1x8 i8 weights; byte 0xE2 == -30 == weight_zp
+    %2 = "tosa.conv2d"(%arg0, %1, %0) {dilation = [1, 1], pad = [0, 0, 0, 0], quantization_info = #tosa.conv_quant<input_zp = -128, weight_zp = -30>, stride = [1, 1]} : (tensor<1x112x112x8xi8>, tensor<16x1x1x8xi8>, tensor<16xi32>) -> tensor<1x112x112x16xi32>  // 1x1 kernel, stride 1, no padding; i8 operands, i32 result
+    %3 = "tosa.rescale"(%2) {double_round = true, input_zp = 0 : i32, multiplier = [1364139008 : i32], output_zp = -128 : i32, per_channel = false, scale32 = true, shift = [35 : i32]} : (tensor<1x112x112x16xi32>) -> tensor<1x112x112x16xi8>  // rescales the i32 conv result to i8 with a single (not per-channel) multiplier/shift
+    return %3 : tensor<1x112x112x16xi8>
+  }
+}
diff --git a/samples/microbenchmarks/conv1x1_test.run b/samples/microbenchmarks/conv1x1_test.run
new file mode 100644
index 0000000..2aa9a92
--- /dev/null
+++ b/samples/microbenchmarks/conv1x1_test.run
@@ -0,0 +1 @@
+// RUN: ${TEST_RUNNER_CMD} %S/conv1x1_test_emitc_static