sw/vec_iree: Populate output_length in IREE exec

This CL populates the `length` field of the output header with the
value that process_output reports through its `output_length` argument.

First, add an OutputHeader struct and place its `output_header` instance
in the `.model_output_header` section, at the top of the model_output
section. The return_code and epc fields are already populated by
crt0.S.

When the MlOutput struct was first added, the output was expected to
live at an arbitrary address, so the struct carried a pointer. The
output now lives right after the header, so remove MlOutput and have
process_output populate a length instead.

Change-Id: I0b0005dcdc56ea53f124993cac81646fb8cd4561
diff --git a/model_util/model_api.h b/model_util/model_api.h
index 22a2300..64540da 100644
--- a/model_util/model_api.h
+++ b/model_util/model_api.h
@@ -45,11 +45,6 @@
   char model_name[];
 } MlModel;
 
-typedef struct {
-  void *result;
-  uint32_t len;
-} MlOutput;
-
 // Load the statically embedded library
 iree_hal_executable_library_query_fn_t library_query(void);
 
@@ -64,10 +59,9 @@
                               iree_const_byte_span_t **byte_span);
 
 // Process the ML execution output into the final data to be sent to the
-// host. The final format is model dependent, so the address and size
-// are returned via `output.`
+// host. `output_length` is set to the total byte size of the model's output.
 iree_status_t process_output(const MlModel *model,
                              iree_hal_buffer_mapping_t *buffers,
-                             MlOutput *output);
+                             uint32_t *output_length);
 
 #endif  // MODEL_UTIL_MODEL_API_H_
diff --git a/model_util/util.c b/model_util/util.c
index 0e2435a..1c656f0 100644
--- a/model_util/util.c
+++ b/model_util/util.c
@@ -28,6 +28,14 @@
 #include "iree/vm/api.h"
 #include "iree/vm/bytecode_module.h"
 
+typedef struct {
+  uint32_t return_code;  // Populated in crt0.S
+  uint32_t epc;          // Populated in crt0.S
+  uint32_t length;
+} OutputHeader;
+
+__attribute__((section(".model_output_header"))) OutputHeader output_header;
+
 extern const MlModel kModel;
 
 // Prepare the input buffers and buffer_views based on the data type. They must
@@ -180,9 +188,9 @@
 
   // Post-process memory into model output.
   if (iree_status_is_ok(result)) {
-    MlOutput output = {.result = NULL, .len = 0};
-    result = process_output(model, mapped_memories, &output);
-    // TODO(jesionowski): Populate CSRs with `output` after validating result.
+    uint32_t length = 0;
+    result = process_output(model, mapped_memories, &length);
+    output_header.length = length;
   }
 
   for (int index_output = 0; index_output < model->num_output; index_output++) {
diff --git a/samples/microbenchmarks/conv1x1_test.c b/samples/microbenchmarks/conv1x1_test.c
index 68932bc..8e88743 100644
--- a/samples/microbenchmarks/conv1x1_test.c
+++ b/samples/microbenchmarks/conv1x1_test.c
@@ -44,7 +44,7 @@
 
 iree_status_t process_output(const MlModel *model,
                              iree_hal_buffer_mapping_t *buffers,
-                             MlOutput *output) {
+                             uint32_t *output_length) {
   iree_status_t result = iree_ok_status();
   // Output is ((bias + input_zp) * multiplier) >> shift + output_zp after
   // rescale.
diff --git a/samples/simple_vec_mul/float_vec.c b/samples/simple_vec_mul/float_vec.c
index 42ce5b8..4ebacb5 100644
--- a/samples/simple_vec_mul/float_vec.c
+++ b/samples/simple_vec_mul/float_vec.c
@@ -98,7 +98,7 @@
 
 iree_status_t process_output(const MlModel *model,
                              iree_hal_buffer_mapping_t *buffers,
-                             MlOutput *output) {
+                             uint32_t *output_length) {
   iree_status_t result = iree_ok_status();
   for (int i = 0; i < buffers[0].contents.data_length / sizeof(float); ++i) {
     if (((const float *)buffers[0].contents.data)[i] != i * i / 8.0f) {
diff --git a/samples/simple_vec_mul/int_vec.c b/samples/simple_vec_mul/int_vec.c
index 3cf57b3..d51ce6a 100644
--- a/samples/simple_vec_mul/int_vec.c
+++ b/samples/simple_vec_mul/int_vec.c
@@ -98,7 +98,7 @@
 
 iree_status_t process_output(const MlModel *model,
                              iree_hal_buffer_mapping_t *buffers,
-                             MlOutput *output) {
+                             uint32_t *output_length) {
   iree_status_t result = iree_ok_status();
   for (int i = 0; i < buffers[0].contents.data_length / sizeof(int32_t); ++i) {
     if (((const int32_t *)buffers[0].contents.data)[i] != (i >> 1) * i) {