sw/vec_iree: Add option to build with kelvin

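Add a BUILD_WITH_KELVIN flag (default OFF) to build targets with the kelvin
BSP. BUILD_WITH_SPRINGBOK is replaced by BUILD_WITH_NATIVE (default OFF), and
the new PRINT_IREE_STATS flag (default ON) gates the IREE statistics settings.

Add support for the kelvin output header, which carries the model output
address (output_ptr) in place of epc.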

Change-Id: I44529a91cacb03e0678fec786d04ac783767d3f9
diff --git a/CMakeLists.txt b/CMakeLists.txt
index 3c06d3d..a57add1 100644
--- a/CMakeLists.txt
+++ b/CMakeLists.txt
@@ -25,10 +25,11 @@
 add_link_options("LINKER:--defsym=__tcm_length__=${TCM_LENGTH}")
 set(STACK_SIZE "10K" CACHE STRING "Stack size (default: 10K)")
 add_link_options("LINKER:--defsym=__stack_size__=${STACK_SIZE}")
-set(SPRINGBOK_LINKER_SCRIPT "$ENV{ROOTDIR}/sw/vec/springbok/springbok.ld" CACHE PATH "Springbok linker script path (default: springbok.ld)")
-set(BUILD_WITH_SPRINGBOK ON CACHE BOOL "Build the target with springbok BSP (default: ON)")
+set(BUILD_WITH_NATIVE OFF CACHE BOOL "Build the target with native support (default: OFF)")
+set(BUILD_WITH_KELVIN OFF CACHE BOOL "Build the target with kelvin BSP (default: OFF)")
 set(BUILD_WITH_RVV ON CACHE BOOL "Build the target with RVV (default: ON)")
 set(BUILD_NO_WMMU OFF CACHE BOOL "Build targets with no WMMU (default: OFF)")
+set(PRINT_IREE_STATS ON CACHE BOOL "Print out information on IREE statistics (default: ON)")
 
 #-------------------------------------------------------------------------------
 # IREE-specific settings
@@ -54,7 +55,7 @@
 add_compile_definitions(IREE_SYNCHRONIZATION_DISABLE_UNSAFE=1)
 add_compile_definitions(IREE_FILE_IO_ENABLE=0)
 add_compile_definitions(IREE_USER_CONFIG_H="${SPARROW_CONFIG_HEADER}")
-if (NOT BUILD_NO_WMMU)
+if (NOT PRINT_IREE_STATS)
   add_compile_definitions(IREE_HAL_COMMAND_BUFFER_VALIDATION_ENABLE=0)
   add_compile_definitions(IREE_HAL_MODULE_STRING_UTIL_ENABLE=0)
   add_compile_definitions(IREE_STATISTICS_ENABLE=0)
@@ -71,16 +72,23 @@
 #
 # However, an explicit include path gets added here across all targets so the
 # header files can be found during compilation.
-if(${BUILD_WITH_SPRINGBOK})
+if(BUILD_WITH_NATIVE)
+  # Add the native printout support.
+  add_subdirectory(native_log)
+  include(riscv_native_log)
+  include_directories(native_log)
+elseif(NOT BUILD_WITH_KELVIN)
   # Springbok BSP-related setting
+  set(SPRINGBOK_LINKER_SCRIPT "$ENV{ROOTDIR}/sw/vec/springbok/springbok.ld" CACHE PATH "Springbok linker script path (default: springbok.ld)")
   add_subdirectory($ENV{ROOTDIR}/sw/vec/springbok springbok)
   include(riscv_springbok)
   include_directories($ENV{ROOTDIR}/sw/vec/springbok/include)
 else()
-  # Add the springbok printout support.
-  add_subdirectory(native_log)
-  include(riscv_native_log)
-  include_directories(native_log)
+  # Kelvin BSP-related setting
+  set(KELVIN_LINKER_SCRIPT "$ENV{ROOTDIR}/sw/kelvin/crt/kelvin.ld" CACHE PATH "Kelvin linker script path (default: kelvin.ld)")
+  add_subdirectory($ENV{ROOTDIR}/sw/kelvin/crt kelvin)
+  include(riscv_kelvin)
+  include_directories($ENV{ROOTDIR}/sw/kelvin/crt)
 endif()
 
 message(STATUS "Include IREE source at ${IREE_SOURCE_DIR}")
@@ -100,10 +108,17 @@
 
 # softmax op (and mfcc) requires floorf implementation in libm. Use the nano
 # version.
-find_library(m m
-PATHS
-  "${RISCV_TOOLCHAIN_ROOT}/riscv32-unknown-elf/lib/newlib-nano/"
-REQUIRED)
+# Kelvin builds look for libm_nano directly under lib/; every other build
+# looks for libm under lib/newlib-nano/.
+if(BUILD_WITH_KELVIN)
+  find_library(m m_nano
+    PATHS "${RISCV_TOOLCHAIN_ROOT}/riscv32-unknown-elf/lib"
+    REQUIRED)
+else()
+  find_library(m m
+    PATHS "${RISCV_TOOLCHAIN_ROOT}/riscv32-unknown-elf/lib/newlib-nano/"
+    REQUIRED)
+endif()
 link_libraries(m)
 
 # Add the included directory here.
diff --git a/cmake/riscv_iree_gcc.cmake b/cmake/riscv_iree_gcc.cmake
new file mode 100644
index 0000000..ef219e6
--- /dev/null
+++ b/cmake/riscv_iree_gcc.cmake
@@ -0,0 +1,70 @@
+cmake_minimum_required (VERSION 3.13)
+
+# The toolchain file is processed twice by CMake on the first configure but
+# only once on later re-configures. Without this guard the flags below would be
+# appended twice the first time, and ninja would then see a different set of
+# flags on the next run and rebuild the world.
+# https://github.com/android-ndk/ndk/issues/323
+if(RISCV_GCC_TOOLCHAIN_INCLUDED)
+  return()
+endif()
+set(RISCV_GCC_TOOLCHAIN_INCLUDED true)
+
+set(CMAKE_SYSTEM_PROCESSOR riscv32)
+set(CMAKE_CROSSCOMPILING ON CACHE BOOL "")
+# Record a short tag for the host OS; it stays unset on hosts other than Linux and Darwin.
+if(CMAKE_HOST_SYSTEM_NAME STREQUAL Linux)
+  set(RISCV_HOST_TAG linux)
+elseif(CMAKE_HOST_SYSTEM_NAME STREQUAL Darwin)
+  set(RISCV_HOST_TAG darwin)
+endif()
+
+set(RISCV_TOOLCHAIN_NAME gcc)
+# The toolchain root defaults to $ENV{CACHE}/toolchain_kelvin, read at configure time.
+set(RISCV_TOOLCHAIN_ROOT "$ENV{CACHE}/toolchain_kelvin" CACHE PATH "RISC-V compiler path")
+set(CMAKE_FIND_ROOT_PATH ${RISCV_TOOLCHAIN_ROOT})
+list(APPEND CMAKE_PREFIX_PATH "${RISCV_TOOLCHAIN_ROOT}")
+# All build tools are the riscv32-unknown-elf-* binaries under the toolchain root.
+set(CMAKE_C_COMPILER "${RISCV_TOOLCHAIN_ROOT}/bin/riscv32-unknown-elf-gcc")
+set(CMAKE_CXX_COMPILER "${RISCV_TOOLCHAIN_ROOT}/bin/riscv32-unknown-elf-g++")
+set(CMAKE_AR "${RISCV_TOOLCHAIN_ROOT}/bin/riscv32-unknown-elf-ar")
+set(CMAKE_RANLIB "${RISCV_TOOLCHAIN_ROOT}/bin/riscv32-unknown-elf-ranlib")
+set(CMAKE_STRIP "${RISCV_TOOLCHAIN_ROOT}/bin/riscv32-unknown-elf-strip")
+
+set(RISCV_COMPILER_FLAGS "" CACHE STRING "RISC-V compiler flags for C, CXX, and ASM")
+set(RISCV_COMPILER_FLAGS_CXX)
+set(RISCV_COMPILER_FLAGS_DEBUG)
+set(RISCV_COMPILER_FLAGS_RELEASE)
+set(RISCV_LINKER_FLAGS)
+# Generic (no-OS) target, C11 without compiler extensions, rv32im ISA with the ilp32 ABI.
+set(CMAKE_SYSTEM_NAME Generic)
+set(CMAKE_C_STANDARD 11)
+set(CMAKE_C_EXTENSIONS OFF)     # Force the usage of _ISOC11_SOURCE
+set(CMAKE_SYSTEM_LIBRARY_PATH "${RISCV_TOOLCHAIN_ROOT}/riscv32-unknown-elf/lib")
+set(RISCV_COMPILER_FLAGS "${RISCV_COMPILER_FLAGS} -march=rv32im -mabi=ilp32")
+# Define IREE_PLATFORM_GENERIC for everything compiled with this toolchain file.
+add_compile_definitions(IREE_PLATFORM_GENERIC)
+
+# Use the newlib-nano headers, and link executables against the newlib-nano
+# libraries through nano.specs.
+include_directories(BEFORE SYSTEM
+  "${RISCV_TOOLCHAIN_ROOT}/riscv32-unknown-elf/include/newlib-nano/"
+)
+set(RISCV_LINKER_FLAGS_EXE
+  "-L${RISCV_TOOLCHAIN_ROOT}/riscv32-unknown-elf/lib/newlib-nano/ -specs=nano.specs"
+)
+
+# TODO(lundong): Move no_warning_flags to sparrow_ops.cmake
+set(C_NO_WARNING_FLAGS "-Wno-char-subscripts -Wno-format -Wno-incompatible-pointer-types -Wno-int-to-pointer-cast -Wstrict-aliasing=0 -Wno-pointer-to-int-cast")
+# Put the RISC-V flags ahead of any flags already set for each language and build configuration.
+set(CMAKE_C_FLAGS             "${RISCV_COMPILER_FLAGS} ${CMAKE_C_FLAGS} ${C_NO_WARNING_FLAGS}")
+set(CMAKE_CXX_FLAGS           "${RISCV_COMPILER_FLAGS} ${RISCV_COMPILER_FLAGS_CXX} ${CMAKE_CXX_FLAGS}")
+set(CMAKE_ASM_FLAGS           "${RISCV_COMPILER_FLAGS} ${CMAKE_ASM_FLAGS}")
+set(CMAKE_C_FLAGS_DEBUG       "${RISCV_COMPILER_FLAGS_DEBUG} ${CMAKE_C_FLAGS_DEBUG}")
+set(CMAKE_CXX_FLAGS_DEBUG     "${RISCV_COMPILER_FLAGS_DEBUG} ${CMAKE_CXX_FLAGS_DEBUG}")
+set(CMAKE_ASM_FLAGS_DEBUG     "${RISCV_COMPILER_FLAGS_DEBUG} ${CMAKE_ASM_FLAGS_DEBUG}")
+set(CMAKE_C_FLAGS_RELEASE     "${RISCV_COMPILER_FLAGS_RELEASE} ${CMAKE_C_FLAGS_RELEASE}")
+set(CMAKE_CXX_FLAGS_RELEASE   "${RISCV_COMPILER_FLAGS_RELEASE} ${CMAKE_CXX_FLAGS_RELEASE}")
+set(CMAKE_ASM_FLAGS_RELEASE   "${RISCV_COMPILER_FLAGS_RELEASE} ${CMAKE_ASM_FLAGS_RELEASE}")
+set(CMAKE_SHARED_LINKER_FLAGS "${RISCV_LINKER_FLAGS} ${CMAKE_SHARED_LINKER_FLAGS}")
+set(CMAKE_MODULE_LINKER_FLAGS "${RISCV_LINKER_FLAGS} ${CMAKE_MODULE_LINKER_FLAGS}")
+set(CMAKE_EXE_LINKER_FLAGS    "${RISCV_LINKER_FLAGS} ${RISCV_LINKER_FLAGS_EXE} ${CMAKE_EXE_LINKER_FLAGS}")
diff --git a/cmake/riscv_kelvin.cmake b/cmake/riscv_kelvin.cmake
new file mode 100644
index 0000000..be18ef6
--- /dev/null
+++ b/cmake/riscv_kelvin.cmake
@@ -0,0 +1,21 @@
+# CMake module that hooks add_executable() to link the kelvin BSP into executables.
+# It must be included after the `kelvin` target has been created.
+if(NOT TARGET kelvin)
+  message(FATAL_ERROR "Please include kelvin target first")
+endif()
+# The linker script path is handed to the linker with -T, so it must be set as well.
+if(NOT DEFINED KELVIN_LINKER_SCRIPT)
+  message(FATAL_ERROR "Please specify KELVIN_LINKER_SCRIPT path first")
+endif()
+
+# Overrides add_executable() so that every regular executable is built against the kelvin BSP.
+# ALIAS and IMPORTED executables are forwarded to the built-in command unchanged.
+function(add_executable executable)
+  _add_executable(${executable} ${ARGN})
+  cmake_parse_arguments(AE "ALIAS;IMPORTED" "" "" ${ARGN})
+  if(NOT AE_ALIAS AND NOT AE_IMPORTED)
+    # Link the BSP library, use its linker script, and skip the toolchain's standard startup files.
+    target_link_libraries(${executable} PRIVATE kelvin)
+    target_link_options(${executable} PRIVATE "-T${KELVIN_LINKER_SCRIPT}" "-nostartfiles")
+  endif()
+endfunction()
diff --git a/cmake/sparrow_static_module.cmake b/cmake/sparrow_static_module.cmake
index 0aa6928..e75682e 100644
--- a/cmake/sparrow_static_module.cmake
+++ b/cmake/sparrow_static_module.cmake
@@ -77,6 +77,9 @@
   if (${_RULE_RVV_OFF})
     set(_CPU_FEATURES "+m,+f")
   endif()
+  if (${BUILD_WITH_KELVIN})
+    set(_CPU_FEATURES "+m")
+  endif()
 
   # Set common iree-compile flags
   set(_COMPILER_ARGS ${_RULE_FLAGS})
diff --git a/model_util/CMakeLists.txt b/model_util/CMakeLists.txt
index 05982b2..78b9665 100644
--- a/model_util/CMakeLists.txt
+++ b/model_util/CMakeLists.txt
@@ -10,6 +10,10 @@
     iree::modules::hal
 )
 
+if(BUILD_WITH_KELVIN)  # Kelvin builds: expose BUILD_KELVIN to util_base and its dependents.
+  target_compile_definitions(model_util_util_base PUBLIC BUILD_KELVIN)
+endif()
+
 # static library using regular HAL
 iree_cc_library(
   NAME
diff --git a/model_util/model_api.h b/model_util/model_api.h
index 89a297c..62d0cbd 100644
--- a/model_util/model_api.h
+++ b/model_util/model_api.h
@@ -57,8 +57,9 @@
 
 // Process the ML execution output into the final data to be sent to the
 // host. `output_length` is set to the total byte size of the model's output.
+// `output_ptr` is set to the address of the model's output.
 iree_status_t process_output(const MlModel *model,
                              iree_hal_buffer_mapping_t *buffers,
-                             uint32_t *output_length);
+                             uint32_t *output_length, uint32_t *output_ptr);
 
 #endif  // MODEL_UTIL_MODEL_API_H_
diff --git a/model_util/util.c b/model_util/util.c
index bd84651..983d93e 100644
--- a/model_util/util.c
+++ b/model_util/util.c
@@ -26,7 +26,11 @@
 
 typedef struct {
   uint32_t return_code;  // Populated in crt0.S
+#ifndef BUILD_KELVIN
   uint32_t epc;          // Populated in crt0.S
+#else  // BUILD_KELVIN: this slot holds the output address instead of epc
+  uint32_t output_ptr;   // Copied from the output_ptr filled in by process_output()
+#endif  // BUILD_KELVIN
   uint32_t length;
 } OutputHeader;
 
@@ -236,8 +240,12 @@
   // Post-process memory into model output.
   if (iree_status_is_ok(result)) {
     uint32_t length = 0;
-    result = process_output(model, mapped_memories, &length);
+    uint32_t output_ptr = 0;
+    result = process_output(model, mapped_memories, &length, &output_ptr);
     output_header.length = length;
+#ifdef BUILD_KELVIN
+    output_header.output_ptr = output_ptr;
+#endif
   }
 
   for (int index_output = 0; index_output < model->num_output; index_output++) {
diff --git a/samples/microbenchmarks/conv1x1_test.c b/samples/microbenchmarks/conv1x1_test.c
index cbe19d8..7e90309 100644
--- a/samples/microbenchmarks/conv1x1_test.c
+++ b/samples/microbenchmarks/conv1x1_test.c
@@ -48,7 +48,7 @@
 
 iree_status_t process_output(const MlModel *model,
                              iree_hal_buffer_mapping_t *buffers,
-                             uint32_t *output_length) {
+                             uint32_t *output_length, uint32_t *output_ptr) {
   iree_status_t result = iree_ok_status();
   // Output is ((bias + input_zp) * multiplier) >> shift + output_zp after
   // rescale.
diff --git a/samples/simple_vec_mul/float_vec.c b/samples/simple_vec_mul/float_vec.c
index 913335d..0014b3c 100644
--- a/samples/simple_vec_mul/float_vec.c
+++ b/samples/simple_vec_mul/float_vec.c
@@ -96,7 +96,7 @@
 
 iree_status_t process_output(const MlModel *model,
                              iree_hal_buffer_mapping_t *buffers,
-                             uint32_t *output_length) {
+                             uint32_t *output_length, uint32_t *output_ptr) {
   iree_status_t result = iree_ok_status();
   for (int i = 0; i < buffers[0].contents.data_length / sizeof(float); ++i) {
     if (((const float *)buffers[0].contents.data)[i] != i * i / 8.0f) {
diff --git a/samples/simple_vec_mul/int_vec.c b/samples/simple_vec_mul/int_vec.c
index 41bbf0a..bf66502 100644
--- a/samples/simple_vec_mul/int_vec.c
+++ b/samples/simple_vec_mul/int_vec.c
@@ -96,7 +96,7 @@
 
 iree_status_t process_output(const MlModel *model,
                              iree_hal_buffer_mapping_t *buffers,
-                             uint32_t *output_length) {
+                             uint32_t *output_length, uint32_t *output_ptr) {
   iree_status_t result = iree_ok_status();
   for (int i = 0; i < buffers[0].contents.data_length / sizeof(int32_t); ++i) {
     if (((const int32_t *)buffers[0].contents.data)[i] != (i >> 1) * i) {