[hps] Use sw/kelvin helper header

Clean up kelvin.h to build on top of sw/kelvin/crt/kelvin.h.

Bug: 331131250
Change-Id: Id09b1633262198a8f102d12c007b1f89fa1d684f
(cherry picked from commit 1a4ece5c21b7877a4d19f247bee5b769011ef97a)
diff --git a/sw/device/tests/kelvin/hps-c-port/model/src/model.cc b/sw/device/tests/kelvin/hps-c-port/model/src/model.cc
index dceab95..84cb798 100644
--- a/sw/device/tests/kelvin/hps-c-port/model/src/model.cc
+++ b/sw/device/tests/kelvin/hps-c-port/model/src/model.cc
@@ -1,3 +1,5 @@
+#include <cstdio>
+
 #include "sw/device/tests/kelvin/hps-c-port/model/inc/layers.h"
 
 int8_t buffer_000[120 * 160 * 16] __aligned__ __noinit__;
diff --git a/sw/device/tests/kelvin/hps-c-port/model/src/tfc_opt_conv2d_per_ch.cc b/sw/device/tests/kelvin/hps-c-port/model/src/tfc_opt_conv2d_per_ch.cc
index 82e9457..6a7e37b 100644
--- a/sw/device/tests/kelvin/hps-c-port/model/src/tfc_opt_conv2d_per_ch.cc
+++ b/sw/device/tests/kelvin/hps-c-port/model/src/tfc_opt_conv2d_per_ch.cc
@@ -1,6 +1,11 @@
 #include "sw/device/tests/kelvin/hps-c-port/model/inc/tfc_conv2d_per_ch.h"
+
+#include <algorithm>
+
 #include "sw/device/tests/kelvin/hps-c-port/model/inc/tfc_opt_util.h"
 
+#define fence()           __asm__ __volatile_always__("fence")
+
 namespace opt {
 
 constexpr int kPad = 256;  // ceil_to_pow2(zi=48), last layer (16-10)*48=288
diff --git a/sw/device/tests/kelvin/hps-c-port/sw/init/kelvin.h b/sw/device/tests/kelvin/hps-c-port/sw/init/kelvin.h
index 37101d7..ffa08ff 100644
--- a/sw/device/tests/kelvin/hps-c-port/sw/init/kelvin.h
+++ b/sw/device/tests/kelvin/hps-c-port/sw/init/kelvin.h
@@ -1,276 +1,66 @@
+/*
+ * Copyright 2024 Google LLC
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ *     http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+// Kelvin helper header for the HPS C port: intrinsics macros and vm aliases.
+
 #ifndef KELVIN_H_
 #define KELVIN_H_
 
+#include <assert.h>
 #include <math.h>
 #include <stdint.h>
 #include <string.h>
-#include <algorithm>
 
-// TODO(b/331131250): Use the kelvin header from sw/kelvin crt package.
-// And replace printf with '_write' function
-
-// Disable volatile assembly statements so that optimizer may remove unused.
-// #define __volatile__ volatile
-#define __volatile__
 #define __volatile_always__ volatile
-#define __volatile_never__
-
-// Branch taken compiler hint.
-#define likely(x)    __builtin_expect (!!(x), 1)
-#define unlikely(x)  __builtin_expect (!!(x), 0)
 
 // Helper macros for Intrinsics definitions.
-#define ARGS_F_A(FN, A0) FN " " #A0 "\n\t"
-#define ARGS_F_A_A(FN, A0, A1) FN " " #A0 ", " #A1 "\n\t"
-#define ARGS_F_A_A_A(FN, A0, A1, A2) FN " " #A0 ", " #A1 ", " #A2 "\n\t"
-#define ARGS_F_A_A_A_A(FN, A0, A1, A2, A3) FN " " #A0 ", " #A1 ", " #A2 ", " #A3 "\n\t"
-
-// Command and Status Registers.
-#define csrrw(a, b, c)   __asm__ __volatile_always__(ARGS_F_A_A_A("csrrw", %0, b, %1) : "=r"(a) : "r"(c))
-#define csrrs(a, b, c)   __asm__ __volatile_always__(ARGS_F_A_A_A("csrrs", %0, b, %1) : "=r"(a) : "r"(c))
-#define csrrc(a, b, c)   __asm__ __volatile_always__(ARGS_F_A_A_A("csrrc", %0, b, %1) : "=r"(a) : "r"(c))
-#define csrrwi(a, b, c)  __asm__ __volatile_always__(ARGS_F_A_A_A("csrrwi", %0, b, c) : "=r"(a) : )
-#define csrrsi(a, b, c)  __asm__ __volatile_always__(ARGS_F_A_A_A("csrrsi", %0, b, c) : "=r"(a) : )
-#define csrrci(a, b, c)  __asm__ __volatile_always__(ARGS_F_A_A_A("csrrci", %0, b, c) : "=r"(a) : )
-#define csrr(a, b)       __asm__ __volatile_always__(ARGS_F_A_A("csrr", %0, b) : "=r"(a) : )
-#define csrw(a, b)       __asm__ __volatile_always__(ARGS_F_A_A("csrw", a, %0) : : "r"(b))
-
-// Cache maintenance.
-#define fence()           __asm__ __volatile_always__("fence")
-#define fencei()          __asm__ __volatile_always__("fence.i")
-#define prefetch_p(a, b)  __asm__ __volatile_always__("prefetch.p %0, %1" : : "r"(a), "r"(b))
-#define prefetch_l(a, b)  __asm__ __volatile_always__("prefetch.p %0, %1" : : "r"(a), "r"(b))
-
-// Intrinsics helpers.
-static inline void valign() { asm __volatile_always__(".balign 16"); }
-
-#define vwiden_h_v(a, b)    vaddw_h_vx(a, b, 0);
-#define vwiden_w_v(a, b)    vaddw_w_vx(a, b, 0);
-#define vwiden_h_u_v(a, b)  vaddw_h_u_vx(a, b, 0);
-#define vwiden_w_u_v(a, b)  vaddw_w_u_vx(a, b, 0);
-
-#define nop()     __asm__ __volatile_always__("nop");
-#define ebreak()  __asm__ __volatile_always__("ebreak");
-#define ecall()   __asm__ __volatile_always__("ecall");
-#define mret()    __asm__ __volatile_always__("mret");
+#define ARGS_F_A(FN, A0) FN " " #A0 "\n"
+#define ARGS_F_A_A(FN, A0, A1) FN " " #A0 ", " #A1 "\n"
+#define ARGS_F_A_A_A(FN, A0, A1, A2) FN " " #A0 ", " #A1 ", " #A2 "\n"
+#define ARGS_F_A_A_A_A(FN, A0, A1, A2, A3) \
+  FN " " #A0 ", " #A1 ", " #A2 ", " #A3 "\n"
 
 #include "sw/device/tests/kelvin/hps-c-port/sw/init/kelvin_intrinsics.h"
 
-#define vm0   v0
-#define vm1   v4
-#define vm2   v8
-#define vm3   v12
-#define vm4   v16
-#define vm5   v20
-#define vm6   v24
-#define vm7   v28
-#define vm8   v32
-#define vm9   v36
-#define vm10  v40
-#define vm11  v44
-#define vm12  v48
-#define vm13  v52
-#define vm14  v56
-#define vm15  v60
-
-#define static_assert_offset(TYPE, FIELD, OFFSET) \
-  static_assert(__builtin_offsetof(TYPE, FIELD) == OFFSET, "invalid offset")
+#define vm0 v0
+#define vm1 v4
+#define vm2 v8
+#define vm3 v12
+#define vm4 v16
+#define vm5 v20
+#define vm6 v24
+#define vm7 v28
+#define vm8 v32
+#define vm9 v36
+#define vm10 v40
+#define vm11 v44
+#define vm12 v48
+#define vm13 v52
+#define vm14 v56
+#define vm15 v60
 
 struct vconv_u8_t {
-  uint32_t mode:2;    // 1:0
-  uint32_t start:5;   // 6:2
-  uint32_t stop:5;    // 11:7
-   int32_t sbias1:9;  // 20:12
-  uint32_t sdata1:1;  // 21
-   int32_t sbias2:9;  // 30:22
-  uint32_t sdata2:1;  // 31
+  uint32_t mode : 2;    // 1:0
+  uint32_t start : 5;   // 6:2
+  uint32_t stop : 5;    // 11:7
+  int32_t sbias1 : 9;   // 20:12
+  uint32_t sdata1 : 1;  // 21
+  int32_t sbias2 : 9;   // 30:22
+  uint32_t sdata2 : 1;  // 31
 };
-static_assert(sizeof(vconv_u8_t) == 4);
-
-static int32_t float_as_int(const float f) {
-  union {
-    int32_t i;
-    float f;
-  } c;
-  c.f = f;
-  return c.i;
-}
-
-static float int_as_float(const int32_t i) {
-  union {
-    int32_t i;
-    float f;
-  } c;
-  c.i = i;
-  return c.f;
-}
-
-#define f2i   float_as_int
-#define i2f   int_as_float
-
-#define printf_fmt(a)   flog(a)
-
-template <typename T>
-static inline void printf_arg(const T arg) {
-  if (std::is_same<T, float>::value) {
-    slog(arg);
-  } else if (std::is_same<T, const uint8_t*>::value ||
-             std::is_same<T, const char*>::value) {
-    klog(arg);
-  } else if (std::is_same<T, uint8_t*>::value ||
-             std::is_same<T, char*>::value) {
-#   define haszero(v) (((v) - 0x01010101UL) & ~(v) & 0x80808080UL)
-    const uint32_t *p_str = (const uint32_t*) (uint32_t) arg;
-    uint32_t data = 0;
-    int pos = 0;
-    do {
-      data = *p_str;
-      p_str++;
-      clog(data);
-    } while (!haszero(data));
-  } else {
-    slog(arg);
-  }
-}
-
-extern "C" inline int printf(const char *format, ...) {
-  printf_fmt(format);
-  return 0;
-}
-
-template <typename T1>
-static inline void printf(const char *format, const T1 arg1) {
-  printf_arg(arg1);
-  printf_fmt(format);
-}
-
-template <typename T1, typename T2>
-static inline void printf(const char *format, const T1 arg1, const T2 arg2) {
-  printf_arg(arg1);
-  printf_arg(arg2);
-  printf_fmt(format);
-}
-
-template <typename T1, typename T2, typename T3>
-static inline void printf(const char *format, const T1 arg1, const T2 arg2,
-                          const T3 arg3) {
-  printf_arg(arg1);
-  printf_arg(arg2);
-  printf_arg(arg3);
-  printf_fmt(format);
-}
-
-template <typename T1, typename T2, typename T3, typename T4>
-static inline void printf(const char *format, const T1 arg1, const T2 arg2,
-                          const T3 arg3, const T4 arg4) {
-  printf_arg(arg1);
-  printf_arg(arg2);
-  printf_arg(arg3);
-  printf_arg(arg4);
-  printf_fmt(format);
-}
-
-template <typename T1, typename T2, typename T3, typename T4, typename T5>
-static inline void printf(const char *format, const T1 arg1, const T2 arg2,
-                          const T3 arg3, const T4 arg4, const T5 arg5) {
-  printf_arg(arg1);
-  printf_arg(arg2);
-  printf_arg(arg3);
-  printf_arg(arg4);
-  printf_arg(arg5);
-  printf_fmt(format);
-}
-
-template <typename T1, typename T2, typename T3, typename T4, typename T5, typename T6>
-static inline void printf(const char *format, const T1 arg1, const T2 arg2,
-                          const T3 arg3, const T4 arg4, const T5 arg5,
-                          const T6 arg6) {
-  printf_arg(arg1);
-  printf_arg(arg2);
-  printf_arg(arg3);
-  printf_arg(arg4);
-  printf_arg(arg5);
-  printf_arg(arg6);
-  printf_fmt(format);
-}
-
-template <typename T1, typename T2, typename T3, typename T4, typename T5, typename T6, typename T7>
-static inline void printf(const char *format, const T1 arg1, const T2 arg2,
-                          const T3 arg3, const T4 arg4, const T5 arg5,
-                          const T6 arg6, const T7 arg7) {
-  printf_arg(arg1);
-  printf_arg(arg2);
-  printf_arg(arg3);
-  printf_arg(arg4);
-  printf_arg(arg5);
-  printf_arg(arg6);
-  printf_arg(arg7);
-  printf_fmt(format);
-}
-
-template <typename T1, typename T2, typename T3, typename T4, typename T5, typename T6, typename T7, typename T8>
-static inline void printf(const char *format, const T1 arg1, const T2 arg2,
-                          const T3 arg3, const T4 arg4, const T5 arg5,
-                          const T6 arg6, const T7 arg7, const T8 arg8) {
-  printf_arg(arg1);
-  printf_arg(arg2);
-  printf_arg(arg3);
-  printf_arg(arg4);
-  printf_arg(arg5);
-  printf_arg(arg6);
-  printf_arg(arg7);
-  printf_arg(arg8);
-  printf_fmt(format);
-}
-
-#define vdump_b(vreg, str)              \
-{                                       \
-  int n;                                \
-  getmaxvl_b(n);                        \
-  uint8_t tmp[n];                       \
-  vst_b_x(vreg, tmp);                   \
-  printf("<%s>", str);                  \
-  for (int i = 0; i < n; ++i) {         \
-    printf(" %02x", uint8_t(tmp[i]));   \
-  }                                     \
-  printf("\n");                         \
-}
-
-#define vdump_h(vreg, str)              \
-{                                       \
-  int n;                                \
-  getmaxvl_h(n);                        \
-  uint16_t tmp[n];                      \
-  vst_h_x(vreg, tmp);                   \
-  printf("<%s>", str);                  \
-  for (int i = 0; i < n; ++i) {         \
-    printf(" %04x", uint16_t(tmp[i]));  \
-  }                                     \
-  printf("\n");                         \
-}
-
-#define vdump_w(vreg, str)              \
-{                                       \
-  int n;                                \
-  getmaxvl_w(n);                        \
-  uint32_t tmp[n];                      \
-  vst_w_x(vreg, tmp);                   \
-  printf("<%s>", str);                  \
-  for (int i = 0; i < n; ++i) {         \
-    printf(" %08x", uint32_t(tmp[i]));  \
-  }                                     \
-  printf("\n");                         \
-}
-
-#define vprint_b(a, b)  vdump_b(a, b)
-#define vprint_h(a, b)  vdump_h(a, b)
-#define vprint_w(a, b)  vdump_w(a, b)
-
-static void assert(bool v, const char* s = "") {
-  if (v) return;
-  printf("**assert[%s]\n", s);
-  exit(-1);
-}
+static_assert(sizeof(struct vconv_u8_t) == 4);
 
 template<typename T>
 static T* mmap_uncached(T* ptr) {
@@ -280,8 +70,4 @@
 #define __aligned__     __attribute__ ((aligned(64)))
 #define __noinit__      __attribute__ ((section(".noinit")))
 
-extern "C" void *malloc(size_t size);
-
-extern "C" void free(void *ptr);
-
 #endif  // KELVIN_H_