[hps] Use sw/kelvin helper header. Clean up kelvin.h to build on top of sw/kelvin/crt/kelvin.h. Bug: 331131250 Change-Id: Id09b1633262198a8f102d12c007b1f89fa1d684f (cherry picked from commit 1a4ece5c21b7877a4d19f247bee5b769011ef97a)
diff --git a/sw/device/tests/kelvin/hps-c-port/model/src/model.cc b/sw/device/tests/kelvin/hps-c-port/model/src/model.cc index dceab95..84cb798 100644 --- a/sw/device/tests/kelvin/hps-c-port/model/src/model.cc +++ b/sw/device/tests/kelvin/hps-c-port/model/src/model.cc
@@ -1,3 +1,5 @@ +#include <cstdio> + #include "sw/device/tests/kelvin/hps-c-port/model/inc/layers.h" int8_t buffer_000[120 * 160 * 16] __aligned__ __noinit__;
diff --git a/sw/device/tests/kelvin/hps-c-port/model/src/tfc_opt_conv2d_per_ch.cc b/sw/device/tests/kelvin/hps-c-port/model/src/tfc_opt_conv2d_per_ch.cc index 82e9457..6a7e37b 100644 --- a/sw/device/tests/kelvin/hps-c-port/model/src/tfc_opt_conv2d_per_ch.cc +++ b/sw/device/tests/kelvin/hps-c-port/model/src/tfc_opt_conv2d_per_ch.cc
@@ -1,6 +1,11 @@ #include "sw/device/tests/kelvin/hps-c-port/model/inc/tfc_conv2d_per_ch.h" + +#include <algorithm> + #include "sw/device/tests/kelvin/hps-c-port/model/inc/tfc_opt_util.h" +#define fence() __asm__ __volatile_always__("fence") + namespace opt { constexpr int kPad = 256; // ceil_to_pow2(zi=48), last layer (16-10)*48=288
diff --git a/sw/device/tests/kelvin/hps-c-port/sw/init/kelvin.h b/sw/device/tests/kelvin/hps-c-port/sw/init/kelvin.h index 37101d7..ffa08ff 100644 --- a/sw/device/tests/kelvin/hps-c-port/sw/init/kelvin.h +++ b/sw/device/tests/kelvin/hps-c-port/sw/init/kelvin.h
@@ -1,276 +1,66 @@ +/* + * Copyright 2024 Google LLC + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ +// Kelvin helper header + #ifndef KELVIN_H_ #define KELVIN_H_ +#include <assert.h> #include <math.h> #include <stdint.h> #include <string.h> -#include <algorithm> -// TODO(b/331131250): Use the kelvin header from sw/kelvin crt package. -// And replace printf with '_write' function - -// Disable volatile assembly statements so that optimizer may remove unused. -// #define __volatile__ volatile -#define __volatile__ #define __volatile_always__ volatile -#define __volatile_never__ - -// Branch taken compiler hint. -#define likely(x) __builtin_expect (!!(x), 1) -#define unlikely(x) __builtin_expect (!!(x), 0) // Helper macros for Intrinsics definitions. -#define ARGS_F_A(FN, A0) FN " " #A0 "\n\t" -#define ARGS_F_A_A(FN, A0, A1) FN " " #A0 ", " #A1 "\n\t" -#define ARGS_F_A_A_A(FN, A0, A1, A2) FN " " #A0 ", " #A1 ", " #A2 "\n\t" -#define ARGS_F_A_A_A_A(FN, A0, A1, A2, A3) FN " " #A0 ", " #A1 ", " #A2 ", " #A3 "\n\t" - -// Command and Status Registers. 
-#define csrrw(a, b, c) __asm__ __volatile_always__(ARGS_F_A_A_A("csrrw", %0, b, %1) : "=r"(a) : "r"(c)) -#define csrrs(a, b, c) __asm__ __volatile_always__(ARGS_F_A_A_A("csrrs", %0, b, %1) : "=r"(a) : "r"(c)) -#define csrrc(a, b, c) __asm__ __volatile_always__(ARGS_F_A_A_A("csrrc", %0, b, %1) : "=r"(a) : "r"(c)) -#define csrrwi(a, b, c) __asm__ __volatile_always__(ARGS_F_A_A_A("csrrwi", %0, b, c) : "=r"(a) : ) -#define csrrsi(a, b, c) __asm__ __volatile_always__(ARGS_F_A_A_A("csrrsi", %0, b, c) : "=r"(a) : ) -#define csrrci(a, b, c) __asm__ __volatile_always__(ARGS_F_A_A_A("csrrci", %0, b, c) : "=r"(a) : ) -#define csrr(a, b) __asm__ __volatile_always__(ARGS_F_A_A("csrr", %0, b) : "=r"(a) : ) -#define csrw(a, b) __asm__ __volatile_always__(ARGS_F_A_A("csrw", a, %0) : : "r"(b)) - -// Cache maintenance. -#define fence() __asm__ __volatile_always__("fence") -#define fencei() __asm__ __volatile_always__("fence.i") -#define prefetch_p(a, b) __asm__ __volatile_always__("prefetch.p %0, %1" : : "r"(a), "r"(b)) -#define prefetch_l(a, b) __asm__ __volatile_always__("prefetch.p %0, %1" : : "r"(a), "r"(b)) - -// Intrinsics helpers. 
-static inline void valign() { asm __volatile_always__(".balign 16"); } - -#define vwiden_h_v(a, b) vaddw_h_vx(a, b, 0); -#define vwiden_w_v(a, b) vaddw_w_vx(a, b, 0); -#define vwiden_h_u_v(a, b) vaddw_h_u_vx(a, b, 0); -#define vwiden_w_u_v(a, b) vaddw_w_u_vx(a, b, 0); - -#define nop() __asm__ __volatile_always__("nop"); -#define ebreak() __asm__ __volatile_always__("ebreak"); -#define ecall() __asm__ __volatile_always__("ecall"); -#define mret() __asm__ __volatile_always__("mret"); +#define ARGS_F_A(FN, A0) FN " " #A0 "\n" +#define ARGS_F_A_A(FN, A0, A1) FN " " #A0 ", " #A1 "\n" +#define ARGS_F_A_A_A(FN, A0, A1, A2) FN " " #A0 ", " #A1 ", " #A2 "\n" +#define ARGS_F_A_A_A_A(FN, A0, A1, A2, A3) \ + FN " " #A0 ", " #A1 ", " #A2 ", " #A3 "\n" #include "sw/device/tests/kelvin/hps-c-port/sw/init/kelvin_intrinsics.h" -#define vm0 v0 -#define vm1 v4 -#define vm2 v8 -#define vm3 v12 -#define vm4 v16 -#define vm5 v20 -#define vm6 v24 -#define vm7 v28 -#define vm8 v32 -#define vm9 v36 -#define vm10 v40 -#define vm11 v44 -#define vm12 v48 -#define vm13 v52 -#define vm14 v56 -#define vm15 v60 - -#define static_assert_offset(TYPE, FIELD, OFFSET) \ - static_assert(__builtin_offsetof(TYPE, FIELD) == OFFSET, "invalid offset") +#define vm0 v0 +#define vm1 v4 +#define vm2 v8 +#define vm3 v12 +#define vm4 v16 +#define vm5 v20 +#define vm6 v24 +#define vm7 v28 +#define vm8 v32 +#define vm9 v36 +#define vm10 v40 +#define vm11 v44 +#define vm12 v48 +#define vm13 v52 +#define vm14 v56 +#define vm15 v60 struct vconv_u8_t { - uint32_t mode:2; // 1:0 - uint32_t start:5; // 6:2 - uint32_t stop:5; // 11:7 - int32_t sbias1:9; // 20:12 - uint32_t sdata1:1; // 21 - int32_t sbias2:9; // 30:22 - uint32_t sdata2:1; // 31 + uint32_t mode : 2; // 1:0 + uint32_t start : 5; // 6:2 + uint32_t stop : 5; // 11:7 + int32_t sbias1 : 9; // 20:12 + uint32_t sdata1 : 1; // 21 + int32_t sbias2 : 9; // 30:22 + uint32_t sdata2 : 1; // 31 }; -static_assert(sizeof(vconv_u8_t) == 4); - -static int32_t 
float_as_int(const float f) { - union { - int32_t i; - float f; - } c; - c.f = f; - return c.i; -} - -static float int_as_float(const int32_t i) { - union { - int32_t i; - float f; - } c; - c.i = i; - return c.f; -} - -#define f2i float_as_int -#define i2f int_as_float - -#define printf_fmt(a) flog(a) - -template <typename T> -static inline void printf_arg(const T arg) { - if (std::is_same<T, float>::value) { - slog(arg); - } else if (std::is_same<T, const uint8_t*>::value || - std::is_same<T, const char*>::value) { - klog(arg); - } else if (std::is_same<T, uint8_t*>::value || - std::is_same<T, char*>::value) { -# define haszero(v) (((v) - 0x01010101UL) & ~(v) & 0x80808080UL) - const uint32_t *p_str = (const uint32_t*) (uint32_t) arg; - uint32_t data = 0; - int pos = 0; - do { - data = *p_str; - p_str++; - clog(data); - } while (!haszero(data)); - } else { - slog(arg); - } -} - -extern "C" inline int printf(const char *format, ...) { - printf_fmt(format); - return 0; -} - -template <typename T1> -static inline void printf(const char *format, const T1 arg1) { - printf_arg(arg1); - printf_fmt(format); -} - -template <typename T1, typename T2> -static inline void printf(const char *format, const T1 arg1, const T2 arg2) { - printf_arg(arg1); - printf_arg(arg2); - printf_fmt(format); -} - -template <typename T1, typename T2, typename T3> -static inline void printf(const char *format, const T1 arg1, const T2 arg2, - const T3 arg3) { - printf_arg(arg1); - printf_arg(arg2); - printf_arg(arg3); - printf_fmt(format); -} - -template <typename T1, typename T2, typename T3, typename T4> -static inline void printf(const char *format, const T1 arg1, const T2 arg2, - const T3 arg3, const T4 arg4) { - printf_arg(arg1); - printf_arg(arg2); - printf_arg(arg3); - printf_arg(arg4); - printf_fmt(format); -} - -template <typename T1, typename T2, typename T3, typename T4, typename T5> -static inline void printf(const char *format, const T1 arg1, const T2 arg2, - const T3 arg3, const T4 
arg4, const T5 arg5) { - printf_arg(arg1); - printf_arg(arg2); - printf_arg(arg3); - printf_arg(arg4); - printf_arg(arg5); - printf_fmt(format); -} - -template <typename T1, typename T2, typename T3, typename T4, typename T5, typename T6> -static inline void printf(const char *format, const T1 arg1, const T2 arg2, - const T3 arg3, const T4 arg4, const T5 arg5, - const T6 arg6) { - printf_arg(arg1); - printf_arg(arg2); - printf_arg(arg3); - printf_arg(arg4); - printf_arg(arg5); - printf_arg(arg6); - printf_fmt(format); -} - -template <typename T1, typename T2, typename T3, typename T4, typename T5, typename T6, typename T7> -static inline void printf(const char *format, const T1 arg1, const T2 arg2, - const T3 arg3, const T4 arg4, const T5 arg5, - const T6 arg6, const T7 arg7) { - printf_arg(arg1); - printf_arg(arg2); - printf_arg(arg3); - printf_arg(arg4); - printf_arg(arg5); - printf_arg(arg6); - printf_arg(arg7); - printf_fmt(format); -} - -template <typename T1, typename T2, typename T3, typename T4, typename T5, typename T6, typename T7, typename T8> -static inline void printf(const char *format, const T1 arg1, const T2 arg2, - const T3 arg3, const T4 arg4, const T5 arg5, - const T6 arg6, const T7 arg7, const T8 arg8) { - printf_arg(arg1); - printf_arg(arg2); - printf_arg(arg3); - printf_arg(arg4); - printf_arg(arg5); - printf_arg(arg6); - printf_arg(arg7); - printf_arg(arg8); - printf_fmt(format); -} - -#define vdump_b(vreg, str) \ -{ \ - int n; \ - getmaxvl_b(n); \ - uint8_t tmp[n]; \ - vst_b_x(vreg, tmp); \ - printf("<%s>", str); \ - for (int i = 0; i < n; ++i) { \ - printf(" %02x", uint8_t(tmp[i])); \ - } \ - printf("\n"); \ -} - -#define vdump_h(vreg, str) \ -{ \ - int n; \ - getmaxvl_h(n); \ - uint16_t tmp[n]; \ - vst_h_x(vreg, tmp); \ - printf("<%s>", str); \ - for (int i = 0; i < n; ++i) { \ - printf(" %04x", uint16_t(tmp[i])); \ - } \ - printf("\n"); \ -} - -#define vdump_w(vreg, str) \ -{ \ - int n; \ - getmaxvl_w(n); \ - uint32_t tmp[n]; \ - 
vst_w_x(vreg, tmp); \ - printf("<%s>", str); \ - for (int i = 0; i < n; ++i) { \ - printf(" %08x", uint32_t(tmp[i])); \ - } \ - printf("\n"); \ -} - -#define vprint_b(a, b) vdump_b(a, b) -#define vprint_h(a, b) vdump_h(a, b) -#define vprint_w(a, b) vdump_w(a, b) - -static void assert(bool v, const char* s = "") { - if (v) return; - printf("**assert[%s]\n", s); - exit(-1); -} +static_assert(sizeof(struct vconv_u8_t) == 4); template<typename T> static T* mmap_uncached(T* ptr) { @@ -280,8 +70,4 @@ #define __aligned__ __attribute__ ((aligned(64))) #define __noinit__ __attribute__ ((section(".noinit"))) -extern "C" void *malloc(size_t size); - -extern "C" void free(void *ptr); - #endif // KELVIN_H_