Patch optimized memcpy into kelvin libc

Change-Id: I25e52cc01c44be86a537f1bd32c9b6c36883f374
diff --git a/patches/riscv-gcc/0001-Define-__KELVIN__.patch b/patches/riscv-gcc/0001-Define-__KELVIN__.patch
new file mode 100644
index 0000000..176c790
--- /dev/null
+++ b/patches/riscv-gcc/0001-Define-__KELVIN__.patch
@@ -0,0 +1,25 @@
+From 6c9f07b278ff1ea5fc3dd5504a608dca90bc4712 Mon Sep 17 00:00:00 2001
+From: Alex Van Damme <atv@google.com>
+Date: Wed, 27 Mar 2024 07:08:46 +0000
+Subject: [PATCH] Define __KELVIN__
+
+---
+ gcc/c-family/c-cppbuiltin.c | 2 ++
+ 1 file changed, 2 insertions(+)
+
+diff --git a/gcc/c-family/c-cppbuiltin.c b/gcc/c-family/c-cppbuiltin.c
+index db91a36794a..1a61a02ca57 100644
+--- a/gcc/c-family/c-cppbuiltin.c
++++ b/gcc/c-family/c-cppbuiltin.c
+@@ -1477,6 +1477,8 @@ c_cpp_builtins (cpp_reader *pfile)
+      format.  */
+   if (ENABLE_DECIMAL_FLOAT && ENABLE_DECIMAL_BID_FORMAT)
+     cpp_define (pfile, "__DECIMAL_BID_FORMAT__");
++
++  cpp_define (pfile, "__KELVIN__");
+ }
+ 
+ /* Pass an object-like macro.  If it doesn't lie in the user's
+-- 
+2.44.0.396.g6e790dbe36-goog
+
diff --git a/patches/riscv-newlib/0001-Add-memcpy-kelvin.c.patch b/patches/riscv-newlib/0001-Add-memcpy-kelvin.c.patch
new file mode 100644
index 0000000..f35a771
--- /dev/null
+++ b/patches/riscv-newlib/0001-Add-memcpy-kelvin.c.patch
@@ -0,0 +1,143 @@
+From a4cdbc3086708450ae8163a2e2c93ba9b0f631fa Mon Sep 17 00:00:00 2001
+From: Alex Van Damme <atv@google.com>
+Date: Wed, 27 Mar 2024 07:13:37 +0000
+Subject: [PATCH] Add memcpy-kelvin.c
+
+---
+ newlib/libc/machine/riscv/Makefile.am     |  2 +-
+ newlib/libc/machine/riscv/Makefile.in     | 10 ++++-
+ newlib/libc/machine/riscv/memcpy-asm.S    |  2 +-
+ newlib/libc/machine/riscv/memcpy-kelvin.c | 46 +++++++++++++++++++++++
+ newlib/libc/machine/riscv/memcpy.c        |  2 +-
+ 5 files changed, 57 insertions(+), 5 deletions(-)
+ create mode 100644 newlib/libc/machine/riscv/memcpy-kelvin.c
+
+diff --git a/newlib/libc/machine/riscv/Makefile.am b/newlib/libc/machine/riscv/Makefile.am
+index 017b4be2e..c18f728a1 100644
+--- a/newlib/libc/machine/riscv/Makefile.am
++++ b/newlib/libc/machine/riscv/Makefile.am
+@@ -8,7 +8,7 @@ AM_CCASFLAGS = $(INCLUDES)
+ 
+ noinst_LIBRARIES = lib.a
+ 
+-lib_a_SOURCES = memmove.S memmove-stub.c memset.S memcpy-asm.S memcpy.c strlen.c \
++lib_a_SOURCES = memmove.S memmove-stub.c memset.S memcpy-asm.S memcpy.c memcpy-kelvin.c strlen.c \
+ 	strcpy.c strcmp.S setjmp.S ieeefp.c ffs.c
+ lib_a_CCASFLAGS=$(AM_CCASFLAGS)
+ lib_a_CFLAGS=$(AM_CFLAGS)
+diff --git a/newlib/libc/machine/riscv/Makefile.in b/newlib/libc/machine/riscv/Makefile.in
+index e6dee8763..0eefef21d 100644
+--- a/newlib/libc/machine/riscv/Makefile.in
++++ b/newlib/libc/machine/riscv/Makefile.in
+@@ -71,7 +71,7 @@ lib_a_AR = $(AR) $(ARFLAGS)
+ lib_a_LIBADD =
+ am_lib_a_OBJECTS = lib_a-memmove.$(OBJEXT) \
+ 	lib_a-memmove-stub.$(OBJEXT) lib_a-memset.$(OBJEXT) \
+-	lib_a-memcpy-asm.$(OBJEXT) lib_a-memcpy.$(OBJEXT) \
++	lib_a-memcpy-asm.$(OBJEXT) lib_a-memcpy.$(OBJEXT) lib_a-memcpy-kelvin.$(OBJEXT) \
+ 	lib_a-strlen.$(OBJEXT) lib_a-strcpy.$(OBJEXT) \
+ 	lib_a-strcmp.$(OBJEXT) lib_a-setjmp.$(OBJEXT) \
+ 	lib_a-ieeefp.$(OBJEXT) lib_a-ffs.$(OBJEXT)
+@@ -200,7 +200,7 @@ AUTOMAKE_OPTIONS = cygnus
+ INCLUDES = $(NEWLIB_CFLAGS) $(CROSS_CFLAGS) $(TARGET_CFLAGS)
+ AM_CCASFLAGS = $(INCLUDES)
+ noinst_LIBRARIES = lib.a
+-lib_a_SOURCES = memmove.S memmove-stub.c memset.S memcpy-asm.S memcpy.c strlen.c \
++lib_a_SOURCES = memmove.S memmove-stub.c memset.S memcpy-asm.S memcpy.c memcpy-kelvin.c strlen.c \
+ 	strcpy.c strcmp.S setjmp.S ieeefp.c ffs.c
+ 
+ lib_a_CCASFLAGS = $(AM_CCASFLAGS)
+@@ -313,6 +313,12 @@ lib_a-memcpy.o: memcpy.c
+ lib_a-memcpy.obj: memcpy.c
+ 	$(CC) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) $(lib_a_CFLAGS) $(CFLAGS) -c -o lib_a-memcpy.obj `if test -f 'memcpy.c'; then $(CYGPATH_W) 'memcpy.c'; else $(CYGPATH_W) '$(srcdir)/memcpy.c'; fi`
+ 
++lib_a-memcpy-kelvin.o: memcpy-kelvin.c
++	$(CC) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) $(lib_a_CFLAGS) $(CFLAGS) -c -o lib_a-memcpy-kelvin.o `test -f 'memcpy-kelvin.c' || echo '$(srcdir)/'`memcpy-kelvin.c
++
++lib_a-memcpy-kelvin.obj: memcpy-kelvin.c
++	$(CC) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) $(lib_a_CFLAGS) $(CFLAGS) -c -o lib_a-memcpy-kelvin.obj `if test -f 'memcpy-kelvin.c'; then $(CYGPATH_W) 'memcpy-kelvin.c'; else $(CYGPATH_W) '$(srcdir)/memcpy-kelvin.c'; fi`
++
+ lib_a-strlen.o: strlen.c
+ 	$(CC) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) $(lib_a_CFLAGS) $(CFLAGS) -c -o lib_a-strlen.o `test -f 'strlen.c' || echo '$(srcdir)/'`strlen.c
+ 
+diff --git a/newlib/libc/machine/riscv/memcpy-asm.S b/newlib/libc/machine/riscv/memcpy-asm.S
+index 5571e4704..2a271b095 100644
+--- a/newlib/libc/machine/riscv/memcpy-asm.S
++++ b/newlib/libc/machine/riscv/memcpy-asm.S
+@@ -9,7 +9,7 @@
+    http://www.opensource.org/licenses.
+ */
+ 
+-#if defined(PREFER_SIZE_OVER_SPEED) || defined(__OPTIMIZE_SIZE__)
++#if (defined(PREFER_SIZE_OVER_SPEED) || defined(__OPTIMIZE_SIZE__)) && !defined(__KELVIN__)
+ .text
+ .global memcpy
+ .type	memcpy, @function
+diff --git a/newlib/libc/machine/riscv/memcpy-kelvin.c b/newlib/libc/machine/riscv/memcpy-kelvin.c
+new file mode 100644
+index 000000000..6873b56c4
+--- /dev/null
++++ b/newlib/libc/machine/riscv/memcpy-kelvin.c
+@@ -0,0 +1,46 @@
++#if defined(__KELVIN__)
++
++#define __volatile_always__ volatile
++#define ARGS_F_A(FN, A0) FN " " #A0 "\n"
++#define ARGS_F_A_A(FN, A0, A1) FN " " #A0 ", " #A1 "\n"
++#define ARGS_F_A_A_A(FN, A0, A1, A2) FN " " #A0 ", " #A1 ", " #A2 "\n"
++#define ARGS_F_A_A_A_A(FN, A0, A1, A2, A3) \
++  FN " " #A0 ", " #A1 ", " #A2 ", " #A3 "\n"
++#define getvl_b_x_m(d, s)           __asm__ __volatile__(ARGS_F_A_A("getvl.b.x.m", %0, %1) : "=r"(d) : "r"(s))
++#define vld_b_lp_xx_m(Vd, s, t)     __asm__ __volatile_always__(ARGS_F_A_A_A("vld.b.lp.xx.m", Vd, %0, %1) : "=r"(s) : "r"(t), "0"(s) : "memory")
++#define vst_b_lp_xx_m(Vd, s, t)     __asm__ __volatile_always__(ARGS_F_A_A_A("vst.b.lp.xx.m", Vd, %0, %1) : "=r"(s) : "r"(t), "0"(s) : "memory")
++#define vld_w_x_m(Vd, s)            __asm__ __volatile_always__(ARGS_F_A_A("vld.w.x.m", Vd, %0) : : "r"(s) : "memory")
++#define vst_w_x_m(Vd, s)            __asm__ __volatile_always__(ARGS_F_A_A("vst.w.x.m", Vd, %0) : : "r"(s) : "memory")
++
++#include <stddef.h>
++#include <stdint.h>
++
++void *
++memcpy(void *__restrict dst, const void *__restrict src, size_t n)
++{
++  const uint8_t *s = (const uint8_t *)(src);
++  uint8_t *d = (uint8_t *)(dst);
++  int vl;
++  // Storage for the caller's v0-v7: spilled here on entry, reloaded from here before returning.
++  uint32_t spill[8 * 8];
++  vst_w_x_m(v0, spill);
++  vst_w_x_m(v4, spill + 32);
++  while (1) {
++    if (n <= 0) break;  // n is a size_t (unsigned), so this only fires when n == 0
++    getvl_b_x_m(vl, n);
++    n -= vl;
++    vld_b_lp_xx_m(v0, s, vl);
++    vst_b_lp_xx_m(v0, d, vl);
++
++    if (n <= 0) break;  // same exit test for the second (v4) half of the unrolled loop
++    getvl_b_x_m(vl, n);
++    n -= vl;
++    vld_b_lp_xx_m(v4, s, vl);
++    vst_b_lp_xx_m(v4, d, vl);
++  }
++  vld_w_x_m(v0, spill);
++  vld_w_x_m(v4, spill + 32);
++  return dst;
++}
++
++#endif  // defined(__KELVIN__)
+diff --git a/newlib/libc/machine/riscv/memcpy.c b/newlib/libc/machine/riscv/memcpy.c
+index 4098f3ab1..4e6c55071 100644
+--- a/newlib/libc/machine/riscv/memcpy.c
++++ b/newlib/libc/machine/riscv/memcpy.c
+@@ -9,7 +9,7 @@
+    http://www.opensource.org/licenses.
+ */
+ 
+-#if defined(PREFER_SIZE_OVER_SPEED) || defined(__OPTIMIZE_SIZE__)
++#if defined(PREFER_SIZE_OVER_SPEED) || defined(__OPTIMIZE_SIZE__) || defined(__KELVIN__)
+ //memcpy defined in memcpy-asm.S
+ #else
+ 
+-- 
+2.44.0.396.g6e790dbe36-goog
+
diff --git a/platforms/shodan/riscv_toolchain.mk b/platforms/shodan/riscv_toolchain.mk
index 71fd52b..10670a2 100644
--- a/platforms/shodan/riscv_toolchain.mk
+++ b/platforms/shodan/riscv_toolchain.mk
@@ -179,7 +179,8 @@
 # binutil_2.40 has special doc targets for gas/doc/asconfig.texi. Need to patch the
 # configured Makefile.
 	./scripts/update-toolchain-makefile.sh "$(TOOLCHAIN_KELVIN_BUILD_DIR)/Makefile"
-	$(MAKE) -C $(TOOLCHAIN_KELVIN_BUILD_DIR)
+# Parallel build: "$$" hands nproc to the recipe shell; an unescaped make reference would expand to nothing, leaving a bare -j.
+	$(MAKE) -C $(TOOLCHAIN_KELVIN_BUILD_DIR) -j$$(nproc)
 	$(MAKE) -C $(TOOLCHAIN_KELVIN_BUILD_DIR) clean
 
 $(OUT)/toolchain_kelvin_$(TOOLCHAIN_BUILD_DATE).tar.gz: $(TOOLCHAIN_KELVIN_BIN)