Optimized TFLM Reshape for Kelvin

- Add a TFLM patch which contains a Kelvin-specific kernel for reshape,
  which is implemented using an optimized memcpy.
- Add the `opt` library, which at the moment simply contains the
  optimized memcpy routine.
- Wrap up the `reshape_test` from TFLM into a buildable target.

Change-Id: I77f54fdc635838e8d272e47a8ff8654e7f23373e
diff --git a/tests/tflm/BUILD b/tests/tflm/BUILD
new file mode 100644
index 0000000..de9127f
--- /dev/null
+++ b/tests/tflm/BUILD
@@ -0,0 +1,18 @@
+load("//build_tools/bazel:kelvin.bzl", "kelvin_test")
+package(default_visibility = ["//visibility:public"])
+
+kelvin_test(  # Wraps the TFLM reshape kernel test as a buildable Kelvin test target.
+    name = "reshape_test",
+    srcs = [
+        "@tflite-micro//tensorflow/lite/micro/kernels:reshape_test.cc",  # test source lives in the external tflite-micro repository
+    ],
+    deps = [
+        "//crt:crt_header",  # the only dependency from this workspace; the rest come from tflite-micro
+        "@tflite-micro//tensorflow/lite/c:common",
+        "@tflite-micro//tensorflow/lite/kernels/internal:tensor",
+        "@tflite-micro//tensorflow/lite/micro/kernels:kernel_runner",
+        "@tflite-micro//tensorflow/lite/micro/testing:micro_test",
+        "@tflite-micro//tensorflow/lite/micro:micro_utils",
+        "@tflite-micro//tensorflow/lite/micro:test_helpers",
+    ],
+)
diff --git a/tflm/opt/BUILD b/tflm/opt/BUILD
new file mode 100644
index 0000000..abf99e6
--- /dev/null
+++ b/tflm/opt/BUILD
@@ -0,0 +1,16 @@
+package(default_visibility = ["//visibility:public"])
+
+cc_library(  # Library of Kelvin-optimized routines; currently only memcpy.cc and its header.
+    name = "opt",
+    srcs = [
+        "memcpy.cc",
+    ],
+    hdrs = [
+        "opt.h",
+    ],
+    deps = [
+        "//crt:crt_header",  # NOTE(review): presumably provides crt/kelvin.h, which memcpy.cc includes — confirm
+    ],
+    alwayslink = True,  # link this library's objects even when no symbol is referenced by the binary
+    target_compatible_with = ["@kelvin_sw//platforms/cpu:kelvin"],  # only buildable for the Kelvin CPU platform constraint
+)
diff --git a/tflm/opt/memcpy.cc b/tflm/opt/memcpy.cc
new file mode 100644
index 0000000..24df6f3
--- /dev/null
+++ b/tflm/opt/memcpy.cc
@@ -0,0 +1,29 @@
+// Copyright 2023 Google LLC
+// Licensed under the Apache License, Version 2.0, see LICENSE for details.
+// SPDX-License-Identifier: Apache-2.0
+
+#include "crt/kelvin.h"
+
+namespace kelvin::opt {
+
+void *memcpy(void *dst, const void *src, size_t n) {  // Copies n bytes from src to dst via Kelvin vector load/store macros; returns dst.
+  const uint8_t *s = reinterpret_cast<const uint8_t *>(src);  // byte-typed read cursor over the source
+  uint8_t *d = reinterpret_cast<uint8_t *>(dst);  // byte-typed write cursor over the destination
+  int vl;  // written by getvl_b_x_m, then subtracted from n
+  while (true) {  // body is unrolled twice: first chunk goes through v0, second through v4
+    if (n <= 0) break;  // n is unsigned (size_t), so this is effectively n == 0
+    getvl_b_x_m(vl, n);  // NOTE(review): presumably sets vl to the byte count for this step (at most n) — confirm in crt/kelvin.h
+    n -= vl;  // remaining bytes after this chunk
+    vld_b_lp_xx_m(v0, s, vl);  // NOTE(review): s is never advanced in C++ code, so the macro presumably post-increments it — confirm
+    vst_b_lp_xx_m(v0, d, vl);  // NOTE(review): likewise presumably advances d — confirm
+
+    if (n <= 0) break;  // stop between the two unrolled halves once everything is copied
+    getvl_b_x_m(vl, n);
+    n -= vl;
+    vld_b_lp_xx_m(v4, s, vl);  // same load/store pair as above, using v4 instead of v0
+    vst_b_lp_xx_m(v4, d, vl);
+  }
+  return dst;  // the original destination pointer, unchanged
+}
+
+}  // namespace kelvin::opt
diff --git a/tflm/opt/opt.h b/tflm/opt/opt.h
new file mode 100644
index 0000000..5574daf
--- /dev/null
+++ b/tflm/opt/opt.h
@@ -0,0 +1,16 @@
+// Copyright 2023 Google LLC
+// Licensed under the Apache License, Version 2.0, see LICENSE for details.
+// SPDX-License-Identifier: Apache-2.0
+
+#ifndef OPT_OPT_H_
+#define OPT_OPT_H_
+
+#include <cstddef>  // std::size_t; keeps this header self-contained
+
+namespace kelvin::opt {
+// Copies `n` bytes from `src` to `dst` and returns `dst`. Defined in
+// memcpy.cc, which performs the copy with Kelvin vector load/store macros.
+void *memcpy(void *dst, const void *src, std::size_t n);
+}  // namespace kelvin::opt
+
+#endif  // OPT_OPT_H_