Add meta-data to generic benchmark (#2496)

@tensorflow/micro

Add meta-data strings to the binary.  Meta-data is output each time the binary is executed.  See the README for a sample of the output.

If a Git repository is not available at ```${TENSORFLOW_ROOT}```, an appropriate message is generated in the meta-data.

Changes to how the CMSISNN and XTENSA NN library download scripts work.

Adds run of generic benchmark with embedded model for default build of x86, Corstone-300, Xtensa to test scripts executed during CI.

bug=fixes #2495
diff --git a/tensorflow/lite/micro/tools/benchmarking/BUILD b/tensorflow/lite/micro/tools/benchmarking/BUILD
index 1c3ebd4..6691ac3 100644
--- a/tensorflow/lite/micro/tools/benchmarking/BUILD
+++ b/tensorflow/lite/micro/tools/benchmarking/BUILD
@@ -20,6 +20,8 @@
 cc_library(
     name = "generic_benchmark_lib",
     srcs = ["generic_model_benchmark.cc"],
+    hdrs = ["show_meta_data.h"],
+    defines = ["GENERIC_BENCHMARK_NO_META_DATA"],
     deps = [
         ":metrics",
         ":op_resolver",
diff --git a/tensorflow/lite/micro/tools/benchmarking/Makefile.inc b/tensorflow/lite/micro/tools/benchmarking/Makefile.inc
index 32e782c..396e701 100644
--- a/tensorflow/lite/micro/tools/benchmarking/Makefile.inc
+++ b/tensorflow/lite/micro/tools/benchmarking/Makefile.inc
@@ -1,30 +1,59 @@
 MICROLITE_BENCHMARK_ROOT_DIR := $(TENSORFLOW_ROOT)tensorflow/lite/micro/tools/benchmarking
 
-ifneq ($(BENCHMARK_MODEL_PATH),)
-    GENERIC_BENCHMARK_MODEL_DIR := $(dir $(BENCHMARK_MODEL_PATH))
-    GENERIC_BENCHMARK_MODEL_NAME := $(notdir $(basename $(BENCHMARK_MODEL_PATH)))
-    CXXFLAGS += -DMODEL_HEADER_PATH=\"$(GENERIC_BENCHMARK_MODEL_DIR)$(GENERIC_BENCHMARK_MODEL_NAME)_model_data.h\"
-    CXXFLAGS += -DMODEL_NAME=$(GENERIC_BENCHMARK_MODEL_NAME)
-ifneq ($(BENCHMARK_ARENA_SIZE),)
-    CXXFLAGS += -DTENSOR_ARENA_SIZE=$(BENCHMARK_ARENA_SIZE)
+ifneq ($(GENERIC_BENCHMARK_MODEL_PATH),)
+    GENERIC_BENCHMARK_MODEL_DIR := $(dir $(GENERIC_BENCHMARK_MODEL_PATH))
+    GENERIC_BENCHMARK_MODEL_NAME := $(notdir $(basename $(GENERIC_BENCHMARK_MODEL_PATH)))
+    CXXFLAGS += -DGENERIC_BENCHMARK_USING_BUILTIN_MODEL
+    CXXFLAGS += -DGENERIC_BENCHMARK_MODEL_HEADER_PATH=\"$(GENERIC_BENCHMARK_MODEL_DIR)$(GENERIC_BENCHMARK_MODEL_NAME)_model_data.h\"
+    CXXFLAGS += -DGENERIC_BENCHMARK_MODEL_NAME=$(GENERIC_BENCHMARK_MODEL_NAME)
+ifneq ($(GENERIC_BENCHMARK_ARENA_SIZE),)
+    CXXFLAGS += -DGENERIC_BENCHMARK_TENSOR_ARENA_SIZE=$(GENERIC_BENCHMARK_ARENA_SIZE)
 endif
 
-    GENERIC_BENCHMARK_GENERATOR_INPUTS := $(TENSORFLOW_ROOT)$(BENCHMARK_MODEL_PATH)
+    # model path includes $(TENSORFLOW_ROOT) as part of the make invocation
+    GENERIC_BENCHMARK_GENERATOR_INPUTS := $(GENERIC_BENCHMARK_MODEL_PATH)
 
     GENERIC_BENCHMARK_GENERATED_SRCS := \
-    $(GENERATED_SRCS_DIR)$(TENSORFLOW_ROOT)$(GENERIC_BENCHMARK_MODEL_DIR)$(GENERIC_BENCHMARK_MODEL_NAME)_model_data.cc
+    $(GENERATED_SRCS_DIR)$(GENERIC_BENCHMARK_MODEL_DIR)$(GENERIC_BENCHMARK_MODEL_NAME)_model_data.cc
 
     GENERIC_BENCHMARK_GENERATED_HDRS := \
-    $(GENERATED_SRCS_DIR)$(TENSORFLOW_ROOT)$(GENERIC_BENCHMARK_MODEL_DIR)$(GENERIC_BENCHMARK_MODEL_NAME)_model_data.h
+    $(GENERATED_SRCS_DIR)$(GENERIC_BENCHMARK_MODEL_DIR)$(GENERIC_BENCHMARK_MODEL_NAME)_model_data.h
 endif
 
 GENERIC_BENCHMARK_SRCS := \
 $(MICROLITE_BENCHMARK_ROOT_DIR)/generic_model_benchmark.cc \
-$(MICROLITE_BENCHMARK_ROOT_DIR)/metrics.cc
+$(MICROLITE_BENCHMARK_ROOT_DIR)/metrics.cc \
+$(GENERATED_SRCS_DIR)$(MICROLITE_BENCHMARK_ROOT_DIR)/show_meta_data.cc
 
 GENERIC_BENCHMARK_HDRS := \
 $(MICROLITE_BENCHMARK_ROOT_DIR)/op_resolver.h \
-$(MICROLITE_BENCHMARK_ROOT_DIR)/metrics.h
+$(MICROLITE_BENCHMARK_ROOT_DIR)/metrics.h \
+$(MICROLITE_BENCHMARK_ROOT_DIR)/show_meta_data.h
+
+# always rebuild these to catch GENERIC_BENCHMARK_MODEL_PATH and GENERIC_BENCHMARK_ARENA_SIZE changes on command line
+.PHONY: $(GENERATED_SRCS_DIR)$(MICROLITE_BENCHMARK_ROOT_DIR)/show_meta_data.cc
+.PHONY: $(MICROLITE_BENCHMARK_ROOT_DIR)/generic_model_benchmark.cc
+
+$(GENERATED_SRCS_DIR)$(MICROLITE_BENCHMARK_ROOT_DIR)/show_meta_data.cc:
+	CC="$(CC)" \
+	CXX="$(CXX)" \
+	CC_FLAGS="$(CCFLAGS)" \
+	CXX_FLAGS="$(CXXFLAGS)" \
+	KERNEL_OPTIMIZATION="$(KERNEL_OPTIMIZATION_LEVEL)" \
+	CORE_OPTIMIZATION="$(CORE_OPTIMIZATION_LEVEL)" \
+	THIRD_PARTY_KERNEL_OPTIMIZATION="$(THIRD_PARTY_KERNEL_OPTIMIZATION_LEVEL)" \
+	TARGET=$(TARGET) \
+	TARGET_ARCH=$(TARGET_ARCH) \
+	TENSORFLOW_ROOT="$(TENSORFLOW_ROOT)" \
+	OPTIMIZED_KERNEL=$(OPTIMIZED_KERNEL_DIR) \
+	BUILD_TYPE=$(BUILD_TYPE) \
+	XTENSA_CORE=$(XTENSA_CORE) \
+	XTENSA_BASE=$(XTENSA_BASE) \
+	XTENSA_TOOLS_VERSION=$(XTENSA_TOOLS_VERSION) \
+	TEMPLATE_FILE="$(MICROLITE_BENCHMARK_ROOT_DIR)/show_meta_data.cc.template" \
+	GENERATED_FILE="$(GENERATED_SRCS_DIR)$(MICROLITE_BENCHMARK_ROOT_DIR)/show_meta_data.cc" \
+	MODEL_FILE="$(GENERIC_BENCHMARK_MODEL_PATH)" \
+	$(MICROLITE_BENCHMARK_ROOT_DIR)/collect_meta_data.sh
 
 ifneq ($(TARGET),bluepill)
 ifneq ($(TARGET_ARCH), $(filter $(TARGET_ARCH), hifimini))
diff --git a/tensorflow/lite/micro/tools/benchmarking/README.md b/tensorflow/lite/micro/tools/benchmarking/README.md
index 0bc727e..45f52b5 100644
--- a/tensorflow/lite/micro/tools/benchmarking/README.md
+++ b/tensorflow/lite/micro/tools/benchmarking/README.md
@@ -8,15 +8,15 @@
 
 Building the tool with the model compiled in uses two additional Makefile
 variables:
-* `BENCHMARK_MODEL_PATH`: the path to the TfLite format model file.  This
+* `GENERIC_BENCHMARK_MODEL_PATH`: the path to the TfLite format model file.  This
 can be a relative or absolute path.  This variable is required.
-* `BENCHMARK_ARENA_SIZE`: the size of the TFLM interpreter arena, in bytes.
+* `GENERIC_BENCHMARK_ARENA_SIZE`: the size of the TFLM interpreter arena, in bytes.
 This variable is optional.
 
 ## Tested, working targets
 * x86
 * cortex_m_qemu (no timing data)
-* Xtensa
+* Xtensa (p6, hifi3)
 * cortex_m_corstone_300
 
 ## Tested, non-working targets
@@ -32,17 +32,388 @@
 gen/linux_x86_64_default/bin/tflm_benchmark tensorflow/lite/micro/models/person_detect.tflite
 ```
 
-Build with model compiled into tool:
+Build and run with model compiled into tool:
 ```
-make -f tensorflow/lite/micro/tools/make/Makefile tflm_benchmark -j$(nproc) BENCHMARK_MODEL_PATH=tensorflow/lite/micro/models/person_detect.tflite BENCHMARK_ARENA_SIZE=`expr 100 \* 1024`
-```
-Run with model compiled into tool:
-```
-gen/linux_x86_64_default/bin/tflm_benchmark
+make -f tensorflow/lite/micro/tools/make/Makefile BUILD_TYPE=default run_tflm_benchmark -j$(nproc) GENERIC_BENCHMARK_MODEL_PATH=tensorflow/lite/micro/models/person_detect.tflite GENERIC_BENCHMARK_ARENA_SIZE=`expr 150 \* 1024`
 ```
 
 ## Build and run for Xtensa
 Build and run with model compiled into tool:
 ```
-make -f tensorflow/lite/micro/tools/make/Makefile TARGET=xtensa TARGET_ARCH=vision_p6 OPTIMIZED_KERNEL_DIR=xtensa XTENSA_CORE=P6_200528 BUILD_TYPE=default run_tflm_benchmark -j$(nproc) BENCHMARK_MODEL_PATH=/tmp/keyword_scrambled.tflite BENCHMARK_ARENA_SIZE=`expr 50 \* 1024`
+make -f tensorflow/lite/micro/tools/make/Makefile TARGET=xtensa TARGET_ARCH=vision_p6 OPTIMIZED_KERNEL_DIR=xtensa XTENSA_CORE=P6_200528 BUILD_TYPE=default run_tflm_benchmark -j$(nproc) GENERIC_BENCHMARK_MODEL_PATH=/tmp/keyword_scrambled.tflite GENERIC_BENCHMARK_ARENA_SIZE=`expr 50 \* 1024`
+```
+
+## Build and run for Cortex-M using Corstone 300 simulator
+Build and run with model compiled into tool:
+```
+make -f tensorflow/lite/micro/tools/make/Makefile   TARGET=cortex_m_corstone_300 TARGET_ARCH=cortex-m0   OPTIMIZED_KERNEL_DIR=cmsis_nn   BUILD_TYPE=default run_tflm_benchmark -j$(nproc) GENERIC_BENCHMARK_MODEL_PATH=tensorflow/lite/micro/models/person_detect.tflite GENERIC_BENCHMARK_ARENA_SIZE=`expr 150 \* 1024`
+```
+
+## Build and run using Bazel
+
+This is only for the x86 command line argument build, and does not contain meta-data:
+```
+bazel build tensorflow/lite/micro/tools/benchmarking:tflm_benchmark
+bazel-bin/tensorflow/lite/micro/tools/benchmarking/tflm_benchmark tensorflow/lite/micro/models/person_detect.tflite
+```
+
+## Example output with meta-data and built-in model layer information
+
+This sample output is for Cortex-M using Corstone 300:
+```
+Configured arena size = 153600
+
+--------------------
+Compiled on:
+
+Thu Mar  7 04:59:13 AM PST 2024
+--------------------
+Git SHA: 27b1f546cec03c87deaf2ff94c830f9cbd0f2e69
+
+Git status:
+
+On branch main
+Your branch is up to date with 'origin/main'.
+
+Untracked files:
+  (use "git add <file>..." to include in what will be committed)
+	MODULE.bazel
+	MODULE.bazel.lock
+nothing added to commit but untracked files present (use "git add" to track)
+--------------------
+C compiler: tensorflow/lite/micro/tools/make/downloads/gcc_embedded/bin/arm-none-eabi-gcc
+Version:
+
+arm-none-eabi-gcc (Arm GNU Toolchain 13.2.rel1 (Build arm-13.7)) 13.2.1 20231009
+Copyright (C) 2023 Free Software Foundation, Inc.
+This is free software; see the source for copying conditions.  There is NO
+warranty; not even for MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.
+
+Flags:
+
+-Wimplicit-function-declaration -std=c11 -Werror -fno-unwind-tables -ffunction-sections 
+-fdata-sections -fmessage-length=0 -DTF_LITE_STATIC_MEMORY -DTF_LITE_DISABLE_X86_NEON 
+-DCMSIS_NN -DKERNELS_OPTIMIZED_FOR_SPEED -mcpu=cortex-m0 -mfpu=auto 
+-DTF_LITE_MCU_DEBUG_LOG -mthumb -mfloat-abi=soft -funsigned-char -mlittle-endian 
+-fomit-frame-pointer -MD -DARMCM0
+
+C++ compiler: tensorflow/lite/micro/tools/make/downloads/gcc_embedded/bin/arm-none-eabi-g++
+Version:
+
+arm-none-eabi-g++ (Arm GNU Toolchain 13.2.rel1 (Build arm-13.7)) 13.2.1 20231009
+Copyright (C) 2023 Free Software Foundation, Inc.
+This is free software; see the source for copying conditions.  There is NO
+warranty; not even for MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.
+
+Flags:
+
+-std=c++11 -fno-rtti -fno-exceptions -fno-threadsafe-statics -Wnon-virtual-dtor -Werror 
+-fno-unwind-tables -ffunction-sections -fdata-sections -fmessage-length=0 
+-DTF_LITE_STATIC_MEMORY -DTF_LITE_DISABLE_X86_NEON -Wsign-compare -Wdouble-promotion 
+-Wunused-variable -Wunused-function -Wswitch -Wvla -Wall -Wextra 
+-Wmissing-field-initializers -Wstrict-aliasing -Wno-unused-parameter -DCMSIS_NN 
+-DKERNELS_OPTIMIZED_FOR_SPEED -mcpu=cortex-m0 -mfpu=auto -DTF_LITE_MCU_DEBUG_LOG -mthumb 
+-mfloat-abi=soft -funsigned-char -mlittle-endian -fomit-frame-pointer -MD -DARMCM0 
+-DCMSIS_DEVICE_ARM_CORTEX_M_XX_HEADER_FILE="ARMCM0.h" 
+-DGENERIC_BENCHMARK_USING_BUILTIN_MODEL 
+-DGENERIC_BENCHMARK_MODEL_HEADER_PATH="tensorflow/lite/micro/models/person_detect_model_da
+ta.h" -DGENERIC_BENCHMARK_MODEL_NAME=person_detect 
+-DGENERIC_BENCHMARK_TENSOR_ARENA_SIZE=153600 -DGENERIC_BENCHMARK_SHOW_META_DATA
+
+Optimization: kernel= -O2  core= -Os  third-party-kernel= -O2
+--------------------
+Target information:
+
+TARGET=cortex_m_corstone_300
+TARGET_ARCH=cortex-m0
+OPTIMIZATION=cmsis_nn
+BUILD_TYPE=default
+--------------------
+NN library download URLs:
+
+http://github.com/ARM-software/CMSIS-NN/archive/8492d82a1a81651977c5f5128492b4a0f0cf6715.z
+ip
+
+NN library MD5 checksums:
+
+2cb03e4f044b78af6751009cd53247a8
+--------------------
+Model SHA1:
+
+bcafcaa99d2eaf089f0ca25d66f56a2177e93f76
+
+Model analysis:
+
+=== tensorflow/lite/micro/models/person_detect.tflite ===
+Your TFLite model has '1' subgraph(s). In the subgraph description below,
+T# represents the Tensor numbers. For example, in Subgraph#0, the DEPTHWISE_CONV_2D op 
+takes
+tensor #88 and tensor #0 and tensor #33 as input and produces tensor #34 as output.
+Subgraph#0(T#88) -> [T#87]
+  Op#0 DEPTHWISE_CONV_2D(T#88, T#0, T#33[3774, -107, -84394, -13908, 20697, ...]) -> 
+[T#34]
+  Op#1 DEPTHWISE_CONV_2D(T#34, T#9, T#52[31132, 28, 273, -2692, 7409, ...]) -> [T#51]
+  Op#2 CONV_2D(T#51, T#10, T#53[10064, 1130, -13056, -30284, -23349, ...]) -> [T#54]
+  Op#3 DEPTHWISE_CONV_2D(T#54, T#11, T#56[306, -158, 19181, -364, 6237, ...]) -> [T#55]
+  Op#4 CONV_2D(T#55, T#12, T#57[-7649, 12287, -4433, 5851, -188, ...]) -> [T#58]
+  Op#5 DEPTHWISE_CONV_2D(T#58, T#13, T#60[7297, -498, 263, -1975, 2260, ...]) -> [T#59]
+  Op#6 CONV_2D(T#59, T#14, T#61[-4742, -4160, 6985, 8647, 29773, ...]) -> [T#62]
+  Op#7 DEPTHWISE_CONV_2D(T#62, T#15, T#64[28588, 363, 27592, 22294, -4344, ...]) -> [T#63]
+  Op#8 CONV_2D(T#63, T#16, T#65[12683, 36581, 6206, 1236, 15834, ...]) -> [T#66]
+  Op#9 DEPTHWISE_CONV_2D(T#66, T#17, T#68[-6353, 9090, -30, -1019, -496, ...]) -> [T#67]
+  Op#10 CONV_2D(T#67, T#18, T#69[3895, -6563, -8843, -2066, -1372, ...]) -> [T#70]
+  Op#11 DEPTHWISE_CONV_2D(T#70, T#19, T#72[20437, -365, -2518, 20827, -904, ...]) -> 
+[T#71]
+  Op#12 CONV_2D(T#71, T#20, T#73[-10120, 9768, 3524, 3796, 6896, ...]) -> [T#74]
+  Op#13 DEPTHWISE_CONV_2D(T#74, T#21, T#76[-3969, -1910, -2425, -114, 4456, ...]) -> 
+[T#75]
+  Op#14 CONV_2D(T#75, T#22, T#77[-13202, 13929, -4357, 19492, 1971, ...]) -> [T#78]
+  Op#15 DEPTHWISE_CONV_2D(T#78, T#23, T#80[-6169, -10, -2788, 14420, -7457, ...]) -> 
+[T#79]
+  Op#16 CONV_2D(T#79, T#24, T#81[155, -3073, 291, -902, -9942, ...]) -> [T#82]
+  Op#17 DEPTHWISE_CONV_2D(T#82, T#25, T#84[-2063, 10755, -12037, -6417, 2147, ...]) -> 
+[T#83]
+  Op#18 CONV_2D(T#83, T#26, T#85[-1872, -7549, 13994, 3191, -614, ...]) -> [T#86]
+  Op#19 DEPTHWISE_CONV_2D(T#86, T#1, T#36[-6485, 294, 686, -6011, -5196, ...]) -> [T#35]
+  Op#20 CONV_2D(T#35, T#2, T#37[7116, 8066, 11755, 11674, 9983, ...]) -> [T#38]
+  Op#21 DEPTHWISE_CONV_2D(T#38, T#3, T#40[7735, 5235, 4334, -6485, 9397, ...]) -> [T#39]
+  Op#22 CONV_2D(T#39, T#4, T#41[2947, 10152, -7865, -554, -13760, ...]) -> [T#42]
+  Op#23 DEPTHWISE_CONV_2D(T#42, T#5, T#44[-4755, 7899, -488, -2954, 2990, ...]) -> [T#43]
+  Op#24 CONV_2D(T#43, T#6, T#45[-6269, -22458, 13332, -16368, 4435, ...]) -> [T#46]
+  Op#25 DEPTHWISE_CONV_2D(T#46, T#7, T#48[333, -4743, -310, -2471, 4804, ...]) -> [T#47]
+  Op#26 CONV_2D(T#47, T#8, T#49[6677, -3593, 3754, 26316, -4761, ...]) -> [T#50]
+  Op#27 AVERAGE_POOL_2D(T#50) -> [T#27]
+  Op#28 CONV_2D(T#27, T#30, T#29[16267, -17079]) -> [T#28]
+  Op#29 RESHAPE(T#28, T#32[1, 2]) -> [T#31]
+  Op#30 SOFTMAX(T#31) -> [T#87]
+Tensors of Subgraph#0
+  T#0(MobilenetV1/Conv2d_0/weights/read) shape:[1, 3, 3, 8], type:INT8 RO 72 bytes, 
+buffer: 68, data:[., y, ., g, ., ...]
+  T#1(MobilenetV1/Conv2d_10_depthwise/depthwise_weights/read) shape:[1, 3, 3, 128], 
+type:INT8 RO 1152 bytes, buffer: 72, data:[W, ., d, ., ., ...]
+  T#2(MobilenetV1/Conv2d_10_pointwise/weights/read) shape:[128, 1, 1, 128], type:INT8 RO 
+16384 bytes, buffer: 14, data:[., ., 
+, ., ., ...]
+  T#3(MobilenetV1/Conv2d_11_depthwise/depthwise_weights/read) shape:[1, 3, 3, 128], 
+type:INT8 RO 1152 bytes, buffer: 13, data:[., `, ., :, ., ...]
+  T#4(MobilenetV1/Conv2d_11_pointwise/weights/read) shape:[128, 1, 1, 128], type:INT8 RO 
+16384 bytes, buffer: 12, data:[., ., ., ., ., ...]
+  T#5(MobilenetV1/Conv2d_12_depthwise/depthwise_weights/read) shape:[1, 3, 3, 128], 
+type:INT8 RO 1152 bytes, buffer: 10, data:[z, ., ., ?, ., ...]
+  T#6(MobilenetV1/Conv2d_12_pointwise/weights/read) shape:[256, 1, 1, 128], type:INT8 RO 
+32768 bytes, buffer: 69, data:[/, ., ., ., #, ...]
+  T#7(MobilenetV1/Conv2d_13_depthwise/depthwise_weights/read) shape:[1, 3, 3, 256], 
+type:INT8 RO 2304 bytes, buffer: 7, data:[., ., w, ., ., ...]
+  T#8(MobilenetV1/Conv2d_13_pointwise/weights/read) shape:[256, 1, 1, 256], type:INT8 RO 
+65536 bytes, buffer: 5, data:[&, ., ., ., ., ...]
+  T#9(MobilenetV1/Conv2d_1_depthwise/depthwise_weights/read) shape:[1, 3, 3, 8], 
+type:INT8 RO 72 bytes, buffer: 60, data:[., ., ., ., ., ...]
+  T#10(MobilenetV1/Conv2d_1_pointwise/weights/read) shape:[16, 1, 1, 8], type:INT8 RO 128 
+bytes, buffer: 63, data:[., ., ., ., ., ...]
+  T#11(MobilenetV1/Conv2d_2_depthwise/depthwise_weights/read) shape:[1, 3, 3, 16], 
+type:INT8 RO 144 bytes, buffer: 58, data:[O, *, ., !, ., ...]
+  T#12(MobilenetV1/Conv2d_2_pointwise/weights/read) shape:[32, 1, 1, 16], type:INT8 RO 
+512 bytes, buffer: 61, data:[., 4, ., ., 8, ...]
+  T#13(MobilenetV1/Conv2d_3_depthwise/depthwise_weights/read) shape:[1, 3, 3, 32], 
+type:INT8 RO 288 bytes, buffer: 35, data:[., 1, ;, M, ., ...]
+  T#14(MobilenetV1/Conv2d_3_pointwise/weights/read) shape:[32, 1, 1, 32], type:INT8 RO 
+1024 bytes, buffer: 33, data:[., ., ., ., ., ...]
+  T#15(MobilenetV1/Conv2d_4_depthwise/depthwise_weights/read) shape:[1, 3, 3, 32], 
+type:INT8 RO 288 bytes, buffer: 32, data:[., ;, ., ., ., ...]
+  T#16(MobilenetV1/Conv2d_4_pointwise/weights/read) shape:[64, 1, 1, 32], type:INT8 RO 
+2048 bytes, buffer: 30, data:[., ., ., 5, ., ...]
+  T#17(MobilenetV1/Conv2d_5_depthwise/depthwise_weights/read) shape:[1, 3, 3, 64], 
+type:INT8 RO 576 bytes, buffer: 77, data:[G, ., ., ., ., ...]
+  T#18(MobilenetV1/Conv2d_5_pointwise/weights/read) shape:[64, 1, 1, 64], type:INT8 RO 
+4096 bytes, buffer: 28, data:[., 2, ., $, ., ...]
+  T#19(MobilenetV1/Conv2d_6_depthwise/depthwise_weights/read) shape:[1, 3, 3, 64], 
+type:INT8 RO 576 bytes, buffer: 27, data:[., 1, z, ., U, ...]
+  T#20(MobilenetV1/Conv2d_6_pointwise/weights/read) shape:[128, 1, 1, 64], type:INT8 RO 
+8192 bytes, buffer: 25, data:[5, ., ., ., V, ...]
+  T#21(MobilenetV1/Conv2d_7_depthwise/depthwise_weights/read) shape:[1, 3, 3, 128], 
+type:INT8 RO 1152 bytes, buffer: 23, data:[., ., ., ., ., ...]
+  T#22(MobilenetV1/Conv2d_7_pointwise/weights/read) shape:[128, 1, 1, 128], type:INT8 RO 
+16384 bytes, buffer: 21, data:[., ., ., ., ., ...]
+  T#23(MobilenetV1/Conv2d_8_depthwise/depthwise_weights/read) shape:[1, 3, 3, 128], 
+type:INT8 RO 1152 bytes, buffer: 71, data:[., ., ., ., Q, ...]
+  T#24(MobilenetV1/Conv2d_8_pointwise/weights/read) shape:[128, 1, 1, 128], type:INT8 RO 
+16384 bytes, buffer: 20, data:[@, ., 2, ., 8, ...]
+  T#25(MobilenetV1/Conv2d_9_depthwise/depthwise_weights/read) shape:[1, 3, 3, 128], 
+type:INT8 RO 1152 bytes, buffer: 80, data:[^, ., ~, ., ., ...]
+  T#26(MobilenetV1/Conv2d_9_pointwise/weights/read) shape:[128, 1, 1, 128], type:INT8 RO 
+16384 bytes, buffer: 16, data:[., .,  , ., , ...]
+  T#27(MobilenetV1/Logits/AvgPool_1a/AvgPool) shape:[1, 1, 1, 256], type:INT8
+  T#28(MobilenetV1/Logits/Conv2d_1c_1x1/BiasAdd) shape:[1, 1, 1, 2], type:INT8
+  T#29(MobilenetV1/Logits/Conv2d_1c_1x1/Conv2D_bias) shape:[2], type:INT32 RO 8 bytes, 
+buffer: 2, data:[16267, -17079]
+  T#30(MobilenetV1/Logits/Conv2d_1c_1x1/weights/read) shape:[2, 1, 1, 256], type:INT8 RO 
+512 bytes, buffer: 3, data:[., , ., ., ., ...]
+  T#31(MobilenetV1/Logits/SpatialSqueeze) shape:[1, 2], type:INT8
+  T#32(MobilenetV1/Logits/SpatialSqueeze_shape) shape:[2], type:INT32 RO 8 bytes, buffer: 
+1, data:[1, 2]
+  T#33(MobilenetV1/MobilenetV1/Conv2d_0/Conv2D_bias) shape:[8], type:INT32 RO 32 bytes, 
+buffer: 82, data:[3774, -107, -84394, -13908, 20697, ...]
+  T#34(MobilenetV1/MobilenetV1/Conv2d_0/Relu6) shape:[1, 48, 48, 8], type:INT8
+  T#35(MobilenetV1/MobilenetV1/Conv2d_10_depthwise/Relu6) shape:[1, 6, 6, 128], type:INT8
+  T#36(MobilenetV1/MobilenetV1/Conv2d_10_depthwise/depthwise_bias) shape:[128], 
+type:INT32 RO 512 bytes, buffer: 22, data:[-6485, 294, 686, -6011, -5196, ...]
+  T#37(MobilenetV1/MobilenetV1/Conv2d_10_pointwise/Conv2D_bias) shape:[128], type:INT32 
+RO 512 bytes, buffer: 70, data:[7116, 8066, 11755, 11674, 9983, ...]
+  T#38(MobilenetV1/MobilenetV1/Conv2d_10_pointwise/Relu6) shape:[1, 6, 6, 128], type:INT8
+  T#39(MobilenetV1/MobilenetV1/Conv2d_11_depthwise/Relu6) shape:[1, 6, 6, 128], type:INT8
+  T#40(MobilenetV1/MobilenetV1/Conv2d_11_depthwise/depthwise_bias) shape:[128], 
+type:INT32 RO 512 bytes, buffer: 19, data:[7735, 5235, 4334, -6485, 9397, ...]
+  T#41(MobilenetV1/MobilenetV1/Conv2d_11_pointwise/Conv2D_bias) shape:[128], type:INT32 
+RO 512 bytes, buffer: 11, data:[2947, 10152, -7865, -554, -13760, ...]
+  T#42(MobilenetV1/MobilenetV1/Conv2d_11_pointwise/Relu6) shape:[1, 6, 6, 128], type:INT8
+  T#43(MobilenetV1/MobilenetV1/Conv2d_12_depthwise/Relu6) shape:[1, 3, 3, 128], type:INT8
+  T#44(MobilenetV1/MobilenetV1/Conv2d_12_depthwise/depthwise_bias) shape:[128], 
+type:INT32 RO 512 bytes, buffer: 9, data:[-4755, 7899, -488, -2954, 2990, ...]
+  T#45(MobilenetV1/MobilenetV1/Conv2d_12_pointwise/Conv2D_bias) shape:[256], type:INT32 
+RO 1024 bytes, buffer: 8, data:[-6269, -22458, 13332, -16368, 4435, ...]
+  T#46(MobilenetV1/MobilenetV1/Conv2d_12_pointwise/Relu6) shape:[1, 3, 3, 256], type:INT8
+  T#47(MobilenetV1/MobilenetV1/Conv2d_13_depthwise/Relu6) shape:[1, 3, 3, 256], type:INT8
+  T#48(MobilenetV1/MobilenetV1/Conv2d_13_depthwise/depthwise_bias) shape:[256], 
+type:INT32 RO 1024 bytes, buffer: 6, data:[333, -4743, -310, -2471, 4804, ...]
+  T#49(MobilenetV1/MobilenetV1/Conv2d_13_pointwise/Conv2D_bias) shape:[256], type:INT32 
+RO 1024 bytes, buffer: 4, data:[6677, -3593, 3754, 26316, -4761, ...]
+  T#50(MobilenetV1/MobilenetV1/Conv2d_13_pointwise/Relu6) shape:[1, 3, 3, 256], type:INT8
+  T#51(MobilenetV1/MobilenetV1/Conv2d_1_depthwise/Relu6) shape:[1, 48, 48, 8], type:INT8
+  T#52(MobilenetV1/MobilenetV1/Conv2d_1_depthwise/depthwise_bias) shape:[8], type:INT32 
+RO 32 bytes, buffer: 56, data:[31132, 28, 273, -2692, 7409, ...]
+  T#53(MobilenetV1/MobilenetV1/Conv2d_1_pointwise/Conv2D_bias) shape:[16], type:INT32 RO 
+64 bytes, buffer: 36, data:[10064, 1130, -13056, -30284, -23349, ...]
+  T#54(MobilenetV1/MobilenetV1/Conv2d_1_pointwise/Relu6) shape:[1, 48, 48, 16], type:INT8
+  T#55(MobilenetV1/MobilenetV1/Conv2d_2_depthwise/Relu6) shape:[1, 24, 24, 16], type:INT8
+  T#56(MobilenetV1/MobilenetV1/Conv2d_2_depthwise/depthwise_bias) shape:[16], type:INT32 
+RO 64 bytes, buffer: 48, data:[306, -158, 19181, -364, 6237, ...]
+  T#57(MobilenetV1/MobilenetV1/Conv2d_2_pointwise/Conv2D_bias) shape:[32], type:INT32 RO 
+128 bytes, buffer: 62, data:[-7649, 12287, -4433, 5851, -188, ...]
+  T#58(MobilenetV1/MobilenetV1/Conv2d_2_pointwise/Relu6) shape:[1, 24, 24, 32], type:INT8
+  T#59(MobilenetV1/MobilenetV1/Conv2d_3_depthwise/Relu6) shape:[1, 24, 24, 32], type:INT8
+  T#60(MobilenetV1/MobilenetV1/Conv2d_3_depthwise/depthwise_bias) shape:[32], type:INT32 
+RO 128 bytes, buffer: 34, data:[7297, -498, 263, -1975, 2260, ...]
+  T#61(MobilenetV1/MobilenetV1/Conv2d_3_pointwise/Conv2D_bias) shape:[32], type:INT32 RO 
+128 bytes, buffer: 59, data:[-4742, -4160, 6985, 8647, 29773, ...]
+  T#62(MobilenetV1/MobilenetV1/Conv2d_3_pointwise/Relu6) shape:[1, 24, 24, 32], type:INT8
+  T#63(MobilenetV1/MobilenetV1/Conv2d_4_depthwise/Relu6) shape:[1, 12, 12, 32], type:INT8
+  T#64(MobilenetV1/MobilenetV1/Conv2d_4_depthwise/depthwise_bias) shape:[32], type:INT32 
+RO 128 bytes, buffer: 31, data:[28588, 363, 27592, 22294, -4344, ...]
+  T#65(MobilenetV1/MobilenetV1/Conv2d_4_pointwise/Conv2D_bias) shape:[64], type:INT32 RO 
+256 bytes, buffer: 76, data:[12683, 36581, 6206, 1236, 15834, ...]
+  T#66(MobilenetV1/MobilenetV1/Conv2d_4_pointwise/Relu6) shape:[1, 12, 12, 64], type:INT8
+  T#67(MobilenetV1/MobilenetV1/Conv2d_5_depthwise/Relu6) shape:[1, 12, 12, 64], type:INT8
+  T#68(MobilenetV1/MobilenetV1/Conv2d_5_depthwise/depthwise_bias) shape:[64], type:INT32 
+RO 256 bytes, buffer: 29, data:[-6353, 9090, -30, -1019, -496, ...]
+  T#69(MobilenetV1/MobilenetV1/Conv2d_5_pointwise/Conv2D_bias) shape:[64], type:INT32 RO 
+256 bytes, buffer: 84, data:[3895, -6563, -8843, -2066, -1372, ...]
+  T#70(MobilenetV1/MobilenetV1/Conv2d_5_pointwise/Relu6) shape:[1, 12, 12, 64], type:INT8
+  T#71(MobilenetV1/MobilenetV1/Conv2d_6_depthwise/Relu6) shape:[1, 6, 6, 64], type:INT8
+  T#72(MobilenetV1/MobilenetV1/Conv2d_6_depthwise/depthwise_bias) shape:[64], type:INT32 
+RO 256 bytes, buffer: 26, data:[20437, -365, -2518, 20827, -904, ...]
+  T#73(MobilenetV1/MobilenetV1/Conv2d_6_pointwise/Conv2D_bias) shape:[128], type:INT32 RO 
+512 bytes, buffer: 24, data:[-10120, 9768, 3524, 3796, 6896, ...]
+  T#74(MobilenetV1/MobilenetV1/Conv2d_6_pointwise/Relu6) shape:[1, 6, 6, 128], type:INT8
+  T#75(MobilenetV1/MobilenetV1/Conv2d_7_depthwise/Relu6) shape:[1, 6, 6, 128], type:INT8
+  T#76(MobilenetV1/MobilenetV1/Conv2d_7_depthwise/depthwise_bias) shape:[128], type:INT32 
+RO 512 bytes, buffer: 78, data:[-3969, -1910, -2425, -114, 4456, ...]
+  T#77(MobilenetV1/MobilenetV1/Conv2d_7_pointwise/Conv2D_bias) shape:[128], type:INT32 RO 
+512 bytes, buffer: 83, data:[-13202, 13929, -4357, 19492, 1971, ...]
+  T#78(MobilenetV1/MobilenetV1/Conv2d_7_pointwise/Relu6) shape:[1, 6, 6, 128], type:INT8
+  T#79(MobilenetV1/MobilenetV1/Conv2d_8_depthwise/Relu6) shape:[1, 6, 6, 128], type:INT8
+  T#80(MobilenetV1/MobilenetV1/Conv2d_8_depthwise/depthwise_bias) shape:[128], type:INT32 
+RO 512 bytes, buffer: 55, data:[-6169, -10, -2788, 14420, -7457, ...]
+  T#81(MobilenetV1/MobilenetV1/Conv2d_8_pointwise/Conv2D_bias) shape:[128], type:INT32 RO 
+512 bytes, buffer: 18, data:[155, -3073, 291, -902, -9942, ...]
+  T#82(MobilenetV1/MobilenetV1/Conv2d_8_pointwise/Relu6) shape:[1, 6, 6, 128], type:INT8
+  T#83(MobilenetV1/MobilenetV1/Conv2d_9_depthwise/Relu6) shape:[1, 6, 6, 128], type:INT8
+  T#84(MobilenetV1/MobilenetV1/Conv2d_9_depthwise/depthwise_bias) shape:[128], type:INT32 
+RO 512 bytes, buffer: 17, data:[-2063, 10755, -12037, -6417, 2147, ...]
+  T#85(MobilenetV1/MobilenetV1/Conv2d_9_pointwise/Conv2D_bias) shape:[128], type:INT32 RO 
+512 bytes, buffer: 15, data:[-1872, -7549, 13994, 3191, -614, ...]
+  T#86(MobilenetV1/MobilenetV1/Conv2d_9_pointwise/Relu6) shape:[1, 6, 6, 128], type:INT8
+  T#87(MobilenetV1/Predictions/Reshape_1) shape:[1, 2], type:INT8
+  T#88(input) shape:[1, 96, 96, 1], type:INT8
+---------------------------------------------------------------
+              Model size:     300568 bytes
+    Non-data buffer size:      81640 bytes (27.16 )
+  Total data buffer size:     218928 bytes (72.84 )
+    (Zero value buffers):          0 bytes (00.00 )
+* Buffers of TFLite model are mostly used for constant tensors.
+  And zero value buffers are buffers filled with zeros.
+  Non-data buffers area are used to store operators, subgraphs and etc.
+  You can find more details from 
+https://github.com/tensorflow/tensorflow/blob/master/tensorflow/lite/schema/schema.fbs
+--------------------
+TfliteGetModel took 0 ticks (0 ms).
+
+DEPTHWISE_CONV_2D took 0 ticks (0 ms).
+DEPTHWISE_CONV_2D took 0 ticks (0 ms).
+CONV_2D took 0 ticks (0 ms).
+DEPTHWISE_CONV_2D took 0 ticks (0 ms).
+CONV_2D took 0 ticks (0 ms).
+DEPTHWISE_CONV_2D took 0 ticks (0 ms).
+CONV_2D took 0 ticks (0 ms).
+DEPTHWISE_CONV_2D took 0 ticks (0 ms).
+CONV_2D took 0 ticks (0 ms).
+DEPTHWISE_CONV_2D took 0 ticks (0 ms).
+CONV_2D took 0 ticks (0 ms).
+DEPTHWISE_CONV_2D took 0 ticks (0 ms).
+CONV_2D took 0 ticks (0 ms).
+DEPTHWISE_CONV_2D took 0 ticks (0 ms).
+CONV_2D took 0 ticks (0 ms).
+DEPTHWISE_CONV_2D took 0 ticks (0 ms).
+CONV_2D took 0 ticks (0 ms).
+DEPTHWISE_CONV_2D took 0 ticks (0 ms).
+CONV_2D took 0 ticks (0 ms).
+DEPTHWISE_CONV_2D took 0 ticks (0 ms).
+CONV_2D took 0 ticks (0 ms).
+DEPTHWISE_CONV_2D took 0 ticks (0 ms).
+CONV_2D took 0 ticks (0 ms).
+DEPTHWISE_CONV_2D took 0 ticks (0 ms).
+CONV_2D took 0 ticks (0 ms).
+DEPTHWISE_CONV_2D took 0 ticks (0 ms).
+CONV_2D took 0 ticks (0 ms).
+AVERAGE_POOL_2D took 0 ticks (0 ms).
+CONV_2D took 0 ticks (0 ms).
+RESHAPE took 0 ticks (0 ms).
+SOFTMAX took 0 ticks (0 ms).
+
+"Unique Tag","Total ticks across all events with that tag."
+DEPTHWISE_CONV_2D, 0
+CONV_2D, 0
+AVERAGE_POOL_2D, 0
+RESHAPE, 0
+SOFTMAX, 0
+"total number of ticks", 0
+
+[[ Table ]]: Arena
+        Arena   Bytes   % Arena
+        Total | 84436 |   100.00
+NonPersistent | 55296 |    65.49
+   Persistent | 29140 |    34.51
+
+[[ Table ]]: Allocations
+                  Allocation   Id    Used   Requested   Count   % Memory
+            Eval tensor data |  0 |  1068 |      1068 |    89 |      1.26
+      Persistent tensor data |  1 |    64 |        64 |     2 |      0.08
+Persistent quantization data |  2 |    40 |        40 |     4 |      0.05
+      Persistent buffer data |  3 | 25876 |     25704 |    90 |     30.65
+ Tensor variable buffer data |  4 |     0 |         0 |     0 |      0.00
+ Node and registration array |  5 |   992 |       992 |    31 |      1.17
+              Operation data |  6 |     0 |         0 |     0 |      0.00
+
+Application exit code: 0.
+
+Info: /OSCI/SystemC: Simulation stopped by user.
+[warning ][main@0][01 ns] Simulation stopped by user
+
+--- FVP_MPS3_Corstone_SSE_300 statistics: -------------------------------------
+Simulated time                          : 5.474678s
+User time                               : 1.609473s
+System time                             : 0.123380s
+Wall time                               : 1.892705s
+Performance index                       : 2.89
+FVP_MPS3_Corstone_SSE_300.cpu0          :  78.98 MIPS (   136866941 Inst)
+-------------------------------------------------------------------------------
 ```
diff --git a/tensorflow/lite/micro/tools/benchmarking/analyze_model.py b/tensorflow/lite/micro/tools/benchmarking/analyze_model.py
new file mode 100644
index 0000000..f2ff013
--- /dev/null
+++ b/tensorflow/lite/micro/tools/benchmarking/analyze_model.py
@@ -0,0 +1,35 @@
+#!/usr/bin/env python3
+# Copyright 2024 The TensorFlow Authors. All Rights Reserved.
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+#     http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+# ==============================================================================
+
+from absl import app
+from absl import flags
+
+import tensorflow as tf
+
+_MODEL_PATH = flags.DEFINE_string(
+    name='model_file',
+    default='',
+    help='path for the .tflite model file.',
+)
+
+
+def _main(_):
+  """outputs model analysis to stdout/stderr"""
+  tf.lite.experimental.Analyzer.analyze(model_path=_MODEL_PATH.value)
+
+
+if __name__ == '__main__':
+  app.run(_main)
diff --git a/tensorflow/lite/micro/tools/benchmarking/collect_meta_data.sh b/tensorflow/lite/micro/tools/benchmarking/collect_meta_data.sh
new file mode 100755
index 0000000..c60bdf3
--- /dev/null
+++ b/tensorflow/lite/micro/tools/benchmarking/collect_meta_data.sh
@@ -0,0 +1,177 @@
+#!/usr/bin/env bash
+# Copyright 2024 The TensorFlow Authors. All Rights Reserved.
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+#     http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+# ==============================================================================
+#
+# Collect generic benchmark meta data and insert resulting strings into
+# the file designated by TEMPLATE_FILE.
+#
+# Takes no arguments.
+#
+# Uses the following environment variables:
+# TEMPLATE_FILE - path to the template source file
+# GENERATED_FILE - path to the generated source file with substituted strings
+# TENSORFLOW_ROOT - path to the root of the source tree
+# MODEL_FILE - path to the .tflite model file
+# CC - path to C compiler
+# CXX - path to C++ compiler
+# CC_FLAGS - C compiler flags
+# CXX_FLAGS - C++ compiler flags
+# KERNEL_OPTIMIZATION - kernel optimization flags
+# CORE_OPTIMIZATION - core optimization flags
+# THIRD_PARTY_KERNEL_OPTIMIZATION - third party kernel optimization flags
+# TARGET - target platform (xtensa, cortex_m_corstone_300, etc.)
+# TARGET_ARCH - target architecture (hifi5, cortex-m0, etc.)
+# OPTIMIZED_KERNEL - optimized kernel (xtensa, cmsis_nn, etc.)
+# BUILD_TYPE - type of build (default, release, etc.)
+# XTENSA_CORE - Xtensa core specification
+# XTENSA_BASE - Xtensa base install directory
+# XTENSA_TOOLS_VERSION - Xtensa tooling version
+
+
+set -e
+
+source ${TENSORFLOW_ROOT}tensorflow/lite/micro/tools/ci_build/helper_functions.sh
+
+function substitute_strings() {
+  search="// %%%_$1_%%%"
+  lines=$(fold -w 90 -s <<< "$2")
+  SAVED_IFS=${IFS}
+  IFS=$'\n' lines_array=( ${lines} )
+  IFS=${SAVED_IFS}
+  replacement=()
+  for line in "${lines_array[@]}"; do
+    line=$(sed -e 's/"/\\"/g' <<< "${line}")
+    line=$(printf '"%s",\n    ' "${line}")
+    replacement+=( "${line}" )
+  done
+
+  tempfile=$(mktemp)
+
+  SEARCH_PATTERN="$search" REPLACEMENT_PATTERN="${replacement[@]}" awk '
+    BEGIN {
+        search = ENVIRON["SEARCH_PATTERN"]
+        replacement = ENVIRON["REPLACEMENT_PATTERN"]
+    }
+    s = index($0,search) {
+        $0 = substr($0,1,s-1) replacement substr($0,s+length(search))
+    }
+    { print }
+  ' "${GENERATED_FILE}" > ${tempfile}
+  mv ${tempfile} "${GENERATED_FILE}"
+}
+
+mkdir -p $(dirname ${GENERATED_FILE})
+cp -p ${TEMPLATE_FILE} ${GENERATED_FILE}
+
+# model analysis and SHA1
+if [[ ${MODEL_FILE} ]]; then
+  python3 -m pip install absl-py tensorflow
+  result=$(python3 \
+    "${TENSORFLOW_ROOT}tensorflow/lite/micro/tools/benchmarking/analyze_model.py" \
+    --model_file="${MODEL_FILE}" \
+    )
+  substitute_strings model_analysis_strings "${result}"
+
+  result=$(shasum -b "${MODEL_FILE}" | cut -f 1 -d ' ')
+  substitute_strings model_sha1_strings "${result}"
+fi
+
+# compile date
+result=$(date)
+substitute_strings compilation_date_strings "${result}"
+
+GIT_TENSORFLOW_ROOT="${TENSORFLOW_ROOT:-./}"
+set +e
+# Git repo commit information
+result=$(cd ${GIT_TENSORFLOW_ROOT} && git rev-parse --verify HEAD)
+if [[ $? != 0 ]]; then
+  result="<git commit information not available>"
+fi
+substitute_strings git_commit_strings "${result}"
+
+# Git repo status information
+result=$(cd ${GIT_TENSORFLOW_ROOT} && git status)
+if [[ $? != 0 ]]; then
+  result="<git status information not available>"
+fi
+substitute_strings git_status_strings "${result}"
+set -e
+
+# Compiler information
+result="${CC}"
+substitute_strings cc_name_strings "${result}"
+result=$("${CC}" --version)
+substitute_strings cc_version_strings "${result}"
+result="${CC_FLAGS}"
+substitute_strings cc_flags_strings "${result}"
+
+result="${CXX}"
+substitute_strings cxx_name_strings "${result}"
+result=$("${CXX}" --version)
+substitute_strings cxx_version_strings "${result}"
+result="${CXX_FLAGS}"
+substitute_strings cxx_flags_strings "${result}"
+
+result="kernel= ${KERNEL_OPTIMIZATION}"
+result+="  core= ${CORE_OPTIMIZATION}"
+result+="  third-party-kernel= ${THIRD_PARTY_KERNEL_OPTIMIZATION}"
+substitute_strings optimization_flag_strings "${result}"
+
+# Target information
+TARGET="${TARGET:-linux}"
+TARGET_ARCH="${TARGET_ARCH:-x86}"
+OPTIMIZED_KERNEL="${OPTIMIZED_KERNEL:-none}"
+BUILD_TYPE="${BUILD_TYPE:-default}"
+result=$(printf 'TARGET=%s\nTARGET_ARCH=%s\nOPTIMIZATION=%s\nBUILD_TYPE=%s\n' \
+  "${TARGET}" \
+  "${TARGET_ARCH}" \
+  "${OPTIMIZED_KERNEL}" \
+  "${BUILD_TYPE}" \
+)
+if [[ ${XTENSA_CORE} ]]; then
+  result+=$(printf '\nXTENSA_CORE=%s' "${XTENSA_CORE}")
+  result+=$(printf '\nXTENSA_BASE=%s' "${XTENSA_BASE}")
+  result+=$(printf '\nXTENSA_TOOLS_VERSION=%s' "${XTENSA_TOOLS_VERSION}")
+fi
+substitute_strings target_info_strings "${result}"
+
+download_scripts=()
+download_script_args=( "--no-downloads" )
+if [[ ${OPTIMIZED_KERNEL} == "cmsis_nn" ]]; then
+  download_scripts+=( "${TENSORFLOW_ROOT}tensorflow/lite/micro/tools/make/ext_libs/cmsis_nn_download.sh" )
+  download_script_args+=( "${TENSORFLOW_ROOT}" )
+elif [[ ${OPTIMIZED_KERNEL} == "xtensa" ]]; then
+  download_script_args+=( "${TARGET_ARCH}" "${TENSORFLOW_ROOT}" )
+  if [[ ${TARGET_ARCH} =~ ^(vision_p6)$ ]]; then
+    download_scripts+=( "${TENSORFLOW_ROOT}tensorflow/lite/micro/tools/make/ext_libs/xtensa_download.sh" )
+  elif [[ ${TARGET_ARCH} =~ ^(hifi3|hifi4|hifi5)$ ]]; then
+    download_scripts+=( "${TENSORFLOW_ROOT}tensorflow/lite/micro/tools/make/ext_libs/xtensa_download.sh" )
+    download_scripts+=( "${TENSORFLOW_ROOT}tensorflow/lite/micro/tools/make/ext_libs/xtensa_ndsp_download.sh" )
+  fi
+fi
+
+if [[ ${#download_scripts[@]} -gt 0 ]]; then
+  results_url=
+  results_md5=
+  for script in "${download_scripts[@]}"; do
+    results=$("${script}" "${download_script_args[@]}" 2>&1)
+    url=$(sed -rn 's/^LIBRARY_URL=(.*)$/\1/p' <<< "${results}")
+    results_url+=$(printf '\n%s' "${url}")
+    md5=$(sed -rn 's/^LIBRARY_MD5=(.*)$/\1/p' <<< "${results}")
+    results_md5+=$(printf '\n%s' "${md5}")
+  done
+  substitute_strings nn_library_url_strings "${results_url}"
+  substitute_strings nn_library_md5_strings "${results_md5}"
+fi
diff --git a/tensorflow/lite/micro/tools/benchmarking/generic_model_benchmark.cc b/tensorflow/lite/micro/tools/benchmarking/generic_model_benchmark.cc
index eef5f4c..9874a63 100644
--- a/tensorflow/lite/micro/tools/benchmarking/generic_model_benchmark.cc
+++ b/tensorflow/lite/micro/tools/benchmarking/generic_model_benchmark.cc
@@ -34,24 +34,27 @@
 #include "tensorflow/lite/micro/system_setup.h"
 #include "tensorflow/lite/micro/tools/benchmarking/metrics.h"
 #include "tensorflow/lite/micro/tools/benchmarking/op_resolver.h"
+#include "tensorflow/lite/micro/tools/benchmarking/show_meta_data.h"
 #include "tensorflow/lite/schema/schema_generated.h"
 
-#if defined(MODEL_HEADER_PATH)
-#if !defined(MODEL_NAME)
-#error "MODEL_NAME missing from CCFLAGS"
-#endif  // !defined(MODEL_NAME)
+#if defined(GENERIC_BENCHMARK_USING_BUILTIN_MODEL)
+#if !defined(GENERIC_BENCHMARK_MODEL_HEADER_PATH)
+#error "GENERIC_BENCHMARK_MODEL_HEADER_PATH missing from CXXFLAGS"
+#endif  // !defined(GENERIC_BENCHMARK_MODEL_HEADER_PATH)
+#if !defined(GENERIC_BENCHMARK_MODEL_NAME)
+#error "GENERIC_BENCHMARK_MODEL_NAME missing from CXXFLAGS"
+#endif  // !defined(GENERIC_BENCHMARK_MODEL_NAME)
 
-#include MODEL_HEADER_PATH
+#include GENERIC_BENCHMARK_MODEL_HEADER_PATH
 
 #define __MODEL_DATA(x) g_##x##_model_data
 #define _MODEL_DATA(x) __MODEL_DATA(x)
-#define MODEL_DATA _MODEL_DATA(MODEL_NAME)
+#define MODEL_DATA _MODEL_DATA(GENERIC_BENCHMARK_MODEL_NAME)
 #define __MODEL_SIZE(x) g_##x##_model_data_size
 #define _MODEL_SIZE(x) __MODEL_SIZE(x)
-#define MODEL_SIZE _MODEL_SIZE(MODEL_NAME)
+#define MODEL_SIZE _MODEL_SIZE(GENERIC_BENCHMARK_MODEL_NAME)
 
-#define USING_BUILTIN_MODEL
-#endif  // defind(MODEL_HEADER_PATH)
+#endif  // defined(GENERIC_BENCHMARK_USING_BUILTIN_MODEL)
 
 /*
  * Generic model benchmark.  Evaluates runtime performance of a provided model
@@ -68,14 +71,14 @@
 // so randomness isn't really needed.
 constexpr uint32_t kRandomSeed = 0xFB;
 
-#if !defined(USING_BUILTIN_MODEL)
+#if !defined(GENERIC_BENCHMARK_USING_BUILTIN_MODEL)
 constexpr size_t kTensorArenaSize = 3e6;
 constexpr size_t kModelSize = 2e6;
-#elif defined(TENSOR_ARENA_SIZE)
-constexpr size_t kTensorArenaSize = TENSOR_ARENA_SIZE;
+#elif defined(GENERIC_BENCHMARK_TENSOR_ARENA_SIZE)
+constexpr size_t kTensorArenaSize = GENERIC_BENCHMARK_TENSOR_ARENA_SIZE;
 #else
 constexpr size_t kTensorArenaSize = 5e6 - MODEL_SIZE;
-#endif  // !defined(USING_BUILTIN_MODEL)
+#endif  // !defined(GENERIC_BENCHMARK_USING_BUILTIN_MODEL)
 
 constexpr int kNumResourceVariable = 100;
 
@@ -95,7 +98,7 @@
   }
 }
 
-#if !defined(USING_BUILTIN_MODEL)
+#if !defined(GENERIC_BENCHMARK_USING_BUILTIN_MODEL)
 bool ReadFile(const char* file_name, void* buffer, size_t buffer_size) {
   std::unique_ptr<FILE, decltype(&fclose)> file(fopen(file_name, "rb"), fclose);
 
@@ -120,14 +123,12 @@
 
   return true;
 }
-#endif  // !defined(USING_BUILTIN_MODEL)
+#endif  // !defined(GENERIC_BENCHMARK_USING_BUILTIN_MODEL)
 
 int Benchmark(const uint8_t* model_data, tflite::PrettyPrintType print_type) {
   Profiler profiler;
   alignas(16) static uint8_t tensor_arena[kTensorArenaSize];
 
-  MicroPrintf("\nConfigured arena size = %d\n", kTensorArenaSize);
-
   uint32_t event_handle = profiler.BeginEvent("TfliteGetModel");
   const tflite::Model* model = tflite::GetModel(model_data);
   profiler.EndEvent(event_handle);
@@ -180,18 +181,18 @@
 }  // namespace
 }  // namespace tflite
 
-#if !defined(USING_BUILTIN_MODEL)
+#if !defined(GENERIC_BENCHMARK_USING_BUILTIN_MODEL)
 void usage(const char* prog_name) {
   MicroPrintf("usage: %s filename [--csv]", prog_name);
 }
-#endif  // !defined(USING_BUILTIN_MODEL)
+#endif  // !defined(GENERIC_BENCHMARK_USING_BUILTIN_MODEL)
 
 int main(int argc, char** argv) {
   // Which format should be used to output debug information.
   tflite::PrettyPrintType print_type = tflite::PrettyPrintType::kTable;
   tflite::InitializeTarget();
 
-#if !defined(USING_BUILTIN_MODEL)
+#if !defined(GENERIC_BENCHMARK_USING_BUILTIN_MODEL)
   if (argc < 2 || argc > 3) {
     usage(argv[0]);
     return -1;
@@ -214,7 +215,9 @@
   }
 #else
   const uint8_t* model_data = MODEL_DATA;
-#endif  // !defined(USING_BUILTIN_MODEL)
+#endif  // !defined(GENERIC_BENCHMARK_USING_BUILTIN_MODEL)
 
+  MicroPrintf("\nConfigured arena size = %d\n", tflite::kTensorArenaSize);
+  tflite::GenericBenchmarkShowMetaData();
   return tflite::Benchmark(model_data, print_type);
 }
diff --git a/tensorflow/lite/micro/tools/benchmarking/show_meta_data.cc.template b/tensorflow/lite/micro/tools/benchmarking/show_meta_data.cc.template
new file mode 100644
index 0000000..a2102a4
--- /dev/null
+++ b/tensorflow/lite/micro/tools/benchmarking/show_meta_data.cc.template
@@ -0,0 +1,177 @@
+/* Copyright 2024 The TensorFlow Authors. All Rights Reserved.
+
+Licensed under the Apache License, Version 2.0 (the "License");
+you may not use this file except in compliance with the License.
+You may obtain a copy of the License at
+
+    http://www.apache.org/licenses/LICENSE-2.0
+
+Unless required by applicable law or agreed to in writing, software
+distributed under the License is distributed on an "AS IS" BASIS,
+WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+See the License for the specific language governing permissions and
+limitations under the License.
+==============================================================================*/
+
+#include <cstddef>
+#include <cstring>
+#include <type_traits>
+
+#include "tensorflow/lite/micro/micro_log.h"
+#include "tensorflow/lite/micro/tools/benchmarking/show_meta_data.h"
+
+#if defined(HIFI3) || defined(HIFI4) || defined(HIFI5)
+#include "NatureDSP_Signal_id.h"
+#include "xa_nnlib_standards.h"
+#endif  // defined(HIFI3) || defined(HIFI4) || defined(HIFI5)
+
+namespace tflite {
+namespace {
+
+#if defined(GENERIC_BENCHMARK_USING_BUILTIN_MODEL)
+const char* model_analysis_strings[] = {
+    // %%%_model_analysis_strings_%%%
+};
+
+const char* model_sha1_strings[] = {
+    // %%%_model_sha1_strings_%%%
+};
+#endif  // defined(GENERIC_BENCHMARK_USING_BUILTIN_MODEL)
+
+const char* compilation_date_strings[] = {
+    // %%%_compilation_date_strings_%%%
+};
+
+const char* git_commit_strings[] = {
+    // %%%_git_commit_strings_%%%
+};
+
+const char* git_status_strings[] = {
+    // %%%_git_status_strings_%%%
+};
+
+const char* cc_name_strings[] = {
+    // %%%_cc_name_strings_%%%
+};
+
+const char* cc_version_strings[] = {
+    // %%%_cc_version_strings_%%%
+};
+
+const char* cc_flags_strings[] = {
+    // %%%_cc_flags_strings_%%%
+};
+
+const char* cxx_name_strings[] = {
+    // %%%_cxx_name_strings_%%%
+};
+
+const char* cxx_version_strings[] = {
+    // %%%_cxx_version_strings_%%%
+};
+
+const char* cxx_flags_strings[] = {
+    // %%%_cxx_flags_strings_%%%
+};
+
+const char* optimization_flag_strings[] = {
+    // %%%_optimization_flag_strings_%%%
+};
+
+const char* target_info_strings[] = {
+    // %%%_target_info_strings_%%%
+};
+
+#if defined(CMSIS_NN) || defined(HIFI3) || defined(HIFI4) || defined(HIFI5) || \
+    defined(VISION_P6)
+const char* nn_library_url_strings[] = {
+    // %%%_nn_library_url_strings_%%%
+};
+
+const char* nn_library_md5_strings[] = {
+    // %%%_nn_library_md5_strings_%%%
+};
+#endif  // defined(CMSIS_NN) || defined(HIFI3) || defined(HIFI4) ||
+        // defined(HIFI5) || defined(VISION_P6)
+
+void ShowStrings(const char* title, const char** str, const size_t count) {
+  MicroPrintf("%s%s", title, str[0]);
+  for (size_t i = 1; i < count; i++) {
+    MicroPrintf("%s", str[i]);
+  }
+}
+
+void ShowSeparator() { MicroPrintf("--------------------"); }
+
+}  // namespace
+
+void GenericBenchmarkShowMetaData() {
+  ShowSeparator();
+  ShowStrings("Compiled on:\n\n", compilation_date_strings,
+              std::extent<decltype(compilation_date_strings)>::value);
+
+  ShowSeparator();
+  ShowStrings("Git SHA: ", git_commit_strings,
+              std::extent<decltype(git_commit_strings)>::value);
+  ShowStrings("\nGit status:\n\n", git_status_strings,
+              std::extent<decltype(git_status_strings)>::value);
+
+  ShowSeparator();
+  ShowStrings("C compiler: ", cc_name_strings,
+              std::extent<decltype(cc_name_strings)>::value);
+  ShowStrings("Version:\n\n", cc_version_strings,
+              std::extent<decltype(cc_version_strings)>::value);
+  ShowStrings("\nFlags:\n\n", cc_flags_strings,
+              std::extent<decltype(cc_flags_strings)>::value);
+  ShowStrings("\nC++ compiler: ", cxx_name_strings,
+              std::extent<decltype(cxx_name_strings)>::value);
+  ShowStrings("Version:\n\n", cxx_version_strings,
+              std::extent<decltype(cxx_version_strings)>::value);
+  ShowStrings("\nFlags:\n\n", cxx_flags_strings,
+              std::extent<decltype(cxx_flags_strings)>::value);
+  ShowStrings("\nOptimization: ", optimization_flag_strings,
+              std::extent<decltype(optimization_flag_strings)>::value);
+
+  ShowSeparator();
+  ShowStrings("Target information:\n\n", target_info_strings,
+              std::extent<decltype(target_info_strings)>::value);
+
+#if defined(CMSIS_NN) || defined(HIFI3) || defined(HIFI4) || defined(HIFI5) || \
+    defined(VISION_P6)
+  ShowSeparator();
+  ShowStrings("NN library download URLs:\n\n", nn_library_url_strings,
+              std::extent<decltype(nn_library_url_strings)>::value);
+  ShowStrings("\nNN library MD5 checksums:\n\n", nn_library_md5_strings,
+              std::extent<decltype(nn_library_md5_strings)>::value);
+#endif  // defined(CMSIS_NN) || defined(HIFI3) || defined(HIFI4) ||
+        // defined(HIFI5) || defined(VISION_P6)
+
+#if defined(HIFI3) || defined(HIFI4) || defined(HIFI5)
+  ShowSeparator();
+
+  char version_buffer[30 + 1];
+  memset(version_buffer, 0, sizeof(version_buffer));
+  NatureDSP_Signal_get_library_version(version_buffer);
+  MicroPrintf("NatureDSP library version: %s", version_buffer);
+  memset(version_buffer, 0, sizeof(version_buffer));
+  NatureDSP_Signal_get_library_api_version(version_buffer);
+  MicroPrintf("NatureDSP API version: %s", version_buffer);
+
+  const char* nnlib_library_version = xa_nnlib_get_lib_version_string();
+  const char* nnlib_api_version = xa_nnlib_get_lib_api_version_string();
+  MicroPrintf("NNLIB library version: %s", nnlib_library_version);
+  MicroPrintf("NNLIB API version: %s", nnlib_api_version);
+#endif  // defined(HIFI3) || defined(HIFI4) || defined(HIFI5)
+
+#if defined(GENERIC_BENCHMARK_USING_BUILTIN_MODEL)
+  ShowSeparator();
+  ShowStrings("Model SHA1:\n\n", model_sha1_strings,
+              std::extent<decltype(model_sha1_strings)>::value);
+  ShowStrings("\nModel analysis:\n\n", model_analysis_strings,
+              std::extent<decltype(model_analysis_strings)>::value);
+#endif  // defined(GENERIC_BENCHMARK_USING_BUILTIN_MODEL)
+
+  ShowSeparator();
+}
+
+}  // namespace tflite
diff --git a/tensorflow/lite/micro/tools/benchmarking/show_meta_data.h b/tensorflow/lite/micro/tools/benchmarking/show_meta_data.h
new file mode 100644
index 0000000..37cf616
--- /dev/null
+++ b/tensorflow/lite/micro/tools/benchmarking/show_meta_data.h
@@ -0,0 +1,24 @@
+/* Copyright 2024 The TensorFlow Authors. All Rights Reserved.
+
+Licensed under the Apache License, Version 2.0 (the "License");
+you may not use this file except in compliance with the License.
+You may obtain a copy of the License at
+
+    http://www.apache.org/licenses/LICENSE-2.0
+
+Unless required by applicable law or agreed to in writing, software
+distributed under the License is distributed on an "AS IS" BASIS,
+WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+See the License for the specific language governing permissions and
+limitations under the License.
+==============================================================================*/
+
+namespace tflite {
+
+#if !defined(GENERIC_BENCHMARK_NO_META_DATA)
+void GenericBenchmarkShowMetaData();
+#else
+inline void GenericBenchmarkShowMetaData() {}
+#endif  // !defined(GENERIC_BENCHMARK_NO_META_DATA)
+
+}  // namespace tflite
diff --git a/tensorflow/lite/micro/tools/ci_build/test_cortex_m_corstone_300.sh b/tensorflow/lite/micro/tools/ci_build/test_cortex_m_corstone_300.sh
index 516c181..a5d02db 100755
--- a/tensorflow/lite/micro/tools/ci_build/test_cortex_m_corstone_300.sh
+++ b/tensorflow/lite/micro/tools/ci_build/test_cortex_m_corstone_300.sh
@@ -42,3 +42,13 @@
 readable_run make -f tensorflow/lite/micro/tools/make/Makefile clean
 readable_run make -j$(nproc) -f tensorflow/lite/micro/tools/make/Makefile CO_PROCESSOR=ethos_u OPTIMIZED_KERNEL_DIR=${OPTIMIZED_KERNEL_DIR} TARGET=${TARGET} TARGET_ARCH=${TARGET_ARCH} TOOLCHAIN=${TOOLCHAIN} build
 readable_run make -f tensorflow/lite/micro/tools/make/Makefile CO_PROCESSOR=ethos_u OPTIMIZED_KERNEL_DIR=${OPTIMIZED_KERNEL_DIR} TARGET=${TARGET} TARGET_ARCH=${TARGET_ARCH} TOOLCHAIN=${TOOLCHAIN} test
+
+# run generic benchmark
+readable_run make -j$(nproc) -f tensorflow/lite/micro/tools/make/Makefile \
+  CO_PROCESSOR=ethos_u \
+  OPTIMIZED_KERNEL_DIR=${OPTIMIZED_KERNEL_DIR} \
+  TARGET=${TARGET} \
+  TARGET_ARCH=${TARGET_ARCH} \
+  TOOLCHAIN=${TOOLCHAIN} \
+  GENERIC_BENCHMARK_MODEL_PATH=tensorflow/lite/micro/models/person_detect.tflite \
+  run_tflm_benchmark
diff --git a/tensorflow/lite/micro/tools/ci_build/test_x86_default.sh b/tensorflow/lite/micro/tools/ci_build/test_x86_default.sh
index 623238e..998827f 100755
--- a/tensorflow/lite/micro/tools/ci_build/test_x86_default.sh
+++ b/tensorflow/lite/micro/tools/ci_build/test_x86_default.sh
@@ -40,3 +40,10 @@
 readable_run make -s -j8 -f ${TENSORFLOW_ROOT}tensorflow/lite/micro/tools/make/Makefile build TENSORFLOW_ROOT=${TENSORFLOW_ROOT} EXTERNAL_DIR=${EXTERNAL_DIR}
 readable_run make -s -j8 -f ${TENSORFLOW_ROOT}tensorflow/lite/micro/tools/make/Makefile test TENSORFLOW_ROOT=${TENSORFLOW_ROOT} EXTERNAL_DIR=${EXTERNAL_DIR}
 readable_run make -s -j8 -f ${TENSORFLOW_ROOT}tensorflow/lite/micro/tools/make/Makefile integration_tests TENSORFLOW_ROOT=${TENSORFLOW_ROOT} EXTERNAL_DIR=${EXTERNAL_DIR}
+
+# run generic benchmark
+readable_run make -j$(nproc) -f ${TENSORFLOW_ROOT}tensorflow/lite/micro/tools/make/Makefile \
+  TENSORFLOW_ROOT=${TENSORFLOW_ROOT} \
+  EXTERNAL_DIR=${EXTERNAL_DIR} \
+  GENERIC_BENCHMARK_MODEL_PATH=${TENSORFLOW_ROOT}tensorflow/lite/micro/models/person_detect.tflite \
+  run_tflm_benchmark
diff --git a/tensorflow/lite/micro/tools/ci_build/test_xtensa_fusion_f1.sh b/tensorflow/lite/micro/tools/ci_build/test_xtensa_fusion_f1.sh
index ae4e858..2fd6bf8 100755
--- a/tensorflow/lite/micro/tools/ci_build/test_xtensa_fusion_f1.sh
+++ b/tensorflow/lite/micro/tools/ci_build/test_xtensa_fusion_f1.sh
@@ -68,4 +68,15 @@
   TENSORFLOW_ROOT=${TENSORFLOW_ROOT} \
   EXTERNAL_DIR=${EXTERNAL_DIR} \
   test -j$(nproc)
+
+# run generic benchmark
+readable_run make -f ${TENSORFLOW_ROOT}tensorflow/lite/micro/tools/make/Makefile \
+  TARGET=xtensa \
+  TARGET_ARCH=hifi3 \
+  OPTIMIZED_KERNEL_DIR=xtensa \
+  XTENSA_CORE=F1_190305_swupgrade \
+  TENSORFLOW_ROOT=${TENSORFLOW_ROOT} \
+  EXTERNAL_DIR=${EXTERNAL_DIR} \
+  GENERIC_BENCHMARK_MODEL_PATH=${TENSORFLOW_ROOT}tensorflow/lite/micro/models/person_detect.tflite \
+  run_tflm_benchmark -j$(nproc)
 fi
diff --git a/tensorflow/lite/micro/tools/ci_build/test_xtensa_hifi3z.sh b/tensorflow/lite/micro/tools/ci_build/test_xtensa_hifi3z.sh
index cf038c8..ff3d600 100755
--- a/tensorflow/lite/micro/tools/ci_build/test_xtensa_hifi3z.sh
+++ b/tensorflow/lite/micro/tools/ci_build/test_xtensa_hifi3z.sh
@@ -95,4 +95,15 @@
     TENSORFLOW_ROOT=${TENSORFLOW_ROOT} \
     EXTERNAL_DIR=${EXTERNAL_DIR} \
     test -j$(nproc)
+
+  # run generic benchmark
+  readable_run make -f ${TENSORFLOW_ROOT}tensorflow/lite/micro/tools/make/Makefile \
+    TARGET=xtensa \
+    TARGET_ARCH=hifi3 \
+    OPTIMIZED_KERNEL_DIR=xtensa \
+    XTENSA_CORE=HIFI_190304_swupgrade \
+    TENSORFLOW_ROOT=${TENSORFLOW_ROOT} \
+    EXTERNAL_DIR=${EXTERNAL_DIR} \
+    GENERIC_BENCHMARK_MODEL_PATH=${TENSORFLOW_ROOT}tensorflow/lite/micro/models/person_detect.tflite \
+    run_tflm_benchmark -j$(nproc)
 fi
diff --git a/tensorflow/lite/micro/tools/ci_build/test_xtensa_hifi5.sh b/tensorflow/lite/micro/tools/ci_build/test_xtensa_hifi5.sh
index 82a04a9..0ad29e8 100755
--- a/tensorflow/lite/micro/tools/ci_build/test_xtensa_hifi5.sh
+++ b/tensorflow/lite/micro/tools/ci_build/test_xtensa_hifi5.sh
@@ -47,3 +47,14 @@
   TENSORFLOW_ROOT=${TENSORFLOW_ROOT} \
   EXTERNAL_DIR=${EXTERNAL_DIR} \
   test -j$(nproc)
+
+# run generic benchmark
+readable_run make -f ${TENSORFLOW_ROOT}tensorflow/lite/micro/tools/make/Makefile \
+  TARGET=xtensa \
+  TARGET_ARCH=hifi5 \
+  OPTIMIZED_KERNEL_DIR=xtensa \
+  XTENSA_CORE=PRD_H5_RDO_07_01_2022 \
+  TENSORFLOW_ROOT=${TENSORFLOW_ROOT} \
+  EXTERNAL_DIR=${EXTERNAL_DIR} \
+  GENERIC_BENCHMARK_MODEL_PATH=${TENSORFLOW_ROOT}tensorflow/lite/micro/models/person_detect.tflite \
+  run_tflm_benchmark -j$(nproc)
\ No newline at end of file
diff --git a/tensorflow/lite/micro/tools/ci_build/test_xtensa_vision_p6.sh b/tensorflow/lite/micro/tools/ci_build/test_xtensa_vision_p6.sh
index a2744b5..1c6de93 100755
--- a/tensorflow/lite/micro/tools/ci_build/test_xtensa_vision_p6.sh
+++ b/tensorflow/lite/micro/tools/ci_build/test_xtensa_vision_p6.sh
@@ -54,4 +54,15 @@
     TENSORFLOW_ROOT=${TENSORFLOW_ROOT} \
     EXTERNAL_DIR=${EXTERNAL_DIR} \
     test -j$(nproc)
+
+  # run generic benchmark
+  readable_run make -f ${TENSORFLOW_ROOT}tensorflow/lite/micro/tools/make/Makefile \
+    TARGET=xtensa \
+    TARGET_ARCH=vision_p6 \
+    OPTIMIZED_KERNEL_DIR=xtensa \
+    XTENSA_CORE=P6_200528 \
+    TENSORFLOW_ROOT=${TENSORFLOW_ROOT} \
+    EXTERNAL_DIR=${EXTERNAL_DIR} \
+    GENERIC_BENCHMARK_MODEL_PATH=${TENSORFLOW_ROOT}tensorflow/lite/micro/models/person_detect.tflite \
+    run_tflm_benchmark -j$(nproc)
 fi
diff --git a/tensorflow/lite/micro/tools/make/bash_helpers.sh b/tensorflow/lite/micro/tools/make/bash_helpers.sh
index f29a641..e5446de 100755
--- a/tensorflow/lite/micro/tools/make/bash_helpers.sh
+++ b/tensorflow/lite/micro/tools/make/bash_helpers.sh
@@ -1,5 +1,5 @@
 #!/bin/bash
-# Copyright 2019 The TensorFlow Authors. All Rights Reserved.
+# Copyright 2024 The TensorFlow Authors. All Rights Reserved.
 #
 # Licensed under the Apache License, Version 2.0 (the "License");
 # you may not use this file except in compliance with the License.
@@ -14,6 +14,32 @@
 # limitations under the License.
 # ==============================================================================
 
+# Check the download path argument
+#
+# Parameter(s):
+#   ${1} - path to the download directory or --no-downloads
+#
+# Outputs:
+# "yes" or "no"
+function check_should_download() {
+  if [[ ${1} == "--no-downloads" ]]; then
+    echo "no"
+  else
+    echo "yes"
+  fi
+}
+
+# Show the download URL and MD5 checksum
+#
+# Parameter(s):
+#   ${1} - download URL
+#   ${2} - download MD5 checksum
+#
+# Download scripts require that informational output be on stderr.
+function show_download_url_md5() {
+  echo >&2 "LIBRARY_URL=${1}"
+  echo >&2 "LIBRARY_MD5=${2}"
+}
 
 # Compute the MD5 sum.
 #
diff --git a/tensorflow/lite/micro/tools/make/ext_libs/cmsis_nn_download.sh b/tensorflow/lite/micro/tools/make/ext_libs/cmsis_nn_download.sh
index bd8c615..fb0ad92 100755
--- a/tensorflow/lite/micro/tools/make/ext_libs/cmsis_nn_download.sh
+++ b/tensorflow/lite/micro/tools/make/ext_libs/cmsis_nn_download.sh
@@ -36,21 +36,22 @@
 source ${TENSORFLOW_ROOT}tensorflow/lite/micro/tools/make/bash_helpers.sh
 
 DOWNLOADS_DIR=${1}
-if [ ! -d ${DOWNLOADS_DIR} ]; then
-  echo "The top-level downloads directory: ${DOWNLOADS_DIR} does not exist."
-  exit 1
-fi
-
 DOWNLOADED_CMSIS_NN_PATH=${DOWNLOADS_DIR}/cmsis_nn
 
-if [ -d ${DOWNLOADED_CMSIS_NN_PATH} ]; then
+ZIP_PREFIX_NN="01dee38e6d6bfbbf202f0cd425bbea1731747d51"
+CMSIS_NN_URL="http://github.com/ARM-software/CMSIS-NN/archive/${ZIP_PREFIX_NN}.zip"
+CMSIS_NN_MD5="f20be93ededf42bb704c19f699a24313"
+
+should_download=$(check_should_download ${DOWNLOADS_DIR})
+
+if [[ ${should_download} == "no" ]]; then
+  show_download_url_md5 ${CMSIS_NN_URL} ${CMSIS_NN_MD5}
+elif [ ! -d ${DOWNLOADS_DIR} ]; then
+  echo "The top-level downloads directory: ${DOWNLOADS_DIR} does not exist."
+  exit 1
+elif [ -d ${DOWNLOADED_CMSIS_NN_PATH} ]; then
   echo >&2 "${DOWNLOADED_CMSIS_NN_PATH} already exists, skipping the download."
 else
-
-  ZIP_PREFIX_NN="01dee38e6d6bfbbf202f0cd425bbea1731747d51"
-  CMSIS_NN_URL="http://github.com/ARM-software/CMSIS-NN/archive/${ZIP_PREFIX_NN}.zip"
-  CMSIS_NN_MD5="f20be93ededf42bb704c19f699a24313"
-
   # wget is much faster than git clone of the entire repo. So we wget a specific
   # version and can then apply a patch, as needed.
   wget ${CMSIS_NN_URL} -O /tmp/${ZIP_PREFIX_NN}.zip >&2
diff --git a/tensorflow/lite/micro/tools/make/ext_libs/xtensa.inc b/tensorflow/lite/micro/tools/make/ext_libs/xtensa.inc
index b6db417..70e1880 100644
--- a/tensorflow/lite/micro/tools/make/ext_libs/xtensa.inc
+++ b/tensorflow/lite/micro/tools/make/ext_libs/xtensa.inc
@@ -69,7 +69,8 @@
     $(shell find $(NDSPLIB_PATH)/library/fft/fft -name "*.c") \
     $(shell find $(NDSPLIB_PATH)/library/fft/fft_ie -name "*.c") \
     $(shell find $(NDSPLIB_PATH)/library/fft/fft_ief -name "*.c") \
-    $(shell find $(NDSPLIB_PATH)/library/twiddles -name "*.c")
+    $(shell find $(NDSPLIB_PATH)/library/twiddles -name "*.c") \
+    $(shell find $(NDSPLIB_PATH)/library -name "version.c")
 
   EXCLUDED_NNLIB_SRCS = \
     $(NNLIB_PATH)/algo/layers/cnn/src/xa_nn_cnn_api.c \
@@ -125,7 +126,8 @@
     $(shell find $(NDSPLIB_PATH)/library/fft/fft -name "*.c") \
     $(shell find $(NDSPLIB_PATH)/library/fft/fft_ie -name "*.c") \
     $(shell find $(NDSPLIB_PATH)/library/fft/fft_ief -name "*.c") \
-    $(shell find $(NDSPLIB_PATH)/library/twiddles -name "*.c")
+    $(shell find $(NDSPLIB_PATH)/library/twiddles -name "*.c") \
+    $(shell find $(NDSPLIB_PATH)/library -name "version.c")
 
   EXCLUDED_NNLIB_SRCS = \
     $(NNLIB_PATH)/algo/layers/cnn/src/xa_nn_cnn_api.c \
diff --git a/tensorflow/lite/micro/tools/make/ext_libs/xtensa_download.sh b/tensorflow/lite/micro/tools/make/ext_libs/xtensa_download.sh
index d80855f..2c81710 100755
--- a/tensorflow/lite/micro/tools/make/ext_libs/xtensa_download.sh
+++ b/tensorflow/lite/micro/tools/make/ext_libs/xtensa_download.sh
@@ -1,5 +1,5 @@
 #!/bin/bash
-# Copyright 2019 The TensorFlow Authors. All Rights Reserved.
+# Copyright 2024 The TensorFlow Authors. All Rights Reserved.
 #
 # Licensed under the Apache License, Version 2.0 (the "License");
 # you may not use this file except in compliance with the License.
@@ -38,12 +38,8 @@
 source ${3}tensorflow/lite/micro/tools/make/bash_helpers.sh
 
 DOWNLOADS_DIR=${1}
-if [ ! -d ${DOWNLOADS_DIR} ]; then
-  echo "The top-level downloads directory: ${DOWNLOADS_DIR} does not exist."
-  exit 1
-fi
 
-if [[ ${2} == "hifi4" ]]; then
+if [[ ${2} == "hifi4" || ${2} == "hifi3" ]]; then
   LIBRARY_URL="http://github.com/foss-xtensa/nnlib-hifi4/raw/master/archive/xa_nnlib_hifi4_09_05_2023.zip"
   LIBRARY_DIRNAME="xa_nnlib_hifi4"
   LIBRARY_MD5="2a54e056aef73a4fcffde4643998501a"
@@ -62,35 +58,30 @@
 
 LIBRARY_INSTALL_PATH=${DOWNLOADS_DIR}/${LIBRARY_DIRNAME}
 
-if [ -d ${LIBRARY_INSTALL_PATH} ]; then
+should_download=$(check_should_download ${DOWNLOADS_DIR})
+
+if [[ ${should_download} == "no" ]]; then
+  show_download_url_md5 ${LIBRARY_URL} ${LIBRARY_MD5}
+elif [ ! -d ${DOWNLOADS_DIR} ]; then
+  echo "The top-level downloads directory: ${DOWNLOADS_DIR} does not exist."
+  exit 1
+elif [ -d ${LIBRARY_INSTALL_PATH} ]; then
   echo >&2 "${LIBRARY_INSTALL_PATH} already exists, skipping the download."
 else
   TEMPDIR="$(mktemp -d)"
   TEMPFILE="${TEMPDIR}/${LIBRARY_DIRNAME}.zip"
   wget ${LIBRARY_URL} -O "$TEMPFILE" >&2
-  MD5=`md5sum "$TEMPFILE" | awk '{print $1}'`
+  check_md5 "${TEMPFILE}" ${LIBRARY_MD5}
 
-  if [[ ${MD5} != ${LIBRARY_MD5} ]]
-  then
-    echo "Bad checksum. Expected: ${LIBRARY_MD5}, Got: ${MD5}"
-    exit 1
-  fi
+  unzip -qo "$TEMPFILE" -d ${DOWNLOADS_DIR} >&2
 
-  # Check if another make process has already extracted the downloaded files.
-  # If so, skip extracting and patching.
-  if [ -d ${LIBRARY_INSTALL_PATH} ]; then
-    echo >&2 "${LIBRARY_INSTALL_PATH} already exists, skipping the extraction."
-  else
-    unzip -qo "$TEMPFILE" -d ${DOWNLOADS_DIR} >&2
+  rm -rf "${TEMPDIR}"
 
-    rm -rf "${TEMPDIR}"
-
-    pushd "${LIBRARY_INSTALL_PATH}" > /dev/null
-    chmod -R +w ./
-    if [[ -f "../../ext_libs/xa_nnlib_${2}.patch" ]]; then
-      create_git_repo ./
-      apply_patch_to_folder ./ "../../ext_libs/xa_nnlib_${2}.patch" "TFLM patch"
-    fi
+  pushd "${LIBRARY_INSTALL_PATH}" > /dev/null
+  chmod -R +w ./
+  if [[ -f "../../ext_libs/xa_nnlib_${2}.patch" ]]; then
+    create_git_repo ./
+    apply_patch_to_folder ./ "../../ext_libs/xa_nnlib_${2}.patch" "TFLM patch"
   fi
 fi
 
diff --git a/tensorflow/lite/micro/tools/make/ext_libs/xtensa_ndsp_download.sh b/tensorflow/lite/micro/tools/make/ext_libs/xtensa_ndsp_download.sh
index 81bf848..71fe1d1 100755
--- a/tensorflow/lite/micro/tools/make/ext_libs/xtensa_ndsp_download.sh
+++ b/tensorflow/lite/micro/tools/make/ext_libs/xtensa_ndsp_download.sh
@@ -1,5 +1,5 @@
 #!/bin/bash
-# Copyright 2019 The TensorFlow Authors. All Rights Reserved.
+# Copyright 2024 The TensorFlow Authors. All Rights Reserved.
 #
 # Licensed under the Apache License, Version 2.0 (the "License");
 # you may not use this file except in compliance with the License.
@@ -38,10 +38,6 @@
 source ${3}tensorflow/lite/micro/tools/make/bash_helpers.sh
 
 DOWNLOADS_DIR=${1}
-if [ ! -d ${DOWNLOADS_DIR} ]; then
-  echo "The top-level downloads directory: ${DOWNLOADS_DIR} does not exist."
-  exit 1
-fi
 
 if [[ ${2} == "hifi3" ]]; then
   COMMIT="d17bf205dc530a9e1a1d979249520f4401529db1"
@@ -68,42 +64,37 @@
 
 LIBRARY_INSTALL_PATH=${DOWNLOADS_DIR}/${LIBRARY_DIRNAME}
 
-if [ -d ${LIBRARY_INSTALL_PATH} ]; then
+should_download=$(check_should_download ${DOWNLOADS_DIR})
+
+if [[ ${should_download} == "no" ]]; then
+  show_download_url_md5 ${LIBRARY_URL} ${LIBRARY_MD5}
+elif [ ! -d ${DOWNLOADS_DIR} ]; then
+  echo "The top-level downloads directory: ${DOWNLOADS_DIR} does not exist."
+  exit 1
+elif [ -d ${LIBRARY_INSTALL_PATH} ]; then
   echo >&2 "${LIBRARY_INSTALL_PATH} already exists, skipping the download."
 else
   TEMPDIR="$(mktemp -d)"
   TEMPFILE="${TEMPDIR}/${LIBRARY_DIRNAME}.zip"
   wget ${LIBRARY_URL} -O "$TEMPFILE" >&2
-  MD5=`md5sum "$TEMPFILE" | awk '{print $1}'`
+  check_md5 "${TEMPFILE}" ${LIBRARY_MD5}
 
-  if [[ ${MD5} != ${LIBRARY_MD5} ]]
-  then
-    echo "Bad checksum. Expected: ${LIBRARY_MD5}, Got: ${MD5}"
-    exit 1
+  unzip -qo "$TEMPFILE" -d ${TEMPDIR} >&2
+  unzip -qo ${TEMPDIR}/${LIBRARY_DIRNAME}-${COMMIT}/NDSP_${CORE_NAME}/NDSP_${CORE_NAME}*.zip -d ${TEMPDIR}/${LIBRARY_DIRNAME}-${COMMIT}/NDSP_${CORE_NAME}/ >&2
+  find ${TEMPDIR}/${LIBRARY_DIRNAME}-${COMMIT}/NDSP_${CORE_NAME}/* -maxdepth 0 -type d -exec mv {} ${LIBRARY_INSTALL_PATH} \;
+  rm -rf "${TEMPDIR}"
+  # NDSP sources in GitHub currently use DOS-style newlines, which cause compiler errors.
+  find ${LIBRARY_INSTALL_PATH} -type f -exec sed -i.bak 's/\r$//g' {} \;
+
+  pushd "${LIBRARY_INSTALL_PATH}" > /dev/null
+  chmod -R +w ./
+  if [[ -f "../../ext_libs/ndsplib-${2}.patch" ]]; then
+    create_git_repo ./
+    apply_patch_to_folder ./ "../../ext_libs/ndsplib-${2}.patch" "TFLM patch"
   fi
-
-  # Check if another make process has already extracted the downloaded files.
-  # If so, skip extracting and patching.
-  if [ -d ${LIBRARY_INSTALL_PATH} ]; then
-    echo >&2 "${LIBRARY_INSTALL_PATH} already exists, skipping the extraction."
-  else
-    unzip -qo "$TEMPFILE" -d ${TEMPDIR} >&2
-    unzip -qo ${TEMPDIR}/${LIBRARY_DIRNAME}-${COMMIT}/NDSP_${CORE_NAME}/NDSP_${CORE_NAME}*.zip -d ${TEMPDIR}/${LIBRARY_DIRNAME}-${COMMIT}/NDSP_${CORE_NAME}/ >&2
-    find ${TEMPDIR}/${LIBRARY_DIRNAME}-${COMMIT}/NDSP_${CORE_NAME}/* -maxdepth 0 -type d -exec mv {} ${LIBRARY_INSTALL_PATH} \;
-    rm -rf "${TEMPDIR}"
-    # NDSP sources in GitHub currently uses DOS style newlines, which causes compiler errors.
-    find ${LIBRARY_INSTALL_PATH} -type f -exec sed -i.bak 's/\r$//g' {} \;
-
-    pushd "${LIBRARY_INSTALL_PATH}" > /dev/null
-    chmod -R +w ./
-    if [[ -f "../../ext_libs/ndsplib-${2}.patch" ]]; then
-      create_git_repo ./
-      apply_patch_to_folder ./ "../../ext_libs/ndsplib-${2}.patch" "TFLM patch"
-    fi
-    # Rename the strings in __renaming__.h to names that are traceable to TFLM.
-    # Note that renaming is disabled by default and must be enabled with -D__RENAMING__
-    sed -i 's/NatureDSP_/NatureDSP_TFLM_/' library/include_private/__renaming__.h
-  fi
+  # Rename the strings in __renaming__.h to names that are traceable to TFLM.
+  # Note that renaming is disabled by default and must be enabled with -D__RENAMING__
+  sed -i 's/NatureDSP_/NatureDSP_TFLM_/' library/include_private/__renaming__.h
 fi
 
 echo "SUCCESS"