Add meta-data to generic benchmark (#2496)
@tensorflow/micro
Add meta-data strings to the binary. Meta-data is output each time the binary is executed. See the README for a sample of the output.
If a Git repo. is not available at ```${TENSORFLOW_ROOT}```, an appropriate message is generated in the meta-data.
Changes to how the CMSISNN and XTENSA NN library download scripts work.
Adds run of generic benchmark with embedded model for default build of x86, Corstone-300, Xtensa to test scripts executed during CI.
bug=fixes #2495

diff --git a/tensorflow/lite/micro/tools/benchmarking/BUILD b/tensorflow/lite/micro/tools/benchmarking/BUILD
index 1c3ebd4..6691ac3 100644
--- a/tensorflow/lite/micro/tools/benchmarking/BUILD
+++ b/tensorflow/lite/micro/tools/benchmarking/BUILD
@@ -20,6 +20,8 @@
cc_library(
name = "generic_benchmark_lib",
srcs = ["generic_model_benchmark.cc"],
+ hdrs = ["show_meta_data.h"],
+ defines = ["GENERIC_BENCHMARK_NO_META_DATA"],
deps = [
":metrics",
":op_resolver",
diff --git a/tensorflow/lite/micro/tools/benchmarking/Makefile.inc b/tensorflow/lite/micro/tools/benchmarking/Makefile.inc
index 32e782c..396e701 100644
--- a/tensorflow/lite/micro/tools/benchmarking/Makefile.inc
+++ b/tensorflow/lite/micro/tools/benchmarking/Makefile.inc
@@ -1,30 +1,59 @@
MICROLITE_BENCHMARK_ROOT_DIR := $(TENSORFLOW_ROOT)tensorflow/lite/micro/tools/benchmarking
-ifneq ($(BENCHMARK_MODEL_PATH),)
- GENERIC_BENCHMARK_MODEL_DIR := $(dir $(BENCHMARK_MODEL_PATH))
- GENERIC_BENCHMARK_MODEL_NAME := $(notdir $(basename $(BENCHMARK_MODEL_PATH)))
- CXXFLAGS += -DMODEL_HEADER_PATH=\"$(GENERIC_BENCHMARK_MODEL_DIR)$(GENERIC_BENCHMARK_MODEL_NAME)_model_data.h\"
- CXXFLAGS += -DMODEL_NAME=$(GENERIC_BENCHMARK_MODEL_NAME)
-ifneq ($(BENCHMARK_ARENA_SIZE),)
- CXXFLAGS += -DTENSOR_ARENA_SIZE=$(BENCHMARK_ARENA_SIZE)
+ifneq ($(GENERIC_BENCHMARK_MODEL_PATH),)
+ GENERIC_BENCHMARK_MODEL_DIR := $(dir $(GENERIC_BENCHMARK_MODEL_PATH))
+ GENERIC_BENCHMARK_MODEL_NAME := $(notdir $(basename $(GENERIC_BENCHMARK_MODEL_PATH)))
+ CXXFLAGS += -DGENERIC_BENCHMARK_USING_BUILTIN_MODEL
+ CXXFLAGS += -DGENERIC_BENCHMARK_MODEL_HEADER_PATH=\"$(GENERIC_BENCHMARK_MODEL_DIR)$(GENERIC_BENCHMARK_MODEL_NAME)_model_data.h\"
+ CXXFLAGS += -DGENERIC_BENCHMARK_MODEL_NAME=$(GENERIC_BENCHMARK_MODEL_NAME)
+ifneq ($(GENERIC_BENCHMARK_ARENA_SIZE),)
+ CXXFLAGS += -DGENERIC_BENCHMARK_TENSOR_ARENA_SIZE=$(GENERIC_BENCHMARK_ARENA_SIZE)
endif
- GENERIC_BENCHMARK_GENERATOR_INPUTS := $(TENSORFLOW_ROOT)$(BENCHMARK_MODEL_PATH)
+ # model path includes $(TENSORFLOW_ROOT) as part of the make invocation
+ GENERIC_BENCHMARK_GENERATOR_INPUTS := $(GENERIC_BENCHMARK_MODEL_PATH)
GENERIC_BENCHMARK_GENERATED_SRCS := \
- $(GENERATED_SRCS_DIR)$(TENSORFLOW_ROOT)$(GENERIC_BENCHMARK_MODEL_DIR)$(GENERIC_BENCHMARK_MODEL_NAME)_model_data.cc
+ $(GENERATED_SRCS_DIR)$(GENERIC_BENCHMARK_MODEL_DIR)$(GENERIC_BENCHMARK_MODEL_NAME)_model_data.cc
GENERIC_BENCHMARK_GENERATED_HDRS := \
- $(GENERATED_SRCS_DIR)$(TENSORFLOW_ROOT)$(GENERIC_BENCHMARK_MODEL_DIR)$(GENERIC_BENCHMARK_MODEL_NAME)_model_data.h
+ $(GENERATED_SRCS_DIR)$(GENERIC_BENCHMARK_MODEL_DIR)$(GENERIC_BENCHMARK_MODEL_NAME)_model_data.h
endif
GENERIC_BENCHMARK_SRCS := \
$(MICROLITE_BENCHMARK_ROOT_DIR)/generic_model_benchmark.cc \
-$(MICROLITE_BENCHMARK_ROOT_DIR)/metrics.cc
+$(MICROLITE_BENCHMARK_ROOT_DIR)/metrics.cc \
+$(GENERATED_SRCS_DIR)$(MICROLITE_BENCHMARK_ROOT_DIR)/show_meta_data.cc
GENERIC_BENCHMARK_HDRS := \
$(MICROLITE_BENCHMARK_ROOT_DIR)/op_resolver.h \
-$(MICROLITE_BENCHMARK_ROOT_DIR)/metrics.h
+$(MICROLITE_BENCHMARK_ROOT_DIR)/metrics.h \
+$(MICROLITE_BENCHMARK_ROOT_DIR)/show_meta_data.h
+
+# always rebuild these to catch MODEL_PATH and ARENA_SIZE changes on command line
+.PHONY: $(GENERATED_SRCS_DIR)$(MICROLITE_BENCHMARK_ROOT_DIR)/show_meta_data.cc
+.PHONY: $(MICROLITE_BENCHMARK_ROOT_DIR)/generic_model_benchmark.cc
+
+$(GENERATED_SRCS_DIR)$(MICROLITE_BENCHMARK_ROOT_DIR)/show_meta_data.cc:
+ CC="$(CC)" \
+ CXX="$(CXX)" \
+ CC_FLAGS="$(CCFLAGS)" \
+ CXX_FLAGS="$(CXXFLAGS)" \
+ KERNEL_OPTIMIZATION="$(KERNEL_OPTIMIZATION_LEVEL)" \
+ CORE_OPTIMIZATION="$(CORE_OPTIMIZATION_LEVEL)" \
+ THIRD_PARTY_KERNEL_OPTIMIZATION="$(THIRD_PARTY_KERNEL_OPTIMIZATION_LEVEL)" \
+ TARGET=$(TARGET) \
+ TARGET_ARCH=$(TARGET_ARCH) \
+ TENSORFLOW_ROOT="$(TENSORFLOW_ROOT)" \
+ OPTIMIZED_KERNEL=$(OPTIMIZED_KERNEL_DIR) \
+ BUILD_TYPE=$(BUILD_TYPE) \
+ XTENSA_CORE=$(XTENSA_CORE) \
+ XTENSA_BASE=$(XTENSA_BASE) \
+ XTENSA_TOOLS_VERSION=$(XTENSA_TOOLS_VERSION) \
+ TEMPLATE_FILE="$(MICROLITE_BENCHMARK_ROOT_DIR)/show_meta_data.cc.template" \
+ GENERATED_FILE="$(GENERATED_SRCS_DIR)$(MICROLITE_BENCHMARK_ROOT_DIR)/show_meta_data.cc" \
+ MODEL_FILE="$(GENERIC_BENCHMARK_MODEL_PATH)" \
+ $(MICROLITE_BENCHMARK_ROOT_DIR)/collect_meta_data.sh
ifneq ($(TARGET),bluepill)
ifneq ($(TARGET_ARCH), $(filter $(TARGET_ARCH), hifimini))
diff --git a/tensorflow/lite/micro/tools/benchmarking/README.md b/tensorflow/lite/micro/tools/benchmarking/README.md
index 0bc727e..45f52b5 100644
--- a/tensorflow/lite/micro/tools/benchmarking/README.md
+++ b/tensorflow/lite/micro/tools/benchmarking/README.md
@@ -8,15 +8,15 @@
Building the tool with the model compiled in uses two additional Makefile
variables:
-* `BENCHMARK_MODEL_PATH`: the path to the TfLite format model file. This
+* `GENERIC_BENCHMARK_MODEL_PATH`: the path to the TfLite format model file. This
can be a relative or absolute path. This variable is required.
-* `BENCHMARK_ARENA_SIZE`: the size of the TFLM interpreter arena, in bytes.
+* `GENERIC_BENCHMARK_ARENA_SIZE`: the size of the TFLM interpreter arena, in bytes.
This variable is optional.
## Tested, working targets
* x86
* cortex_m_qemu (no timing data)
-* Xtensa
+* Xtensa (p6, hifi3)
* cortex_m_corstone_300
## Tested, non-working targets
@@ -32,17 +32,388 @@
gen/linux_x86_64_default/bin/tflm_benchmark tensorflow/lite/micro/models/person_detect.tflite
```
-Build with model compiled into tool:
+Build and run with model compiled into tool:
```
-make -f tensorflow/lite/micro/tools/make/Makefile tflm_benchmark -j$(nproc) BENCHMARK_MODEL_PATH=tensorflow/lite/micro/models/person_detect.tflite BENCHMARK_ARENA_SIZE=`expr 100 \* 1024`
-```
-Run with model compiled into tool:
-```
-gen/linux_x86_64_default/bin/tflm_benchmark
+make -f tensorflow/lite/micro/tools/make/Makefile BUILD_TYPE=default run_tflm_benchmark -j$(nproc) GENERIC_BENCHMARK_MODEL_PATH=tensorflow/lite/micro/models/person_detect.tflite GENERIC_BENCHMARK_ARENA_SIZE=`expr 150 \* 1024`
```
## Build and run for Xtensa
Build and run with model compiled into tool:
```
-make -f tensorflow/lite/micro/tools/make/Makefile TARGET=xtensa TARGET_ARCH=vision_p6 OPTIMIZED_KERNEL_DIR=xtensa XTENSA_CORE=P6_200528 BUILD_TYPE=default run_tflm_benchmark -j$(nproc) BENCHMARK_MODEL_PATH=/tmp/keyword_scrambled.tflite BENCHMARK_ARENA_SIZE=`expr 50 \* 1024`
+make -f tensorflow/lite/micro/tools/make/Makefile TARGET=xtensa TARGET_ARCH=vision_p6 OPTIMIZED_KERNEL_DIR=xtensa XTENSA_CORE=P6_200528 BUILD_TYPE=default run_tflm_benchmark -j$(nproc) GENERIC_BENCHMARK_MODEL_PATH=/tmp/keyword_scrambled.tflite GENERIC_BENCHMARK_ARENA_SIZE=`expr 50 \* 1024`
+```
+
+## Build and run for Cortex-M using Corstone 300 simulator
+Build and run with model compiled into tool:
+```
+make -f tensorflow/lite/micro/tools/make/Makefile TARGET=cortex_m_corstone_300 TARGET_ARCH=cortex-m0 OPTIMIZED_KERNEL_DIR=cmsis_nn BUILD_TYPE=default run_tflm_benchmark -j$(nproc) GENERIC_BENCHMARK_MODEL_PATH=tensorflow/lite/micro/models/person_detect.tflite GENERIC_BENCHMARK_ARENA_SIZE=`expr 150 \* 1024`
+```
+
+## Build and run using Bazel
+
+This is only for the x86 command line argument build, and does not contain meta-data:
+```
+bazel build tensorflow/lite/micro/tools/benchmarking:tflm_benchmark
+bazel-bin/tensorflow/lite/micro/tools/benchmarking/tflm_benchmark tensorflow/lite/micro/models/person_detect.tflite
+```
+
+## Example output with meta-data and built-in model layer information
+
+This sample output is for Cortex-M using Corstone 300:
+```
+Configured arena size = 153600
+
+--------------------
+Compiled on:
+
+Thu Mar 7 04:59:13 AM PST 2024
+--------------------
+Git SHA: 27b1f546cec03c87deaf2ff94c830f9cbd0f2e69
+
+Git status:
+
+On branch main
+Your branch is up to date with 'origin/main'.
+
+Untracked files:
+ (use "git add <file>..." to include in what will be committed)
+ MODULE.bazel
+ MODULE.bazel.lock
+nothing added to commit but untracked files present (use "git add" to track)
+--------------------
+C compiler: tensorflow/lite/micro/tools/make/downloads/gcc_embedded/bin/arm-none-eabi-gcc
+Version:
+
+arm-none-eabi-gcc (Arm GNU Toolchain 13.2.rel1 (Build arm-13.7)) 13.2.1 20231009
+Copyright (C) 2023 Free Software Foundation, Inc.
+This is free software; see the source for copying conditions. There is NO
+warranty; not even for MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.
+
+Flags:
+
+-Wimplicit-function-declaration -std=c11 -Werror -fno-unwind-tables -ffunction-sections
+-fdata-sections -fmessage-length=0 -DTF_LITE_STATIC_MEMORY -DTF_LITE_DISABLE_X86_NEON
+-DCMSIS_NN -DKERNELS_OPTIMIZED_FOR_SPEED -mcpu=cortex-m0 -mfpu=auto
+-DTF_LITE_MCU_DEBUG_LOG -mthumb -mfloat-abi=soft -funsigned-char -mlittle-endian
+-fomit-frame-pointer -MD -DARMCM0
+
+C++ compiler: tensorflow/lite/micro/tools/make/downloads/gcc_embedded/bin/arm-none-eabi-g++
+Version:
+
+arm-none-eabi-g++ (Arm GNU Toolchain 13.2.rel1 (Build arm-13.7)) 13.2.1 20231009
+Copyright (C) 2023 Free Software Foundation, Inc.
+This is free software; see the source for copying conditions. There is NO
+warranty; not even for MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.
+
+Flags:
+
+-std=c++11 -fno-rtti -fno-exceptions -fno-threadsafe-statics -Wnon-virtual-dtor -Werror
+-fno-unwind-tables -ffunction-sections -fdata-sections -fmessage-length=0
+-DTF_LITE_STATIC_MEMORY -DTF_LITE_DISABLE_X86_NEON -Wsign-compare -Wdouble-promotion
+-Wunused-variable -Wunused-function -Wswitch -Wvla -Wall -Wextra
+-Wmissing-field-initializers -Wstrict-aliasing -Wno-unused-parameter -DCMSIS_NN
+-DKERNELS_OPTIMIZED_FOR_SPEED -mcpu=cortex-m0 -mfpu=auto -DTF_LITE_MCU_DEBUG_LOG -mthumb
+-mfloat-abi=soft -funsigned-char -mlittle-endian -fomit-frame-pointer -MD -DARMCM0
+-DCMSIS_DEVICE_ARM_CORTEX_M_XX_HEADER_FILE="ARMCM0.h"
+-DGENERIC_BENCHMARK_USING_BUILTIN_MODEL
+-DGENERIC_BENCHMARK_MODEL_HEADER_PATH="tensorflow/lite/micro/models/person_detect_model_da
+ta.h" -DGENERIC_BENCHMARK_MODEL_NAME=person_detect
+-DGENERIC_BENCHMARK_TENSOR_ARENA_SIZE=153600 -DGENERIC_BENCHMARK_SHOW_META_DATA
+
+Optimization: kernel= -O2 core= -Os third-party-kernel= -O2
+--------------------
+Target information:
+
+TARGET=cortex_m_corstone_300
+TARGET_ARCH=cortex-m0
+OPTIMIZATION=cmsis_nn
+BUILD_TYPE=default
+--------------------
+NN library download URLs:
+
+http://github.com/ARM-software/CMSIS-NN/archive/8492d82a1a81651977c5f5128492b4a0f0cf6715.z
+ip
+
+NN library MD5 checksums:
+
+2cb03e4f044b78af6751009cd53247a8
+--------------------
+Model SHA1:
+
+bcafcaa99d2eaf089f0ca25d66f56a2177e93f76
+
+Model analysis:
+
+=== tensorflow/lite/micro/models/person_detect.tflite ===
+Your TFLite model has '1' subgraph(s). In the subgraph description below,
+T# represents the Tensor numbers. For example, in Subgraph#0, the DEPTHWISE_CONV_2D op
+takes
+tensor #88 and tensor #0 and tensor #33 as input and produces tensor #34 as output.
+Subgraph#0(T#88) -> [T#87]
+ Op#0 DEPTHWISE_CONV_2D(T#88, T#0, T#33[3774, -107, -84394, -13908, 20697, ...]) ->
+[T#34]
+ Op#1 DEPTHWISE_CONV_2D(T#34, T#9, T#52[31132, 28, 273, -2692, 7409, ...]) -> [T#51]
+ Op#2 CONV_2D(T#51, T#10, T#53[10064, 1130, -13056, -30284, -23349, ...]) -> [T#54]
+ Op#3 DEPTHWISE_CONV_2D(T#54, T#11, T#56[306, -158, 19181, -364, 6237, ...]) -> [T#55]
+ Op#4 CONV_2D(T#55, T#12, T#57[-7649, 12287, -4433, 5851, -188, ...]) -> [T#58]
+ Op#5 DEPTHWISE_CONV_2D(T#58, T#13, T#60[7297, -498, 263, -1975, 2260, ...]) -> [T#59]
+ Op#6 CONV_2D(T#59, T#14, T#61[-4742, -4160, 6985, 8647, 29773, ...]) -> [T#62]
+ Op#7 DEPTHWISE_CONV_2D(T#62, T#15, T#64[28588, 363, 27592, 22294, -4344, ...]) -> [T#63]
+ Op#8 CONV_2D(T#63, T#16, T#65[12683, 36581, 6206, 1236, 15834, ...]) -> [T#66]
+ Op#9 DEPTHWISE_CONV_2D(T#66, T#17, T#68[-6353, 9090, -30, -1019, -496, ...]) -> [T#67]
+ Op#10 CONV_2D(T#67, T#18, T#69[3895, -6563, -8843, -2066, -1372, ...]) -> [T#70]
+ Op#11 DEPTHWISE_CONV_2D(T#70, T#19, T#72[20437, -365, -2518, 20827, -904, ...]) ->
+[T#71]
+ Op#12 CONV_2D(T#71, T#20, T#73[-10120, 9768, 3524, 3796, 6896, ...]) -> [T#74]
+ Op#13 DEPTHWISE_CONV_2D(T#74, T#21, T#76[-3969, -1910, -2425, -114, 4456, ...]) ->
+[T#75]
+ Op#14 CONV_2D(T#75, T#22, T#77[-13202, 13929, -4357, 19492, 1971, ...]) -> [T#78]
+ Op#15 DEPTHWISE_CONV_2D(T#78, T#23, T#80[-6169, -10, -2788, 14420, -7457, ...]) ->
+[T#79]
+ Op#16 CONV_2D(T#79, T#24, T#81[155, -3073, 291, -902, -9942, ...]) -> [T#82]
+ Op#17 DEPTHWISE_CONV_2D(T#82, T#25, T#84[-2063, 10755, -12037, -6417, 2147, ...]) ->
+[T#83]
+ Op#18 CONV_2D(T#83, T#26, T#85[-1872, -7549, 13994, 3191, -614, ...]) -> [T#86]
+ Op#19 DEPTHWISE_CONV_2D(T#86, T#1, T#36[-6485, 294, 686, -6011, -5196, ...]) -> [T#35]
+ Op#20 CONV_2D(T#35, T#2, T#37[7116, 8066, 11755, 11674, 9983, ...]) -> [T#38]
+ Op#21 DEPTHWISE_CONV_2D(T#38, T#3, T#40[7735, 5235, 4334, -6485, 9397, ...]) -> [T#39]
+ Op#22 CONV_2D(T#39, T#4, T#41[2947, 10152, -7865, -554, -13760, ...]) -> [T#42]
+ Op#23 DEPTHWISE_CONV_2D(T#42, T#5, T#44[-4755, 7899, -488, -2954, 2990, ...]) -> [T#43]
+ Op#24 CONV_2D(T#43, T#6, T#45[-6269, -22458, 13332, -16368, 4435, ...]) -> [T#46]
+ Op#25 DEPTHWISE_CONV_2D(T#46, T#7, T#48[333, -4743, -310, -2471, 4804, ...]) -> [T#47]
+ Op#26 CONV_2D(T#47, T#8, T#49[6677, -3593, 3754, 26316, -4761, ...]) -> [T#50]
+ Op#27 AVERAGE_POOL_2D(T#50) -> [T#27]
+ Op#28 CONV_2D(T#27, T#30, T#29[16267, -17079]) -> [T#28]
+ Op#29 RESHAPE(T#28, T#32[1, 2]) -> [T#31]
+ Op#30 SOFTMAX(T#31) -> [T#87]
+Tensors of Subgraph#0
+ T#0(MobilenetV1/Conv2d_0/weights/read) shape:[1, 3, 3, 8], type:INT8 RO 72 bytes,
+buffer: 68, data:[., y, ., g, ., ...]
+ T#1(MobilenetV1/Conv2d_10_depthwise/depthwise_weights/read) shape:[1, 3, 3, 128],
+type:INT8 RO 1152 bytes, buffer: 72, data:[W, ., d, ., ., ...]
+ T#2(MobilenetV1/Conv2d_10_pointwise/weights/read) shape:[128, 1, 1, 128], type:INT8 RO
+16384 bytes, buffer: 14, data:[., .,
+, ., ., ...]
+ T#3(MobilenetV1/Conv2d_11_depthwise/depthwise_weights/read) shape:[1, 3, 3, 128],
+type:INT8 RO 1152 bytes, buffer: 13, data:[., `, ., :, ., ...]
+ T#4(MobilenetV1/Conv2d_11_pointwise/weights/read) shape:[128, 1, 1, 128], type:INT8 RO
+16384 bytes, buffer: 12, data:[., ., ., ., ., ...]
+ T#5(MobilenetV1/Conv2d_12_depthwise/depthwise_weights/read) shape:[1, 3, 3, 128],
+type:INT8 RO 1152 bytes, buffer: 10, data:[z, ., ., ?, ., ...]
+ T#6(MobilenetV1/Conv2d_12_pointwise/weights/read) shape:[256, 1, 1, 128], type:INT8 RO
+32768 bytes, buffer: 69, data:[/, ., ., ., #, ...]
+ T#7(MobilenetV1/Conv2d_13_depthwise/depthwise_weights/read) shape:[1, 3, 3, 256],
+type:INT8 RO 2304 bytes, buffer: 7, data:[., ., w, ., ., ...]
+ T#8(MobilenetV1/Conv2d_13_pointwise/weights/read) shape:[256, 1, 1, 256], type:INT8 RO
+65536 bytes, buffer: 5, data:[&, ., ., ., ., ...]
+ T#9(MobilenetV1/Conv2d_1_depthwise/depthwise_weights/read) shape:[1, 3, 3, 8],
+type:INT8 RO 72 bytes, buffer: 60, data:[., ., ., ., ., ...]
+ T#10(MobilenetV1/Conv2d_1_pointwise/weights/read) shape:[16, 1, 1, 8], type:INT8 RO 128
+bytes, buffer: 63, data:[., ., ., ., ., ...]
+ T#11(MobilenetV1/Conv2d_2_depthwise/depthwise_weights/read) shape:[1, 3, 3, 16],
+type:INT8 RO 144 bytes, buffer: 58, data:[O, *, ., !, ., ...]
+ T#12(MobilenetV1/Conv2d_2_pointwise/weights/read) shape:[32, 1, 1, 16], type:INT8 RO
+512 bytes, buffer: 61, data:[., 4, ., ., 8, ...]
+ T#13(MobilenetV1/Conv2d_3_depthwise/depthwise_weights/read) shape:[1, 3, 3, 32],
+type:INT8 RO 288 bytes, buffer: 35, data:[., 1, ;, M, ., ...]
+ T#14(MobilenetV1/Conv2d_3_pointwise/weights/read) shape:[32, 1, 1, 32], type:INT8 RO
+1024 bytes, buffer: 33, data:[., ., ., ., ., ...]
+ T#15(MobilenetV1/Conv2d_4_depthwise/depthwise_weights/read) shape:[1, 3, 3, 32],
+type:INT8 RO 288 bytes, buffer: 32, data:[., ;, ., ., ., ...]
+ T#16(MobilenetV1/Conv2d_4_pointwise/weights/read) shape:[64, 1, 1, 32], type:INT8 RO
+2048 bytes, buffer: 30, data:[., ., ., 5, ., ...]
+ T#17(MobilenetV1/Conv2d_5_depthwise/depthwise_weights/read) shape:[1, 3, 3, 64],
+type:INT8 RO 576 bytes, buffer: 77, data:[G, ., ., ., ., ...]
+ T#18(MobilenetV1/Conv2d_5_pointwise/weights/read) shape:[64, 1, 1, 64], type:INT8 RO
+4096 bytes, buffer: 28, data:[., 2, ., $, ., ...]
+ T#19(MobilenetV1/Conv2d_6_depthwise/depthwise_weights/read) shape:[1, 3, 3, 64],
+type:INT8 RO 576 bytes, buffer: 27, data:[., 1, z, ., U, ...]
+ T#20(MobilenetV1/Conv2d_6_pointwise/weights/read) shape:[128, 1, 1, 64], type:INT8 RO
+8192 bytes, buffer: 25, data:[5, ., ., ., V, ...]
+ T#21(MobilenetV1/Conv2d_7_depthwise/depthwise_weights/read) shape:[1, 3, 3, 128],
+type:INT8 RO 1152 bytes, buffer: 23, data:[., ., ., ., ., ...]
+ T#22(MobilenetV1/Conv2d_7_pointwise/weights/read) shape:[128, 1, 1, 128], type:INT8 RO
+16384 bytes, buffer: 21, data:[., ., ., ., ., ...]
+ T#23(MobilenetV1/Conv2d_8_depthwise/depthwise_weights/read) shape:[1, 3, 3, 128],
+type:INT8 RO 1152 bytes, buffer: 71, data:[., ., ., ., Q, ...]
+ T#24(MobilenetV1/Conv2d_8_pointwise/weights/read) shape:[128, 1, 1, 128], type:INT8 RO
+16384 bytes, buffer: 20, data:[@, ., 2, ., 8, ...]
+ T#25(MobilenetV1/Conv2d_9_depthwise/depthwise_weights/read) shape:[1, 3, 3, 128],
+type:INT8 RO 1152 bytes, buffer: 80, data:[^, ., ~, ., ., ...]
+ T#26(MobilenetV1/Conv2d_9_pointwise/weights/read) shape:[128, 1, 1, 128], type:INT8 RO
+16384 bytes, buffer: 16, data:[., ., , ., , ...]
+ T#27(MobilenetV1/Logits/AvgPool_1a/AvgPool) shape:[1, 1, 1, 256], type:INT8
+ T#28(MobilenetV1/Logits/Conv2d_1c_1x1/BiasAdd) shape:[1, 1, 1, 2], type:INT8
+ T#29(MobilenetV1/Logits/Conv2d_1c_1x1/Conv2D_bias) shape:[2], type:INT32 RO 8 bytes,
+buffer: 2, data:[16267, -17079]
+ T#30(MobilenetV1/Logits/Conv2d_1c_1x1/weights/read) shape:[2, 1, 1, 256], type:INT8 RO
+512 bytes, buffer: 3, data:[., , ., ., ., ...]
+ T#31(MobilenetV1/Logits/SpatialSqueeze) shape:[1, 2], type:INT8
+ T#32(MobilenetV1/Logits/SpatialSqueeze_shape) shape:[2], type:INT32 RO 8 bytes, buffer:
+1, data:[1, 2]
+ T#33(MobilenetV1/MobilenetV1/Conv2d_0/Conv2D_bias) shape:[8], type:INT32 RO 32 bytes,
+buffer: 82, data:[3774, -107, -84394, -13908, 20697, ...]
+ T#34(MobilenetV1/MobilenetV1/Conv2d_0/Relu6) shape:[1, 48, 48, 8], type:INT8
+ T#35(MobilenetV1/MobilenetV1/Conv2d_10_depthwise/Relu6) shape:[1, 6, 6, 128], type:INT8
+ T#36(MobilenetV1/MobilenetV1/Conv2d_10_depthwise/depthwise_bias) shape:[128],
+type:INT32 RO 512 bytes, buffer: 22, data:[-6485, 294, 686, -6011, -5196, ...]
+ T#37(MobilenetV1/MobilenetV1/Conv2d_10_pointwise/Conv2D_bias) shape:[128], type:INT32
+RO 512 bytes, buffer: 70, data:[7116, 8066, 11755, 11674, 9983, ...]
+ T#38(MobilenetV1/MobilenetV1/Conv2d_10_pointwise/Relu6) shape:[1, 6, 6, 128], type:INT8
+ T#39(MobilenetV1/MobilenetV1/Conv2d_11_depthwise/Relu6) shape:[1, 6, 6, 128], type:INT8
+ T#40(MobilenetV1/MobilenetV1/Conv2d_11_depthwise/depthwise_bias) shape:[128],
+type:INT32 RO 512 bytes, buffer: 19, data:[7735, 5235, 4334, -6485, 9397, ...]
+ T#41(MobilenetV1/MobilenetV1/Conv2d_11_pointwise/Conv2D_bias) shape:[128], type:INT32
+RO 512 bytes, buffer: 11, data:[2947, 10152, -7865, -554, -13760, ...]
+ T#42(MobilenetV1/MobilenetV1/Conv2d_11_pointwise/Relu6) shape:[1, 6, 6, 128], type:INT8
+ T#43(MobilenetV1/MobilenetV1/Conv2d_12_depthwise/Relu6) shape:[1, 3, 3, 128], type:INT8
+ T#44(MobilenetV1/MobilenetV1/Conv2d_12_depthwise/depthwise_bias) shape:[128],
+type:INT32 RO 512 bytes, buffer: 9, data:[-4755, 7899, -488, -2954, 2990, ...]
+ T#45(MobilenetV1/MobilenetV1/Conv2d_12_pointwise/Conv2D_bias) shape:[256], type:INT32
+RO 1024 bytes, buffer: 8, data:[-6269, -22458, 13332, -16368, 4435, ...]
+ T#46(MobilenetV1/MobilenetV1/Conv2d_12_pointwise/Relu6) shape:[1, 3, 3, 256], type:INT8
+ T#47(MobilenetV1/MobilenetV1/Conv2d_13_depthwise/Relu6) shape:[1, 3, 3, 256], type:INT8
+ T#48(MobilenetV1/MobilenetV1/Conv2d_13_depthwise/depthwise_bias) shape:[256],
+type:INT32 RO 1024 bytes, buffer: 6, data:[333, -4743, -310, -2471, 4804, ...]
+ T#49(MobilenetV1/MobilenetV1/Conv2d_13_pointwise/Conv2D_bias) shape:[256], type:INT32
+RO 1024 bytes, buffer: 4, data:[6677, -3593, 3754, 26316, -4761, ...]
+ T#50(MobilenetV1/MobilenetV1/Conv2d_13_pointwise/Relu6) shape:[1, 3, 3, 256], type:INT8
+ T#51(MobilenetV1/MobilenetV1/Conv2d_1_depthwise/Relu6) shape:[1, 48, 48, 8], type:INT8
+ T#52(MobilenetV1/MobilenetV1/Conv2d_1_depthwise/depthwise_bias) shape:[8], type:INT32
+RO 32 bytes, buffer: 56, data:[31132, 28, 273, -2692, 7409, ...]
+ T#53(MobilenetV1/MobilenetV1/Conv2d_1_pointwise/Conv2D_bias) shape:[16], type:INT32 RO
+64 bytes, buffer: 36, data:[10064, 1130, -13056, -30284, -23349, ...]
+ T#54(MobilenetV1/MobilenetV1/Conv2d_1_pointwise/Relu6) shape:[1, 48, 48, 16], type:INT8
+ T#55(MobilenetV1/MobilenetV1/Conv2d_2_depthwise/Relu6) shape:[1, 24, 24, 16], type:INT8
+ T#56(MobilenetV1/MobilenetV1/Conv2d_2_depthwise/depthwise_bias) shape:[16], type:INT32
+RO 64 bytes, buffer: 48, data:[306, -158, 19181, -364, 6237, ...]
+ T#57(MobilenetV1/MobilenetV1/Conv2d_2_pointwise/Conv2D_bias) shape:[32], type:INT32 RO
+128 bytes, buffer: 62, data:[-7649, 12287, -4433, 5851, -188, ...]
+ T#58(MobilenetV1/MobilenetV1/Conv2d_2_pointwise/Relu6) shape:[1, 24, 24, 32], type:INT8
+ T#59(MobilenetV1/MobilenetV1/Conv2d_3_depthwise/Relu6) shape:[1, 24, 24, 32], type:INT8
+ T#60(MobilenetV1/MobilenetV1/Conv2d_3_depthwise/depthwise_bias) shape:[32], type:INT32
+RO 128 bytes, buffer: 34, data:[7297, -498, 263, -1975, 2260, ...]
+ T#61(MobilenetV1/MobilenetV1/Conv2d_3_pointwise/Conv2D_bias) shape:[32], type:INT32 RO
+128 bytes, buffer: 59, data:[-4742, -4160, 6985, 8647, 29773, ...]
+ T#62(MobilenetV1/MobilenetV1/Conv2d_3_pointwise/Relu6) shape:[1, 24, 24, 32], type:INT8
+ T#63(MobilenetV1/MobilenetV1/Conv2d_4_depthwise/Relu6) shape:[1, 12, 12, 32], type:INT8
+ T#64(MobilenetV1/MobilenetV1/Conv2d_4_depthwise/depthwise_bias) shape:[32], type:INT32
+RO 128 bytes, buffer: 31, data:[28588, 363, 27592, 22294, -4344, ...]
+ T#65(MobilenetV1/MobilenetV1/Conv2d_4_pointwise/Conv2D_bias) shape:[64], type:INT32 RO
+256 bytes, buffer: 76, data:[12683, 36581, 6206, 1236, 15834, ...]
+ T#66(MobilenetV1/MobilenetV1/Conv2d_4_pointwise/Relu6) shape:[1, 12, 12, 64], type:INT8
+ T#67(MobilenetV1/MobilenetV1/Conv2d_5_depthwise/Relu6) shape:[1, 12, 12, 64], type:INT8
+ T#68(MobilenetV1/MobilenetV1/Conv2d_5_depthwise/depthwise_bias) shape:[64], type:INT32
+RO 256 bytes, buffer: 29, data:[-6353, 9090, -30, -1019, -496, ...]
+ T#69(MobilenetV1/MobilenetV1/Conv2d_5_pointwise/Conv2D_bias) shape:[64], type:INT32 RO
+256 bytes, buffer: 84, data:[3895, -6563, -8843, -2066, -1372, ...]
+ T#70(MobilenetV1/MobilenetV1/Conv2d_5_pointwise/Relu6) shape:[1, 12, 12, 64], type:INT8
+ T#71(MobilenetV1/MobilenetV1/Conv2d_6_depthwise/Relu6) shape:[1, 6, 6, 64], type:INT8
+ T#72(MobilenetV1/MobilenetV1/Conv2d_6_depthwise/depthwise_bias) shape:[64], type:INT32
+RO 256 bytes, buffer: 26, data:[20437, -365, -2518, 20827, -904, ...]
+ T#73(MobilenetV1/MobilenetV1/Conv2d_6_pointwise/Conv2D_bias) shape:[128], type:INT32 RO
+512 bytes, buffer: 24, data:[-10120, 9768, 3524, 3796, 6896, ...]
+ T#74(MobilenetV1/MobilenetV1/Conv2d_6_pointwise/Relu6) shape:[1, 6, 6, 128], type:INT8
+ T#75(MobilenetV1/MobilenetV1/Conv2d_7_depthwise/Relu6) shape:[1, 6, 6, 128], type:INT8
+ T#76(MobilenetV1/MobilenetV1/Conv2d_7_depthwise/depthwise_bias) shape:[128], type:INT32
+RO 512 bytes, buffer: 78, data:[-3969, -1910, -2425, -114, 4456, ...]
+ T#77(MobilenetV1/MobilenetV1/Conv2d_7_pointwise/Conv2D_bias) shape:[128], type:INT32 RO
+512 bytes, buffer: 83, data:[-13202, 13929, -4357, 19492, 1971, ...]
+ T#78(MobilenetV1/MobilenetV1/Conv2d_7_pointwise/Relu6) shape:[1, 6, 6, 128], type:INT8
+ T#79(MobilenetV1/MobilenetV1/Conv2d_8_depthwise/Relu6) shape:[1, 6, 6, 128], type:INT8
+ T#80(MobilenetV1/MobilenetV1/Conv2d_8_depthwise/depthwise_bias) shape:[128], type:INT32
+RO 512 bytes, buffer: 55, data:[-6169, -10, -2788, 14420, -7457, ...]
+ T#81(MobilenetV1/MobilenetV1/Conv2d_8_pointwise/Conv2D_bias) shape:[128], type:INT32 RO
+512 bytes, buffer: 18, data:[155, -3073, 291, -902, -9942, ...]
+ T#82(MobilenetV1/MobilenetV1/Conv2d_8_pointwise/Relu6) shape:[1, 6, 6, 128], type:INT8
+ T#83(MobilenetV1/MobilenetV1/Conv2d_9_depthwise/Relu6) shape:[1, 6, 6, 128], type:INT8
+ T#84(MobilenetV1/MobilenetV1/Conv2d_9_depthwise/depthwise_bias) shape:[128], type:INT32
+RO 512 bytes, buffer: 17, data:[-2063, 10755, -12037, -6417, 2147, ...]
+ T#85(MobilenetV1/MobilenetV1/Conv2d_9_pointwise/Conv2D_bias) shape:[128], type:INT32 RO
+512 bytes, buffer: 15, data:[-1872, -7549, 13994, 3191, -614, ...]
+ T#86(MobilenetV1/MobilenetV1/Conv2d_9_pointwise/Relu6) shape:[1, 6, 6, 128], type:INT8
+ T#87(MobilenetV1/Predictions/Reshape_1) shape:[1, 2], type:INT8
+ T#88(input) shape:[1, 96, 96, 1], type:INT8
+---------------------------------------------------------------
+ Model size: 300568 bytes
+ Non-data buffer size: 81640 bytes (27.16 %)
+ Total data buffer size: 218928 bytes (72.84 %)
+ (Zero value buffers): 0 bytes (00.00 %)
+* Buffers of TFLite model are mostly used for constant tensors.
+ And zero value buffers are buffers filled with zeros.
+ Non-data buffers area are used to store operators, subgraphs and etc.
+ You can find more details from
+https://github.com/tensorflow/tensorflow/blob/master/tensorflow/lite/schema/schema.fbs
+--------------------
+TfliteGetModel took 0 ticks (0 ms).
+
+DEPTHWISE_CONV_2D took 0 ticks (0 ms).
+DEPTHWISE_CONV_2D took 0 ticks (0 ms).
+CONV_2D took 0 ticks (0 ms).
+DEPTHWISE_CONV_2D took 0 ticks (0 ms).
+CONV_2D took 0 ticks (0 ms).
+DEPTHWISE_CONV_2D took 0 ticks (0 ms).
+CONV_2D took 0 ticks (0 ms).
+DEPTHWISE_CONV_2D took 0 ticks (0 ms).
+CONV_2D took 0 ticks (0 ms).
+DEPTHWISE_CONV_2D took 0 ticks (0 ms).
+CONV_2D took 0 ticks (0 ms).
+DEPTHWISE_CONV_2D took 0 ticks (0 ms).
+CONV_2D took 0 ticks (0 ms).
+DEPTHWISE_CONV_2D took 0 ticks (0 ms).
+CONV_2D took 0 ticks (0 ms).
+DEPTHWISE_CONV_2D took 0 ticks (0 ms).
+CONV_2D took 0 ticks (0 ms).
+DEPTHWISE_CONV_2D took 0 ticks (0 ms).
+CONV_2D took 0 ticks (0 ms).
+DEPTHWISE_CONV_2D took 0 ticks (0 ms).
+CONV_2D took 0 ticks (0 ms).
+DEPTHWISE_CONV_2D took 0 ticks (0 ms).
+CONV_2D took 0 ticks (0 ms).
+DEPTHWISE_CONV_2D took 0 ticks (0 ms).
+CONV_2D took 0 ticks (0 ms).
+DEPTHWISE_CONV_2D took 0 ticks (0 ms).
+CONV_2D took 0 ticks (0 ms).
+AVERAGE_POOL_2D took 0 ticks (0 ms).
+CONV_2D took 0 ticks (0 ms).
+RESHAPE took 0 ticks (0 ms).
+SOFTMAX took 0 ticks (0 ms).
+
+"Unique Tag","Total ticks across all events with that tag."
+DEPTHWISE_CONV_2D, 0
+CONV_2D, 0
+AVERAGE_POOL_2D, 0
+RESHAPE, 0
+SOFTMAX, 0
+"total number of ticks", 0
+
+[[ Table ]]: Arena
+ Arena Bytes % Arena
+ Total | 84436 | 100.00
+NonPersistent | 55296 | 65.49
+ Persistent | 29140 | 34.51
+
+[[ Table ]]: Allocations
+ Allocation Id Used Requested Count % Memory
+ Eval tensor data | 0 | 1068 | 1068 | 89 | 1.26
+ Persistent tensor data | 1 | 64 | 64 | 2 | 0.08
+Persistent quantization data | 2 | 40 | 40 | 4 | 0.05
+ Persistent buffer data | 3 | 25876 | 25704 | 90 | 30.65
+ Tensor variable buffer data | 4 | 0 | 0 | 0 | 0.00
+ Node and registration array | 5 | 992 | 992 | 31 | 1.17
+ Operation data | 6 | 0 | 0 | 0 | 0.00
+
+Application exit code: 0.
+
+Info: /OSCI/SystemC: Simulation stopped by user.
+[warning ][main@0][01 ns] Simulation stopped by user
+
+--- FVP_MPS3_Corstone_SSE_300 statistics: -------------------------------------
+Simulated time : 5.474678s
+User time : 1.609473s
+System time : 0.123380s
+Wall time : 1.892705s
+Performance index : 2.89
+FVP_MPS3_Corstone_SSE_300.cpu0 : 78.98 MIPS ( 136866941 Inst)
+-------------------------------------------------------------------------------
```
diff --git a/tensorflow/lite/micro/tools/benchmarking/analyze_model.py b/tensorflow/lite/micro/tools/benchmarking/analyze_model.py
new file mode 100644
index 0000000..f2ff013
--- /dev/null
+++ b/tensorflow/lite/micro/tools/benchmarking/analyze_model.py
@@ -0,0 +1,35 @@
+#!/usr/bin/env python3
+# Copyright 2024 The TensorFlow Authors. All Rights Reserved.
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+# http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+# ==============================================================================
+
+from absl import app
+from absl import flags
+
+import tensorflow as tf
+
+_MODEL_PATH = flags.DEFINE_string(
+ name='model_file',
+ default='',
+ help='path for the .tflite model file.',
+)
+
+
+def _main(_):
+ """outputs model analysis to stdout/stderr"""
+ tf.lite.experimental.Analyzer.analyze(model_path=_MODEL_PATH.value)
+
+
+if __name__ == '__main__':
+ app.run(_main)
diff --git a/tensorflow/lite/micro/tools/benchmarking/collect_meta_data.sh b/tensorflow/lite/micro/tools/benchmarking/collect_meta_data.sh
new file mode 100755
index 0000000..c60bdf3
--- /dev/null
+++ b/tensorflow/lite/micro/tools/benchmarking/collect_meta_data.sh
@@ -0,0 +1,177 @@
+#!/usr/bin/env bash
+# Copyright 2024 The TensorFlow Authors. All Rights Reserved.
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+# http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+# ==============================================================================
+#
+# Collect generic benchmark meta data and insert resulting strings into
+# the file designated by TEMPLATE_FILE.
+#
+# Takes no arguments.
+#
+# Uses the following environment variables:
+# TEMPLATE_FILE - path to the template source file
+# GENERATED_FILE - path to the generated source file with substituted strings
+# TENSORFLOW_ROOT - path to the root of the source tree
+# MODEL_FILE - path to the .tflite model file
+# CC - path to C compiler
+# CXX - path to C++ compiler
+# CC_FLAGS - C compiler flags
+# CXX_FLAGS - C++ compiler flags
+# KERNEL_OPTIMIZATION - kernel optimization flags
+# CORE_OPTIMIZATION - core optimization flags
+# THIRD_PARTY_KERNEL_OPTIMIZATION - third party kernel optimization flags
+# TARGET - target platform (xtensa, cortex_m_corstone_300, etc.)
+# TARGET_ARCH - target architecture (hifi5, cortex-m0, etc.)
+# OPTIMIZED_KERNEL - optimized kernel (xtensa, cmsis_nn, etc.)
+# BUILD_TYPE - type of build (default, release, etc.)
+# XTENSA_CORE - Xtensa core specification
+# XTENSA_BASE - Xtensa base install directory
+# XTENSA_TOOLS_VERSION - Xtensa tooling version
+
+
+set -e
+
+source ${TENSORFLOW_ROOT}tensorflow/lite/micro/tools/ci_build/helper_functions.sh
+
+function substitute_strings() {
+ search="// %%%_$1_%%%"
+ lines=$(fold -w 90 -s <<< "$2")
+ SAVED_IFS=${IFS}
+ IFS=$'\n' lines_array=( ${lines} )
+ IFS=${SAVED_IFS}
+ replacement=()
+ for line in "${lines_array[@]}"; do
+ line=$(sed -e 's/"/\\"/g' <<< "${line}")
+ line=$(printf '"%s",\n ' "${line}")
+ replacement+=( "${line}" )
+ done
+
+ tempfile=$(mktemp)
+
+ SEARCH_PATTERN="$search" REPLACEMENT_PATTERN="${replacement[@]}" awk '
+ BEGIN {
+ search = ENVIRON["SEARCH_PATTERN"]
+ replacement = ENVIRON["REPLACEMENT_PATTERN"]
+ }
+ s = index($0,search) {
+ $0 = substr($0,1,s-1) replacement substr($0,s+length(search))
+ }
+ { print }
+ ' "${GENERATED_FILE}" > ${tempfile}
+ mv ${tempfile} "${GENERATED_FILE}"
+}
+
+mkdir -p $(dirname ${GENERATED_FILE})
+cp -p ${TEMPLATE_FILE} ${GENERATED_FILE}
+
+# model analysis and SHA1
+if [[ ${MODEL_FILE} ]]; then
+ python3 -m pip install absl-py tensorflow
+ result=$(python3 \
+ "${TENSORFLOW_ROOT}tensorflow/lite/micro/tools/benchmarking/analyze_model.py" \
+ --model_file="${MODEL_FILE}" \
+ )
+ substitute_strings model_analysis_strings "${result}"
+
+ result=$(shasum -b "${MODEL_FILE}" | cut -f 1 -d ' ')
+ substitute_strings model_sha1_strings "${result}"
+fi
+
+# compile date
+result=$(date)
+substitute_strings compilation_date_strings "${result}"
+
+GIT_TENSORFLOW_ROOT="${TENSORFLOW_ROOT:-./}"
+set +e
+# Git repo commit information
+result=$(cd ${GIT_TENSORFLOW_ROOT} && git rev-parse --verify HEAD)
+if [[ $? != 0 ]]; then
+ result="<git commit information not available>"
+fi
+substitute_strings git_commit_strings "${result}"
+
+# Git repo status information
+result=$(cd ${GIT_TENSORFLOW_ROOT} && git status)
+if [[ $? != 0 ]]; then
+ result="<git status information not available>"
+fi
+substitute_strings git_status_strings "${result}"
+set -e
+
+# Compiler information
+result="${CC}"
+substitute_strings cc_name_strings "${result}"
+result=$("${CC}" --version)
+substitute_strings cc_version_strings "${result}"
+result="${CC_FLAGS}"
+substitute_strings cc_flags_strings "${result}"
+
+result="${CXX}"
+substitute_strings cxx_name_strings "${result}"
+result=$("${CXX}" --version)
+substitute_strings cxx_version_strings "${result}"
+result="${CXX_FLAGS}"
+substitute_strings cxx_flags_strings "${result}"
+
+result="kernel= ${KERNEL_OPTIMIZATION}"
+result+=" core= ${CORE_OPTIMIZATION}"
+result+=" third-party-kernel= ${THIRD_PARTY_KERNEL_OPTIMIZATION}"
+substitute_strings optimization_flag_strings "${result}"
+
+# Target information
+TARGET="${TARGET:-linux}"
+TARGET_ARCH="${TARGET_ARCH:-x86}"
+OPTIMIZED_KERNEL="${OPTIMIZED_KERNEL:-none}"
+BUILD_TYPE="${BUILD_TYPE:-default}"
+result=$(printf 'TARGET=%s\nTARGET_ARCH=%s\nOPTIMIZATION=%s\nBUILD_TYPE=%s\n' \
+ "${TARGET}" \
+ "${TARGET_ARCH}" \
+ "${OPTIMIZED_KERNEL}" \
+ "${BUILD_TYPE}" \
+)
+if [[ ${XTENSA_CORE} ]]; then
+ result+=$(printf '\nXTENSA_CORE=%s' "${XTENSA_CORE}")
+ result+=$(printf '\nXTENSA_BASE=%s' "${XTENSA_BASE}")
+ result+=$(printf '\nXTENSA_TOOLS_VERSION=%s' "${XTENSA_TOOLS_VERSION}")
+fi
+substitute_strings target_info_strings "${result}"
+
+download_scripts=()
+download_script_args=( "--no-downloads" )
+if [[ ${OPTIMIZED_KERNEL} == "cmsis_nn" ]]; then
+ download_scripts+=( "${TENSORFLOW_ROOT}tensorflow/lite/micro/tools/make/ext_libs/cmsis_nn_download.sh" )
+ download_script_args+=( "${TENSORFLOW_ROOT}" )
+elif [[ ${OPTIMIZED_KERNEL} == "xtensa" ]]; then
+ download_script_args+=( "${TARGET_ARCH}" "${TENSORFLOW_ROOT}" )
+ if [[ ${TARGET_ARCH} =~ ^(vision_p6)$ ]]; then
+ download_scripts+=( "${TENSORFLOW_ROOT}tensorflow/lite/micro/tools/make/ext_libs/xtensa_download.sh" )
+ elif [[ ${TARGET_ARCH} =~ ^(hifi3|hifi4|hifi5)$ ]]; then
+ download_scripts+=( "${TENSORFLOW_ROOT}tensorflow/lite/micro/tools/make/ext_libs/xtensa_download.sh" )
+ download_scripts+=( "${TENSORFLOW_ROOT}tensorflow/lite/micro/tools/make/ext_libs/xtensa_ndsp_download.sh" )
+ fi
+fi
+
+if [[ ${#download_scripts[@]} -gt 0 ]]; then
+ results_url=
+ results_md5=
+ for script in "${download_scripts[@]}"; do
+ results=$("${script}" "${download_script_args[@]}" 2>&1)
+ url=$(sed -rn 's/^LIBRARY_URL=(.*)$/\1/p' <<< "${results}")
+ results_url+=$(printf '\n%s' "${url}")
+ md5=$(sed -rn 's/^LIBRARY_MD5=(.*)$/\1/p' <<< "${results}")
+ results_md5+=$(printf '\n%s' "${md5}")
+ done
+ substitute_strings nn_library_url_strings "${results_url}"
+ substitute_strings nn_library_md5_strings "${results_md5}"
+fi
diff --git a/tensorflow/lite/micro/tools/benchmarking/generic_model_benchmark.cc b/tensorflow/lite/micro/tools/benchmarking/generic_model_benchmark.cc
index eef5f4c..9874a63 100644
--- a/tensorflow/lite/micro/tools/benchmarking/generic_model_benchmark.cc
+++ b/tensorflow/lite/micro/tools/benchmarking/generic_model_benchmark.cc
@@ -34,24 +34,27 @@
#include "tensorflow/lite/micro/system_setup.h"
#include "tensorflow/lite/micro/tools/benchmarking/metrics.h"
#include "tensorflow/lite/micro/tools/benchmarking/op_resolver.h"
+#include "tensorflow/lite/micro/tools/benchmarking/show_meta_data.h"
#include "tensorflow/lite/schema/schema_generated.h"
-#if defined(MODEL_HEADER_PATH)
-#if !defined(MODEL_NAME)
-#error "MODEL_NAME missing from CCFLAGS"
-#endif // !defined(MODEL_NAME)
+#if defined(GENERIC_BENCHMARK_USING_BUILTIN_MODEL)
+#if !defined(GENERIC_BENCHMARK_MODEL_HEADER_PATH)
+#error "GENERIC_BENCHMARK_MODEL_HEADER_PATH missing from CXXFLAGS"
+#endif // !defined(GENERIC_BENCHMARK_MODEL_HEADER_PATH)
+#if !defined(GENERIC_BENCHMARK_MODEL_NAME)
+#error "GENERIC_BENCHMARK_MODEL_NAME missing from CXXFLAGS"
+#endif // !defined(GENERIC_BENCHMARK_MODEL_NAME)
-#include MODEL_HEADER_PATH
+#include GENERIC_BENCHMARK_MODEL_HEADER_PATH
#define __MODEL_DATA(x) g_##x##_model_data
#define _MODEL_DATA(x) __MODEL_DATA(x)
-#define MODEL_DATA _MODEL_DATA(MODEL_NAME)
+#define MODEL_DATA _MODEL_DATA(GENERIC_BENCHMARK_MODEL_NAME)
#define __MODEL_SIZE(x) g_##x##_model_data_size
#define _MODEL_SIZE(x) __MODEL_SIZE(x)
-#define MODEL_SIZE _MODEL_SIZE(MODEL_NAME)
+#define MODEL_SIZE _MODEL_SIZE(GENERIC_BENCHMARK_MODEL_NAME)
-#define USING_BUILTIN_MODEL
-#endif // defind(MODEL_HEADER_PATH)
+#endif // defined(GENERIC_BENCHMARK_USING_BUILTIN_MODEL)
/*
* Generic model benchmark. Evaluates runtime performance of a provided model
@@ -68,14 +71,14 @@
// so randomness isn't really needed.
constexpr uint32_t kRandomSeed = 0xFB;
-#if !defined(USING_BUILTIN_MODEL)
+#if !defined(GENERIC_BENCHMARK_USING_BUILTIN_MODEL)
constexpr size_t kTensorArenaSize = 3e6;
constexpr size_t kModelSize = 2e6;
-#elif defined(TENSOR_ARENA_SIZE)
-constexpr size_t kTensorArenaSize = TENSOR_ARENA_SIZE;
+#elif defined(GENERIC_BENCHMARK_TENSOR_ARENA_SIZE)
+constexpr size_t kTensorArenaSize = GENERIC_BENCHMARK_TENSOR_ARENA_SIZE;
#else
constexpr size_t kTensorArenaSize = 5e6 - MODEL_SIZE;
-#endif // !defined(USING_BUILTIN_MODEL)
+#endif // !defined(GENERIC_BENCHMARK_USING_BUILTIN_MODEL)
constexpr int kNumResourceVariable = 100;
@@ -95,7 +98,7 @@
}
}
-#if !defined(USING_BUILTIN_MODEL)
+#if !defined(GENERIC_BENCHMARK_USING_BUILTIN_MODEL)
bool ReadFile(const char* file_name, void* buffer, size_t buffer_size) {
std::unique_ptr<FILE, decltype(&fclose)> file(fopen(file_name, "rb"), fclose);
@@ -120,14 +123,12 @@
return true;
}
-#endif // !defined(USING_BUILTIN_MODEL)
+#endif // !defined(GENERIC_BENCHMARK_USING_BUILTIN_MODEL)
int Benchmark(const uint8_t* model_data, tflite::PrettyPrintType print_type) {
Profiler profiler;
alignas(16) static uint8_t tensor_arena[kTensorArenaSize];
- MicroPrintf("\nConfigured arena size = %d\n", kTensorArenaSize);
-
uint32_t event_handle = profiler.BeginEvent("TfliteGetModel");
const tflite::Model* model = tflite::GetModel(model_data);
profiler.EndEvent(event_handle);
@@ -180,18 +181,18 @@
} // namespace
} // namespace tflite
-#if !defined(USING_BUILTIN_MODEL)
+#if !defined(GENERIC_BENCHMARK_USING_BUILTIN_MODEL)
void usage(const char* prog_name) {
MicroPrintf("usage: %s filename [--csv]", prog_name);
}
-#endif // !defined(USING_BUILTIN_MODEL)
+#endif // !defined(GENERIC_BENCHMARK_USING_BUILTIN_MODEL)
int main(int argc, char** argv) {
// Which format should be used to output debug information.
tflite::PrettyPrintType print_type = tflite::PrettyPrintType::kTable;
tflite::InitializeTarget();
-#if !defined(USING_BUILTIN_MODEL)
+#if !defined(GENERIC_BENCHMARK_USING_BUILTIN_MODEL)
if (argc < 2 || argc > 3) {
usage(argv[0]);
return -1;
@@ -214,7 +215,9 @@
}
#else
const uint8_t* model_data = MODEL_DATA;
-#endif // !defined(USING_BUILTIN_MODEL)
+#endif // !defined(GENERIC_BENCHMARK_USING_BUILTIN_MODEL)
+ MicroPrintf("\nConfigured arena size = %d\n", tflite::kTensorArenaSize);
+ tflite::GenericBenchmarkShowMetaData();
return tflite::Benchmark(model_data, print_type);
}
diff --git a/tensorflow/lite/micro/tools/benchmarking/show_meta_data.cc.template b/tensorflow/lite/micro/tools/benchmarking/show_meta_data.cc.template
new file mode 100644
index 0000000..a2102a4
--- /dev/null
+++ b/tensorflow/lite/micro/tools/benchmarking/show_meta_data.cc.template
@@ -0,0 +1,177 @@
+/* Copyright 2024 The TensorFlow Authors. All Rights Reserved.
+
+Licensed under the Apache License, Version 2.0 (the "License");
+you may not use this file except in compliance with the License.
+You may obtain a copy of the License at
+
+ http://www.apache.org/licenses/LICENSE-2.0
+
+Unless required by applicable law or agreed to in writing, software
+distributed under the License is distributed on an "AS IS" BASIS,
+WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+See the License for the specific language governing permissions and
+limitations under the License.
+==============================================================================*/
+
+#include <cstddef>
+#include <cstring>
+#include <type_traits>
+
+#include "tensorflow/lite/micro/micro_log.h"
+#include "tensorflow/lite/micro/tools/benchmarking/show_meta_data.h"
+
+#if defined(HIFI3) || defined(HIFI4) || defined(HIFI5)
+#include "NatureDSP_Signal_id.h"
+#include "xa_nnlib_standards.h"
+#endif // defined(HIFI3) || defined(HIFI4) || defined(HIFI5)
+
+namespace tflite {
+namespace {
+
+#if defined(GENERIC_BENCHMARK_USING_BUILTIN_MODEL)
+const char* model_analysis_strings[] = {
+ // %%%_model_analysis_strings_%%%
+};
+
+const char* model_sha1_strings[] = {
+ // %%%_model_sha1_strings_%%%
+};
+#endif // defined(GENERIC_BENCHMARK_USING_BUILTIN_MODEL)
+
+const char* compilation_date_strings[] = {
+ // %%%_compilation_date_strings_%%%
+};
+
+const char* git_commit_strings[] = {
+ // %%%_git_commit_strings_%%%
+};
+
+const char* git_status_strings[] = {
+ // %%%_git_status_strings_%%%
+};
+
+const char* cc_name_strings[] = {
+ // %%%_cc_name_strings_%%%
+};
+
+const char* cc_version_strings[] = {
+ // %%%_cc_version_strings_%%%
+};
+
+const char* cc_flags_strings[] = {
+ // %%%_cc_flags_strings_%%%
+};
+
+const char* cxx_name_strings[] = {
+ // %%%_cxx_name_strings_%%%
+};
+
+const char* cxx_version_strings[] = {
+ // %%%_cxx_version_strings_%%%
+};
+
+const char* cxx_flags_strings[] = {
+ // %%%_cxx_flags_strings_%%%
+};
+
+const char* optimization_flag_strings[] = {
+ // %%%_optimization_flag_strings_%%%
+};
+
+const char* target_info_strings[] = {
+ // %%%_target_info_strings_%%%
+};
+
+#if defined(CMSIS_NN) || defined(HIFI3) || defined(HIFI4) || defined(HIFI5) || \
+ defined(VISION_P6)
+const char* nn_library_url_strings[] = {
+ // %%%_nn_library_url_strings_%%%
+};
+
+const char* nn_library_md5_strings[] = {
+ // %%%_nn_library_md5_strings_%%%
+};
+#endif // defined(CMSIS_NN) || defined(HIFI3) || defined(HIFI4) ||
+ // defined(HIFI5) || defined(VISION_P6)
+
+void ShowStrings(const char* title, const char** str, const size_t count) {
+ MicroPrintf("%s%s", title, str[0]);
+ for (size_t i = 1; i < count; i++) {
+ MicroPrintf("%s", str[i]);
+ }
+}
+
+void ShowSeparator() { MicroPrintf("--------------------"); }
+
+} // namespace
+
+void GenericBenchmarkShowMetaData() {
+ ShowSeparator();
+ ShowStrings("Compiled on:\n\n", compilation_date_strings,
+ std::extent<decltype(compilation_date_strings)>::value);
+
+ ShowSeparator();
+ ShowStrings("Git SHA: ", git_commit_strings,
+ std::extent<decltype(git_commit_strings)>::value);
+ ShowStrings("\nGit status:\n\n", git_status_strings,
+ std::extent<decltype(git_status_strings)>::value);
+
+ ShowSeparator();
+ ShowStrings("C compiler: ", cc_name_strings,
+ std::extent<decltype(cc_name_strings)>::value);
+ ShowStrings("Version:\n\n", cc_version_strings,
+ std::extent<decltype(cc_version_strings)>::value);
+ ShowStrings("\nFlags:\n\n", cc_flags_strings,
+ std::extent<decltype(cc_flags_strings)>::value);
+ ShowStrings("\nC++ compiler: ", cxx_name_strings,
+ std::extent<decltype(cxx_name_strings)>::value);
+ ShowStrings("Version:\n\n", cxx_version_strings,
+ std::extent<decltype(cxx_version_strings)>::value);
+ ShowStrings("\nFlags:\n\n", cxx_flags_strings,
+ std::extent<decltype(cxx_flags_strings)>::value);
+ ShowStrings("\nOptimization: ", optimization_flag_strings,
+ std::extent<decltype(optimization_flag_strings)>::value);
+
+ ShowSeparator();
+ ShowStrings("Target information:\n\n", target_info_strings,
+ std::extent<decltype(target_info_strings)>::value);
+
+#if defined(CMSIS_NN) || defined(HIFI3) || defined(HIFI4) || defined(HIFI5) || \
+ defined(VISION_P6)
+ ShowSeparator();
+ ShowStrings("NN library download URLs:\n\n", nn_library_url_strings,
+ std::extent<decltype(nn_library_url_strings)>::value);
+ ShowStrings("\nNN library MD5 checksums:\n\n", nn_library_md5_strings,
+ std::extent<decltype(nn_library_md5_strings)>::value);
+#endif // defined(CMSIS_NN) || defined(HIFI3) || defined(HIFI4) ||
+ // defined(HIFI5) || defined(VISION_P6)
+
+#if defined(HIFI3) || defined(HIFI4) || defined(HIFI5)
+ ShowSeparator();
+
+ char version_buffer[30 + 1];
+ memset(version_buffer, 0, sizeof(version_buffer));
+ NatureDSP_Signal_get_library_version(version_buffer);
+ MicroPrintf("NatureDSP library version: %s", version_buffer);
+ memset(version_buffer, 0, sizeof(version_buffer));
+ NatureDSP_Signal_get_library_api_version(version_buffer);
+ MicroPrintf("NatureDSP API version: %s", version_buffer);
+
+ const char* nnlib_library_version = xa_nnlib_get_lib_version_string();
+ const char* nnlib_api_version = xa_nnlib_get_lib_api_version_string();
+ MicroPrintf("NNLIB library version: %s", nnlib_library_version);
+ MicroPrintf("NNLIB API version: %s", nnlib_api_version);
+#endif // defined(HIFI3) || defined(HIFI4) || defined(HIFI5)
+
+#if defined(GENERIC_BENCHMARK_USING_BUILTIN_MODEL)
+ ShowSeparator();
+ ShowStrings("Model SHA1:\n\n", model_sha1_strings,
+ std::extent<decltype(model_sha1_strings)>::value);
+ ShowStrings("\nModel analysis:\n\n", model_analysis_strings,
+ std::extent<decltype(model_analysis_strings)>::value);
+#endif // defined(GENERIC_BENCHMARK_USING_BUILTIN_MODEL)
+
+ ShowSeparator();
+}
+
+} // namespace tflite
diff --git a/tensorflow/lite/micro/tools/benchmarking/show_meta_data.h b/tensorflow/lite/micro/tools/benchmarking/show_meta_data.h
new file mode 100644
index 0000000..37cf616
--- /dev/null
+++ b/tensorflow/lite/micro/tools/benchmarking/show_meta_data.h
@@ -0,0 +1,24 @@
+/* Copyright 2024 The TensorFlow Authors. All Rights Reserved.
+
+Licensed under the Apache License, Version 2.0 (the "License");
+you may not use this file except in compliance with the License.
+You may obtain a copy of the License at
+
+ http://www.apache.org/licenses/LICENSE-2.0
+
+Unless required by applicable law or agreed to in writing, software
+distributed under the License is distributed on an "AS IS" BASIS,
+WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+See the License for the specific language governing permissions and
+limitations under the License.
+==============================================================================*/
+
+namespace tflite {
+
+#if !defined(GENERIC_BENCHMARK_NO_META_DATA)
+void GenericBenchmarkShowMetaData();
+#else
+inline void GenericBenchmarkShowMetaData() {}
+#endif // !defined(GENERIC_BENCHMARK_NO_META_DATA)
+
+} // namespace tflite
diff --git a/tensorflow/lite/micro/tools/ci_build/test_cortex_m_corstone_300.sh b/tensorflow/lite/micro/tools/ci_build/test_cortex_m_corstone_300.sh
index 516c181..a5d02db 100755
--- a/tensorflow/lite/micro/tools/ci_build/test_cortex_m_corstone_300.sh
+++ b/tensorflow/lite/micro/tools/ci_build/test_cortex_m_corstone_300.sh
@@ -42,3 +42,13 @@
readable_run make -f tensorflow/lite/micro/tools/make/Makefile clean
readable_run make -j$(nproc) -f tensorflow/lite/micro/tools/make/Makefile CO_PROCESSOR=ethos_u OPTIMIZED_KERNEL_DIR=${OPTIMIZED_KERNEL_DIR} TARGET=${TARGET} TARGET_ARCH=${TARGET_ARCH} TOOLCHAIN=${TOOLCHAIN} build
readable_run make -f tensorflow/lite/micro/tools/make/Makefile CO_PROCESSOR=ethos_u OPTIMIZED_KERNEL_DIR=${OPTIMIZED_KERNEL_DIR} TARGET=${TARGET} TARGET_ARCH=${TARGET_ARCH} TOOLCHAIN=${TOOLCHAIN} test
+
+# run generic benchmark
+readable_run make -j$(nproc) -f tensorflow/lite/micro/tools/make/Makefile \
+ CO_PROCESSOR=ethos_u \
+ OPTIMIZED_KERNEL_DIR=${OPTIMIZED_KERNEL_DIR} \
+ TARGET=${TARGET} \
+ TARGET_ARCH=${TARGET_ARCH} \
+ TOOLCHAIN=${TOOLCHAIN} \
+ GENERIC_BENCHMARK_MODEL_PATH=tensorflow/lite/micro/models/person_detect.tflite \
+ run_tflm_benchmark
diff --git a/tensorflow/lite/micro/tools/ci_build/test_x86_default.sh b/tensorflow/lite/micro/tools/ci_build/test_x86_default.sh
index 623238e..998827f 100755
--- a/tensorflow/lite/micro/tools/ci_build/test_x86_default.sh
+++ b/tensorflow/lite/micro/tools/ci_build/test_x86_default.sh
@@ -40,3 +40,10 @@
readable_run make -s -j8 -f ${TENSORFLOW_ROOT}tensorflow/lite/micro/tools/make/Makefile build TENSORFLOW_ROOT=${TENSORFLOW_ROOT} EXTERNAL_DIR=${EXTERNAL_DIR}
readable_run make -s -j8 -f ${TENSORFLOW_ROOT}tensorflow/lite/micro/tools/make/Makefile test TENSORFLOW_ROOT=${TENSORFLOW_ROOT} EXTERNAL_DIR=${EXTERNAL_DIR}
readable_run make -s -j8 -f ${TENSORFLOW_ROOT}tensorflow/lite/micro/tools/make/Makefile integration_tests TENSORFLOW_ROOT=${TENSORFLOW_ROOT} EXTERNAL_DIR=${EXTERNAL_DIR}
+
+# run generic benchmark
+readable_run make -j$(nproc) -f ${TENSORFLOW_ROOT}tensorflow/lite/micro/tools/make/Makefile \
+ TENSORFLOW_ROOT=${TENSORFLOW_ROOT} \
+ EXTERNAL_DIR=${EXTERNAL_DIR} \
+ GENERIC_BENCHMARK_MODEL_PATH=${TENSORFLOW_ROOT}tensorflow/lite/micro/models/person_detect.tflite \
+ run_tflm_benchmark
diff --git a/tensorflow/lite/micro/tools/ci_build/test_xtensa_fusion_f1.sh b/tensorflow/lite/micro/tools/ci_build/test_xtensa_fusion_f1.sh
index ae4e858..2fd6bf8 100755
--- a/tensorflow/lite/micro/tools/ci_build/test_xtensa_fusion_f1.sh
+++ b/tensorflow/lite/micro/tools/ci_build/test_xtensa_fusion_f1.sh
@@ -68,4 +68,15 @@
TENSORFLOW_ROOT=${TENSORFLOW_ROOT} \
EXTERNAL_DIR=${EXTERNAL_DIR} \
test -j$(nproc)
+
+# run generic benchmark
+readable_run make -f ${TENSORFLOW_ROOT}tensorflow/lite/micro/tools/make/Makefile \
+ TARGET=xtensa \
+ TARGET_ARCH=hifi3 \
+ OPTIMIZED_KERNEL_DIR=xtensa \
+ XTENSA_CORE=F1_190305_swupgrade \
+ TENSORFLOW_ROOT=${TENSORFLOW_ROOT} \
+ EXTERNAL_DIR=${EXTERNAL_DIR} \
+ GENERIC_BENCHMARK_MODEL_PATH=${TENSORFLOW_ROOT}tensorflow/lite/micro/models/person_detect.tflite \
+ run_tflm_benchmark -j$(nproc)
fi
diff --git a/tensorflow/lite/micro/tools/ci_build/test_xtensa_hifi3z.sh b/tensorflow/lite/micro/tools/ci_build/test_xtensa_hifi3z.sh
index cf038c8..ff3d600 100755
--- a/tensorflow/lite/micro/tools/ci_build/test_xtensa_hifi3z.sh
+++ b/tensorflow/lite/micro/tools/ci_build/test_xtensa_hifi3z.sh
@@ -95,4 +95,15 @@
TENSORFLOW_ROOT=${TENSORFLOW_ROOT} \
EXTERNAL_DIR=${EXTERNAL_DIR} \
test -j$(nproc)
+
+ # run generic benchmark
+ readable_run make -f ${TENSORFLOW_ROOT}tensorflow/lite/micro/tools/make/Makefile \
+ TARGET=xtensa \
+ TARGET_ARCH=hifi3 \
+ OPTIMIZED_KERNEL_DIR=xtensa \
+ XTENSA_CORE=HIFI_190304_swupgrade \
+ TENSORFLOW_ROOT=${TENSORFLOW_ROOT} \
+ EXTERNAL_DIR=${EXTERNAL_DIR} \
+ GENERIC_BENCHMARK_MODEL_PATH=${TENSORFLOW_ROOT}tensorflow/lite/micro/models/person_detect.tflite \
+ run_tflm_benchmark -j$(nproc)
fi
diff --git a/tensorflow/lite/micro/tools/ci_build/test_xtensa_hifi5.sh b/tensorflow/lite/micro/tools/ci_build/test_xtensa_hifi5.sh
index 82a04a9..0ad29e8 100755
--- a/tensorflow/lite/micro/tools/ci_build/test_xtensa_hifi5.sh
+++ b/tensorflow/lite/micro/tools/ci_build/test_xtensa_hifi5.sh
@@ -47,3 +47,14 @@
TENSORFLOW_ROOT=${TENSORFLOW_ROOT} \
EXTERNAL_DIR=${EXTERNAL_DIR} \
test -j$(nproc)
+
+# run generic benchmark
+readable_run make -f ${TENSORFLOW_ROOT}tensorflow/lite/micro/tools/make/Makefile \
+ TARGET=xtensa \
+ TARGET_ARCH=hifi5 \
+ OPTIMIZED_KERNEL_DIR=xtensa \
+ XTENSA_CORE=PRD_H5_RDO_07_01_2022 \
+ TENSORFLOW_ROOT=${TENSORFLOW_ROOT} \
+ EXTERNAL_DIR=${EXTERNAL_DIR} \
+ GENERIC_BENCHMARK_MODEL_PATH=${TENSORFLOW_ROOT}tensorflow/lite/micro/models/person_detect.tflite \
+ run_tflm_benchmark -j$(nproc)
\ No newline at end of file
diff --git a/tensorflow/lite/micro/tools/ci_build/test_xtensa_vision_p6.sh b/tensorflow/lite/micro/tools/ci_build/test_xtensa_vision_p6.sh
index a2744b5..1c6de93 100755
--- a/tensorflow/lite/micro/tools/ci_build/test_xtensa_vision_p6.sh
+++ b/tensorflow/lite/micro/tools/ci_build/test_xtensa_vision_p6.sh
@@ -54,4 +54,15 @@
TENSORFLOW_ROOT=${TENSORFLOW_ROOT} \
EXTERNAL_DIR=${EXTERNAL_DIR} \
test -j$(nproc)
+
+ # run generic benchmark
+ readable_run make -f ${TENSORFLOW_ROOT}tensorflow/lite/micro/tools/make/Makefile \
+ TARGET=xtensa \
+ TARGET_ARCH=vision_p6 \
+ OPTIMIZED_KERNEL_DIR=xtensa \
+ XTENSA_CORE=P6_200528 \
+ TENSORFLOW_ROOT=${TENSORFLOW_ROOT} \
+ EXTERNAL_DIR=${EXTERNAL_DIR} \
+ GENERIC_BENCHMARK_MODEL_PATH=${TENSORFLOW_ROOT}tensorflow/lite/micro/models/person_detect.tflite \
+ run_tflm_benchmark -j$(nproc)
fi
diff --git a/tensorflow/lite/micro/tools/make/bash_helpers.sh b/tensorflow/lite/micro/tools/make/bash_helpers.sh
index f29a641..e5446de 100755
--- a/tensorflow/lite/micro/tools/make/bash_helpers.sh
+++ b/tensorflow/lite/micro/tools/make/bash_helpers.sh
@@ -1,5 +1,5 @@
#!/bin/bash
-# Copyright 2019 The TensorFlow Authors. All Rights Reserved.
+# Copyright 2024 The TensorFlow Authors. All Rights Reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
@@ -14,6 +14,32 @@
# limitations under the License.
# ==============================================================================
+# Check the download path argument
+#
+# Parameter(s):
+# ${1} - path to the download directory or --no-downloads
+#
+# Outputs:
+# "yes" or "no"
+function check_should_download() {
+ if [[ ${1} == "--no-downloads" ]]; then
+ echo "no"
+ else
+ echo "yes"
+ fi
+}
+
+# Show the download URL and MD5 checksum
+#
+# Parameter(s):
+# ${1} - download URL
+# ${2} - download MD5 checksum
+#
+# Download scripts require that informational output be on stderr.
+function show_download_url_md5() {
+ echo >&2 "LIBRARY_URL=${1}"
+ echo >&2 "LIBRARY_MD5=${2}"
+}
# Compute the MD5 sum.
#
diff --git a/tensorflow/lite/micro/tools/make/ext_libs/cmsis_nn_download.sh b/tensorflow/lite/micro/tools/make/ext_libs/cmsis_nn_download.sh
index bd8c615..fb0ad92 100755
--- a/tensorflow/lite/micro/tools/make/ext_libs/cmsis_nn_download.sh
+++ b/tensorflow/lite/micro/tools/make/ext_libs/cmsis_nn_download.sh
@@ -36,21 +36,22 @@
source ${TENSORFLOW_ROOT}tensorflow/lite/micro/tools/make/bash_helpers.sh
DOWNLOADS_DIR=${1}
-if [ ! -d ${DOWNLOADS_DIR} ]; then
- echo "The top-level downloads directory: ${DOWNLOADS_DIR} does not exist."
- exit 1
-fi
-
DOWNLOADED_CMSIS_NN_PATH=${DOWNLOADS_DIR}/cmsis_nn
-if [ -d ${DOWNLOADED_CMSIS_NN_PATH} ]; then
+ZIP_PREFIX_NN="01dee38e6d6bfbbf202f0cd425bbea1731747d51"
+CMSIS_NN_URL="http://github.com/ARM-software/CMSIS-NN/archive/${ZIP_PREFIX_NN}.zip"
+CMSIS_NN_MD5="f20be93ededf42bb704c19f699a24313"
+
+should_download=$(check_should_download ${DOWNLOADS_DIR})
+
+if [[ ${should_download} == "no" ]]; then
+ show_download_url_md5 ${CMSIS_NN_URL} ${CMSIS_NN_MD5}
+elif [ ! -d ${DOWNLOADS_DIR} ]; then
+ echo "The top-level downloads directory: ${DOWNLOADS_DIR} does not exist."
+ exit 1
+elif [ -d ${DOWNLOADED_CMSIS_NN_PATH} ]; then
echo >&2 "${DOWNLOADED_CMSIS_NN_PATH} already exists, skipping the download."
else
-
- ZIP_PREFIX_NN="01dee38e6d6bfbbf202f0cd425bbea1731747d51"
- CMSIS_NN_URL="http://github.com/ARM-software/CMSIS-NN/archive/${ZIP_PREFIX_NN}.zip"
- CMSIS_NN_MD5="f20be93ededf42bb704c19f699a24313"
-
# wget is much faster than git clone of the entire repo. So we wget a specific
# version and can then apply a patch, as needed.
wget ${CMSIS_NN_URL} -O /tmp/${ZIP_PREFIX_NN}.zip >&2
diff --git a/tensorflow/lite/micro/tools/make/ext_libs/xtensa.inc b/tensorflow/lite/micro/tools/make/ext_libs/xtensa.inc
index b6db417..70e1880 100644
--- a/tensorflow/lite/micro/tools/make/ext_libs/xtensa.inc
+++ b/tensorflow/lite/micro/tools/make/ext_libs/xtensa.inc
@@ -69,7 +69,8 @@
$(shell find $(NDSPLIB_PATH)/library/fft/fft -name "*.c") \
$(shell find $(NDSPLIB_PATH)/library/fft/fft_ie -name "*.c") \
$(shell find $(NDSPLIB_PATH)/library/fft/fft_ief -name "*.c") \
- $(shell find $(NDSPLIB_PATH)/library/twiddles -name "*.c")
+ $(shell find $(NDSPLIB_PATH)/library/twiddles -name "*.c") \
+ $(shell find $(NDSPLIB_PATH)/library -name "version.c")
EXCLUDED_NNLIB_SRCS = \
$(NNLIB_PATH)/algo/layers/cnn/src/xa_nn_cnn_api.c \
@@ -125,7 +126,8 @@
$(shell find $(NDSPLIB_PATH)/library/fft/fft -name "*.c") \
$(shell find $(NDSPLIB_PATH)/library/fft/fft_ie -name "*.c") \
$(shell find $(NDSPLIB_PATH)/library/fft/fft_ief -name "*.c") \
- $(shell find $(NDSPLIB_PATH)/library/twiddles -name "*.c")
+ $(shell find $(NDSPLIB_PATH)/library/twiddles -name "*.c") \
+ $(shell find $(NDSPLIB_PATH)/library -name "version.c")
EXCLUDED_NNLIB_SRCS = \
$(NNLIB_PATH)/algo/layers/cnn/src/xa_nn_cnn_api.c \
diff --git a/tensorflow/lite/micro/tools/make/ext_libs/xtensa_download.sh b/tensorflow/lite/micro/tools/make/ext_libs/xtensa_download.sh
index d80855f..2c81710 100755
--- a/tensorflow/lite/micro/tools/make/ext_libs/xtensa_download.sh
+++ b/tensorflow/lite/micro/tools/make/ext_libs/xtensa_download.sh
@@ -1,5 +1,5 @@
#!/bin/bash
-# Copyright 2019 The TensorFlow Authors. All Rights Reserved.
+# Copyright 2024 The TensorFlow Authors. All Rights Reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
@@ -38,12 +38,8 @@
source ${3}tensorflow/lite/micro/tools/make/bash_helpers.sh
DOWNLOADS_DIR=${1}
-if [ ! -d ${DOWNLOADS_DIR} ]; then
- echo "The top-level downloads directory: ${DOWNLOADS_DIR} does not exist."
- exit 1
-fi
-if [[ ${2} == "hifi4" ]]; then
+if [[ ${2} == "hifi4" || ${2} == "hifi3" ]]; then
LIBRARY_URL="http://github.com/foss-xtensa/nnlib-hifi4/raw/master/archive/xa_nnlib_hifi4_09_05_2023.zip"
LIBRARY_DIRNAME="xa_nnlib_hifi4"
LIBRARY_MD5="2a54e056aef73a4fcffde4643998501a"
@@ -62,35 +58,30 @@
LIBRARY_INSTALL_PATH=${DOWNLOADS_DIR}/${LIBRARY_DIRNAME}
-if [ -d ${LIBRARY_INSTALL_PATH} ]; then
+should_download=$(check_should_download ${DOWNLOADS_DIR})
+
+if [[ ${should_download} == "no" ]]; then
+ show_download_url_md5 ${LIBRARY_URL} ${LIBRARY_MD5}
+elif [ ! -d ${DOWNLOADS_DIR} ]; then
+ echo "The top-level downloads directory: ${DOWNLOADS_DIR} does not exist."
+ exit 1
+elif [ -d ${LIBRARY_INSTALL_PATH} ]; then
echo >&2 "${LIBRARY_INSTALL_PATH} already exists, skipping the download."
else
TEMPDIR="$(mktemp -d)"
TEMPFILE="${TEMPDIR}/${LIBRARY_DIRNAME}.zip"
wget ${LIBRARY_URL} -O "$TEMPFILE" >&2
- MD5=`md5sum "$TEMPFILE" | awk '{print $1}'`
+ check_md5 "${TEMPFILE}" ${LIBRARY_MD5}
- if [[ ${MD5} != ${LIBRARY_MD5} ]]
- then
- echo "Bad checksum. Expected: ${LIBRARY_MD5}, Got: ${MD5}"
- exit 1
- fi
+ unzip -qo "$TEMPFILE" -d ${DOWNLOADS_DIR} >&2
- # Check if another make process has already extracted the downloaded files.
- # If so, skip extracting and patching.
- if [ -d ${LIBRARY_INSTALL_PATH} ]; then
- echo >&2 "${LIBRARY_INSTALL_PATH} already exists, skipping the extraction."
- else
- unzip -qo "$TEMPFILE" -d ${DOWNLOADS_DIR} >&2
+ rm -rf "${TEMPDIR}"
- rm -rf "${TEMPDIR}"
-
- pushd "${LIBRARY_INSTALL_PATH}" > /dev/null
- chmod -R +w ./
- if [[ -f "../../ext_libs/xa_nnlib_${2}.patch" ]]; then
- create_git_repo ./
- apply_patch_to_folder ./ "../../ext_libs/xa_nnlib_${2}.patch" "TFLM patch"
- fi
+ pushd "${LIBRARY_INSTALL_PATH}" > /dev/null
+ chmod -R +w ./
+ if [[ -f "../../ext_libs/xa_nnlib_${2}.patch" ]]; then
+ create_git_repo ./
+ apply_patch_to_folder ./ "../../ext_libs/xa_nnlib_${2}.patch" "TFLM patch"
fi
fi
diff --git a/tensorflow/lite/micro/tools/make/ext_libs/xtensa_ndsp_download.sh b/tensorflow/lite/micro/tools/make/ext_libs/xtensa_ndsp_download.sh
index 81bf848..71fe1d1 100755
--- a/tensorflow/lite/micro/tools/make/ext_libs/xtensa_ndsp_download.sh
+++ b/tensorflow/lite/micro/tools/make/ext_libs/xtensa_ndsp_download.sh
@@ -1,5 +1,5 @@
#!/bin/bash
-# Copyright 2019 The TensorFlow Authors. All Rights Reserved.
+# Copyright 2024 The TensorFlow Authors. All Rights Reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
@@ -38,10 +38,6 @@
source ${3}tensorflow/lite/micro/tools/make/bash_helpers.sh
DOWNLOADS_DIR=${1}
-if [ ! -d ${DOWNLOADS_DIR} ]; then
- echo "The top-level downloads directory: ${DOWNLOADS_DIR} does not exist."
- exit 1
-fi
if [[ ${2} == "hifi3" ]]; then
COMMIT="d17bf205dc530a9e1a1d979249520f4401529db1"
@@ -68,42 +64,37 @@
LIBRARY_INSTALL_PATH=${DOWNLOADS_DIR}/${LIBRARY_DIRNAME}
-if [ -d ${LIBRARY_INSTALL_PATH} ]; then
+should_download=$(check_should_download ${DOWNLOADS_DIR})
+
+if [[ ${should_download} == "no" ]]; then
+ show_download_url_md5 ${LIBRARY_URL} ${LIBRARY_MD5}
+elif [ ! -d ${DOWNLOADS_DIR} ]; then
+ echo "The top-level downloads directory: ${DOWNLOADS_DIR} does not exist."
+ exit 1
+elif [ -d ${LIBRARY_INSTALL_PATH} ]; then
echo >&2 "${LIBRARY_INSTALL_PATH} already exists, skipping the download."
else
TEMPDIR="$(mktemp -d)"
TEMPFILE="${TEMPDIR}/${LIBRARY_DIRNAME}.zip"
wget ${LIBRARY_URL} -O "$TEMPFILE" >&2
- MD5=`md5sum "$TEMPFILE" | awk '{print $1}'`
+ check_md5 "${TEMPFILE}" ${LIBRARY_MD5}
- if [[ ${MD5} != ${LIBRARY_MD5} ]]
- then
- echo "Bad checksum. Expected: ${LIBRARY_MD5}, Got: ${MD5}"
- exit 1
+ unzip -qo "$TEMPFILE" -d ${TEMPDIR} >&2
+ unzip -qo ${TEMPDIR}/${LIBRARY_DIRNAME}-${COMMIT}/NDSP_${CORE_NAME}/NDSP_${CORE_NAME}*.zip -d ${TEMPDIR}/${LIBRARY_DIRNAME}-${COMMIT}/NDSP_${CORE_NAME}/ >&2
+ find ${TEMPDIR}/${LIBRARY_DIRNAME}-${COMMIT}/NDSP_${CORE_NAME}/* -maxdepth 0 -type d -exec mv {} ${LIBRARY_INSTALL_PATH} \;
+ rm -rf "${TEMPDIR}"
+ # NDSP sources in GitHub currently use DOS-style newlines, which cause compiler errors.
+ find ${LIBRARY_INSTALL_PATH} -type f -exec sed -i.bak 's/\r$//g' {} \;
+
+ pushd "${LIBRARY_INSTALL_PATH}" > /dev/null
+ chmod -R +w ./
+ if [[ -f "../../ext_libs/ndsplib-${2}.patch" ]]; then
+ create_git_repo ./
+ apply_patch_to_folder ./ "../../ext_libs/ndsplib-${2}.patch" "TFLM patch"
fi
-
- # Check if another make process has already extracted the downloaded files.
- # If so, skip extracting and patching.
- if [ -d ${LIBRARY_INSTALL_PATH} ]; then
- echo >&2 "${LIBRARY_INSTALL_PATH} already exists, skipping the extraction."
- else
- unzip -qo "$TEMPFILE" -d ${TEMPDIR} >&2
- unzip -qo ${TEMPDIR}/${LIBRARY_DIRNAME}-${COMMIT}/NDSP_${CORE_NAME}/NDSP_${CORE_NAME}*.zip -d ${TEMPDIR}/${LIBRARY_DIRNAME}-${COMMIT}/NDSP_${CORE_NAME}/ >&2
- find ${TEMPDIR}/${LIBRARY_DIRNAME}-${COMMIT}/NDSP_${CORE_NAME}/* -maxdepth 0 -type d -exec mv {} ${LIBRARY_INSTALL_PATH} \;
- rm -rf "${TEMPDIR}"
- # NDSP sources in GitHub currently uses DOS style newlines, which causes compiler errors.
- find ${LIBRARY_INSTALL_PATH} -type f -exec sed -i.bak 's/\r$//g' {} \;
-
- pushd "${LIBRARY_INSTALL_PATH}" > /dev/null
- chmod -R +w ./
- if [[ -f "../../ext_libs/ndsplib-${2}.patch" ]]; then
- create_git_repo ./
- apply_patch_to_folder ./ "../../ext_libs/ndsplib-${2}.patch" "TFLM patch"
- fi
- # Rename the strings in __renaming__.h to names that are traceable to TFLM.
- # Note that renaming is disabled by default and must be enabled with -D__RENAMING__
- sed -i 's/NatureDSP_/NatureDSP_TFLM_/' library/include_private/__renaming__.h
- fi
+ # Rename the strings in __renaming__.h to names that are traceable to TFLM.
+ # Note that renaming is disabled by default and must be enabled with -D__RENAMING__
+ sed -i 's/NatureDSP_/NatureDSP_TFLM_/' library/include_private/__renaming__.h
fi
echo "SUCCESS"