Update examples and documentation for Arm(R) Corstone(TM)-300 FVP (#2503)
* Updates benchmark documentation.
* Updates benchmarks and network tester example.
* Select only needed ops for memory measurements.
* Only pip install if needed.
* Also, the gen folder output will be created differently depending on the toolchain and the type of kernels.
BUG=documentation for the benchmark application is not correct
diff --git a/tensorflow/lite/micro/benchmarks/README.md b/tensorflow/lite/micro/benchmarks/README.md
index 390b27a..1ac5d25 100644
--- a/tensorflow/lite/micro/benchmarks/README.md
+++ b/tensorflow/lite/micro/benchmarks/README.md
@@ -70,26 +70,17 @@
For more info about the Corstone-300 software see:
[tensorflow/lite/micro/cortex_m_corstone_300/README.md](../cortex_m_corstone_300/README.md).
-Disclaimer: Executing the benchmark test on the Corstone-300 software will
-provide a general metric of instructions executed. The estimates are not cycle
-accurate, however it aligns to instruction per cycle, and is a consistent
-environment. This means it can detect if code changes changed performance.
+Disclaimer: The FVP cannot be used to measure CPU performance.
+The results are not reliable, not even for relative measurements.
+The FVP may, however, be used for performance measurements when running on the NPU, in which case only the NPU PMU numbers can be used. The NPU model is cycle accurate within approximately +-10%.
-The person detection benchmark can also run with Ethos-U enabled, as the
-downloaded model will be optimized for Ethos-U. For more info see:
+As an example, the downloaded person detect model will be optimized for Ethos-U. For more info see:
[tensorflow/lite/micro/kernels/ethos_u/README.md](../kernels/ethos_u/README.md).
+And since it only makes sense to measure performance on the NPU, only the person detection benchmark should be run, and only with Ethos-U enabled.
+See also the network tester example, where the person detect model is used in the same way when Ethos-U is enabled:
+[tensorflow/lite/micro/examples/network_tester/README.md](../examples/network_tester/README.md).
-To run the keyword benchmark on FVP:
-
-```
-make -f tensorflow/lite/micro/tools/make/Makefile TARGET=cortex_m_corstone_300 TARGET_ARCH=cortex-m55 run_keyword_benchmark
-```
-
-To run the person detection benchmark on FVP:
-
-```
-make -f tensorflow/lite/micro/tools/make/Makefile TARGET=cortex_m_corstone_300 TARGET_ARCH=cortex-m55 run_person_detection_benchmark
-```
+The person detect model is not an optimal model for Ethos-U since it is quite small. Also note that only the NPU PMU cycles are logged even though the CPU is setting up the Ethos-U driver in each iteration.
To run the person detection benchmark on FVP with Ethos-U:
diff --git a/tensorflow/lite/micro/benchmarks/person_detection_benchmark.cc b/tensorflow/lite/micro/benchmarks/person_detection_benchmark.cc
index e21789b..29d30ee 100644
--- a/tensorflow/lite/micro/benchmarks/person_detection_benchmark.cc
+++ b/tensorflow/lite/micro/benchmarks/person_detection_benchmark.cc
@@ -1,4 +1,4 @@
-/* Copyright 2023 The TensorFlow Authors. All Rights Reserved.
+/* Copyright 2024 The TensorFlow Authors. All Rights Reserved.
Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.
@@ -37,7 +37,11 @@
namespace tflite {
+#ifdef ETHOS_U
+using PersonDetectionOpResolver = MicroMutableOpResolver<1>;
+#else
using PersonDetectionOpResolver = MicroMutableOpResolver<6>;
+#endif
using PersonDetectionBenchmarkRunner = MicroBenchmarkRunner<int8_t>;
// Create an area of memory to use for input, output, and intermediate arrays.
@@ -57,12 +61,16 @@
// PersonDetectionBenchmarkRunner object.
PersonDetectionOpResolver* op_resolver =
new (op_resolver_buffer) PersonDetectionOpResolver();
+#ifdef ETHOS_U
+ op_resolver->AddEthosU();
+#else
op_resolver->AddFullyConnected(tflite::Register_FULLY_CONNECTED_INT8());
op_resolver->AddConv2D(tflite::Register_CONV_2D_INT8REF());
op_resolver->AddDepthwiseConv2D();
op_resolver->AddSoftmax();
op_resolver->AddAveragePool2D(tflite::Register_AVERAGE_POOL_2D_INT8());
op_resolver->AddReshape();
+#endif
return new (benchmark_runner_buffer)
PersonDetectionBenchmarkRunner(g_person_detect_model_data, op_resolver,
tensor_arena, kTensorArenaSize, profiler);
diff --git a/tensorflow/lite/micro/cortex_m_corstone_300/system_setup.cc b/tensorflow/lite/micro/cortex_m_corstone_300/system_setup.cc
index 95a11b2..6473340 100644
--- a/tensorflow/lite/micro/cortex_m_corstone_300/system_setup.cc
+++ b/tensorflow/lite/micro/cortex_m_corstone_300/system_setup.cc
@@ -1,4 +1,4 @@
-/* Copyright 2022 The TensorFlow Authors. All Rights Reserved.
+/* Copyright 2024 The TensorFlow Authors. All Rights Reserved.
Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.
@@ -14,33 +14,90 @@
==============================================================================*/
#ifdef ETHOS_U
+#include <inttypes.h>
+
+#include <algorithm>
+
#include "ethosu_driver.h"
+#include "pmu_ethosu.h"
#endif
// This is set in micro/tools/make/targets/cortex_m_corstone_300_makefile.inc.
-// It is needed for the calls to NVIC_SetVector()/NVIC_EnableIR() and for the
-// DWT and PMU counters.
+// It is needed for the calls to NVIC_SetVector()/NVIC_EnableIRQ().
#include CMSIS_DEVICE_ARM_CORTEX_M_XX_HEADER_FILE
#include "tensorflow/lite/micro/micro_log.h"
#include "tensorflow/lite/micro/micro_time.h"
#include "tensorflow/lite/micro/system_setup.h"
+#ifdef ETHOS_U
+
+bool npuPmuCycleCounterIsSet;
+uint64_t npuPmuCycleCounter;
+
+extern "C" {
+void ethosu_inference_begin(struct ethosu_driver* drv, void* userArg) {
+ // Enable PMU
+ ETHOSU_PMU_Enable(drv);
+
+ // Enable cycle counter
+ ETHOSU_PMU_PMCCNTR_CFG_Set_Stop_Event(drv, ETHOSU_PMU_NPU_IDLE);
+ ETHOSU_PMU_PMCCNTR_CFG_Set_Start_Event(drv, ETHOSU_PMU_NPU_ACTIVE);
+ ETHOSU_PMU_CNTR_Enable(drv, ETHOSU_PMU_CCNT_Msk);
+ ETHOSU_PMU_CYCCNT_Reset(drv);
+
+ // Reset all counters
+ ETHOSU_PMU_EVCNTR_ALL_Reset(drv);
+}
+
+void ethosu_inference_end(struct ethosu_driver* drv, void* userArg) {
+ // Save cycle counter
+ npuPmuCycleCounter += ETHOSU_PMU_Get_CCNTR(drv);
+ npuPmuCycleCounterIsSet = true;
+
+ // Disable PMU
+ ETHOSU_PMU_Disable(drv);
+}
+}
+#endif
+
namespace tflite {
namespace {
+#ifdef ETHOS_U
+constexpr uint32_t kClocksPerSecond = 200e6;
+#else
constexpr uint32_t kClocksPerSecond = 25e6;
+#endif
} // namespace
uint32_t ticks_per_second() { return kClocksPerSecond; }
uint32_t GetCurrentTimeTicks() {
-#if (!defined(TF_LITE_STRIP_ERROR_STRINGS) && !defined(ARMCM0))
+#if (!defined(TF_LITE_STRIP_ERROR_STRINGS))
+#ifdef ETHOS_U
+ uint32_t ticks = static_cast<uint32_t>(npuPmuCycleCounter);
+
+ // Note cycle counter will be reset here for next iteration
+ if (npuPmuCycleCounterIsSet) {
+ npuPmuCycleCounter = 0;
+ npuPmuCycleCounterIsSet = false;
+ }
+
+ return ticks;
+#else
+
+#if defined(ARMCM0)
+ return 0;
+#else
#ifdef ARMCM55
return ARM_PMU_Get_CCNTR();
#else
return DWT->CYCCNT;
#endif
+#endif
+
+#endif
#else
return 0;
#endif
@@ -88,14 +145,17 @@
#ifdef ETHOS_U
constexpr int ethosu_base_address = 0x48102000;
constexpr int ethosu_irq = 56;
+ constexpr int ethosu_irq_priority = 5;
// Initialize Ethos-U NPU driver.
  if (ethosu_init(&ethosu0_driver, reinterpret_cast<void*>(ethosu_base_address),
ethosu0_scratch, ETHOSU_FAST_MEMORY_SIZE, 1, 1)) {
MicroPrintf("Failed to initialize Ethos-U driver");
+ return;
}
NVIC_SetVector(static_cast<IRQn_Type>(ethosu_irq),
                 (uint32_t)&ethosuIrqHandler0);
+ NVIC_SetPriority(static_cast<IRQn_Type>(ethosu_irq), ethosu_irq_priority);
NVIC_EnableIRQ(static_cast<IRQn_Type>(ethosu_irq));
#endif
}
diff --git a/tensorflow/lite/micro/examples/network_tester/network_tester_test.cc b/tensorflow/lite/micro/examples/network_tester/network_tester_test.cc
index e62e0c4..23e50c9 100644
--- a/tensorflow/lite/micro/examples/network_tester/network_tester_test.cc
+++ b/tensorflow/lite/micro/examples/network_tester/network_tester_test.cc
@@ -1,4 +1,4 @@
-/* Copyright 2023 The TensorFlow Authors. All Rights Reserved.
+/* Copyright 2024 The TensorFlow Authors. All Rights Reserved.
Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.
@@ -40,7 +40,7 @@
#define NUM_INFERENCES 1
#endif
-uint8_t tensor_arena[TENSOR_ARENA_SIZE];
+alignas(16) uint8_t tensor_arena[TENSOR_ARENA_SIZE];
#ifdef NUM_BYTES_TO_PRINT
inline void print_output_data(TfLiteTensor* output) {
@@ -92,15 +92,19 @@
model->version(), TFLITE_SCHEMA_VERSION);
return kTfLiteError;
}
+#ifdef ETHOS_U
+ tflite::MicroMutableOpResolver<1> resolver;
+ resolver.AddEthosU();
- tflite::MicroMutableOpResolver<6> resolver;
+#else
+ tflite::MicroMutableOpResolver<5> resolver;
resolver.AddAveragePool2D(tflite::Register_AVERAGE_POOL_2D_INT8());
resolver.AddConv2D(tflite::Register_CONV_2D_INT8());
resolver.AddDepthwiseConv2D(tflite::Register_DEPTHWISE_CONV_2D_INT8());
- resolver.AddEthosU();
resolver.AddReshape();
resolver.AddSoftmax(tflite::Register_SOFTMAX_INT8());
+#endif
tflite::MicroInterpreter interpreter(model, resolver, tensor_arena,
TENSOR_ARENA_SIZE);
@@ -152,7 +156,8 @@
}
#endif
}
- MicroPrintf("Ran successfully\n");
+
+ MicroPrintf("~~~ALL TESTS PASSED~~~\n");
}
TF_LITE_MICRO_TESTS_END
diff --git a/tensorflow/lite/micro/tools/make/Makefile b/tensorflow/lite/micro/tools/make/Makefile
index bb4a983..213ac0e 100644
--- a/tensorflow/lite/micro/tools/make/Makefile
+++ b/tensorflow/lite/micro/tools/make/Makefile
@@ -272,7 +272,15 @@
# Where compiled objects are stored.
BASE_GENDIR := gen
-GENDIR := $(BASE_GENDIR)/$(TARGET)_$(TARGET_ARCH)_$(BUILD_TYPE)/
+GENDIR := $(BASE_GENDIR)/$(TARGET)_$(TARGET_ARCH)_$(BUILD_TYPE)
+ifneq ($(OPTIMIZED_KERNEL_DIR),)
+ GENDIR := $(GENDIR)_$(OPTIMIZED_KERNEL_DIR)
+endif
+ifneq ($(CO_PROCESSOR),)
+ GENDIR := $(GENDIR)_$(CO_PROCESSOR)
+endif
+GENDIR := $(GENDIR)_$(TOOLCHAIN)/
+
CORE_OBJDIR := $(GENDIR)obj/core/
KERNEL_OBJDIR := $(GENDIR)obj/kernels/
THIRD_PARTY_KERNEL_OBJDIR := $(GENDIR)obj/third_party_kernels/
diff --git a/tensorflow/lite/micro/tools/make/ext_libs/person_detection_int8_vela_convert.sh b/tensorflow/lite/micro/tools/make/ext_libs/person_detection_int8_vela_convert.sh
index 4b0ee89..0d9df87 100755
--- a/tensorflow/lite/micro/tools/make/ext_libs/person_detection_int8_vela_convert.sh
+++ b/tensorflow/lite/micro/tools/make/ext_libs/person_detection_int8_vela_convert.sh
@@ -1,5 +1,5 @@
#!/bin/bash
-# Copyright 2022 The TensorFlow Authors. All Rights Reserved.
+# Copyright 2024 The TensorFlow Authors. All Rights Reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
@@ -50,16 +50,28 @@
PERSON_MODEL_HEADER=${MODEL_DIR}/person_detect_model_data.h
if [ ! -f ${CONVERTED_PERSON_MODEL_INT8} ]; then
+
+ # Install ethos-u-vela if not already installed.
+ set +e
+ pip show ethos-u-vela >&2
+ retVal=$?
+ set -e
+ if [ $retVal -ne 0 ]; then
+ TEMPFILE=$(mktemp -d)/
+ python3 -m venv $TEMPFILE
+ source $TEMPFILE/bin/activate
+ python3 -m pip install --upgrade pip >&2
+ pip install --upgrade cython >&2
+ pip install --prefer-binary ethos-u-vela >&2
+ fi
+
# Compile an optimized .tflite version for Ethos-U.
- TEMPFILE=$(mktemp -d)/
- python3 -m venv $TEMPFILE
- source $TEMPFILE/bin/activate
- python3 -m pip install --upgrade pip >&2
- pip install --upgrade cython >&2
- pip install --prefer-binary ethos-u-vela >&2
vela --accelerator-config=ethos-u55-256 ${DOWNLOADS_DIR}/../../../models/person_detect.tflite \
--output-dir ${MODEL_DIR} >&2
- deactivate
+
+ if [ $retVal -ne 0 ]; then
+ deactivate
+ fi
# Convert .tflite back to C array.
echo "// This file is generated by $0." > ${CONVERTED_PERSON_MODEL_INT8}
@@ -67,7 +79,7 @@
${CONVERTED_PERSON_MODEL_INT8}
echo -n "const " >> ${CONVERTED_PERSON_MODEL_INT8}
xxd -i ${MODEL_DIR}/person_detect_vela.tflite >> ${CONVERTED_PERSON_MODEL_INT8}
- sed -i 's/gen_cortex_m_corstone_300_cortex_m55_default_genfiles_tensorflow_lite_micro_models_person_detect_vela_tflite/g_person_detect_model_data/' \
+ sed -i 's/gen_cortex_m_corstone_300_cortex_m55_.*genfiles_tensorflow_lite_micro_models_person_detect_vela_tflite/g_person_detect_model_data/' \
${CONVERTED_PERSON_MODEL_INT8}
sed -i 's/^const unsigned char g_person_detect_model_data/alignas\(16\) &/' ${CONVERTED_PERSON_MODEL_INT8}
SIZE=$(sed -E -n -e 's/^.*g_person_detect_model_data_len = ([0-9]+);/\1/p' ${CONVERTED_PERSON_MODEL_INT8})
diff --git a/tensorflow/lite/micro/tools/make/targets/cortex_m_corstone_300_makefile.inc b/tensorflow/lite/micro/tools/make/targets/cortex_m_corstone_300_makefile.inc
index c9bb8ea..653afad 100644
--- a/tensorflow/lite/micro/tools/make/targets/cortex_m_corstone_300_makefile.inc
+++ b/tensorflow/lite/micro/tools/make/targets/cortex_m_corstone_300_makefile.inc
@@ -204,6 +204,11 @@
EXCLUDED_TESTS := \
$(TENSORFLOW_ROOT)tensorflow/lite/micro/memory_arena_threshold_test.cc \
$(TENSORFLOW_ROOT)tensorflow/lite/micro/recording_micro_allocator_test.cc
+ifeq ($(CO_PROCESSOR), ethos_u)
+# micro_time_test does not work with Ethos-U enabled, since the NPU PMU counters are then used as the time source for the sake of the benchmark example.
+EXCLUDED_TESTS += \
+ $(TENSORFLOW_ROOT)tensorflow/lite/micro/micro_time_test.cc
+endif
MICROLITE_TEST_SRCS := $(filter-out $(EXCLUDED_TESTS), $(MICROLITE_TEST_SRCS))
# TODO(#2449) Examine why this test fails here.