CMSIS-NN LSTM issue fixes (#2504)
- Remove read of non-initialized buffer
- Adds back integer unidirectional_sequence_lstm_test for CMSIS-NN
- Adds back dtln example using CMSIS-NN
- Reverts "Add intermediate tensors to LSTM unit test to enable CMSIS-NN (#1996)"
BUG=#2449
diff --git a/tensorflow/lite/micro/kernels/cmsis_nn/unidirectional_sequence_lstm.cc b/tensorflow/lite/micro/kernels/cmsis_nn/unidirectional_sequence_lstm.cc
index 27e31f5..75ba5ea 100644
--- a/tensorflow/lite/micro/kernels/cmsis_nn/unidirectional_sequence_lstm.cc
+++ b/tensorflow/lite/micro/kernels/cmsis_nn/unidirectional_sequence_lstm.cc
@@ -37,20 +37,41 @@
cmsis_nn_lstm_params params_cmsis_nn; // Used for CMSIS-NN implementation
};
-TfLiteStatus PortOpData_s8(TfLiteContext* context, OpDataLSTM* params_ref,
- const LSTMKernelContents& kernel_content,
- cmsis_nn_lstm_params* params_cmsis_nn) {
+LSTMBuffers<int16_t> CMSIS_NN_CreateLSTMBuffers(TfLiteContext* context,
+ const int* buffer_indices) {
+ LSTMBuffers<int16_t> buffers;
+ buffers.buffer0 = reinterpret_cast<int16_t*>(
+ context->GetScratchBuffer(context, buffer_indices[0]));
+ buffers.buffer1 = reinterpret_cast<int16_t*>(
+ context->GetScratchBuffer(context, buffer_indices[1]));
+ buffers.buffer2 = reinterpret_cast<int16_t*>(
+ context->GetScratchBuffer(context, buffer_indices[2]));
+
+ return buffers;
+}
+
+void CMSIS_NN_VectorSum(int32_t* kernel_sum, const int32_t size1,
+ const int32_t size2, const int8_t* weights,
+ const int32_t offset, const int32_t* biases) {
+ arm_vector_sum_s8(kernel_sum, size1, size2, weights, offset, biases);
+}
+
+template <typename BiasType>
+TfLiteStatus CMSIS_NN_PortOpData(TfLiteContext* context, OpDataLSTM* params_ref,
+ const LSTMKernelContents& kernel_content,
+ cmsis_nn_lstm_params* params_cmsis_nn) {
// Unwrap pointers
- const int32_t* input_gate_bias =
- tflite::micro::GetOptionalTensorData<int32_t>(
+ const BiasType* input_gate_bias =
+ tflite::micro::GetOptionalTensorData<BiasType>(
kernel_content.GetInternalTensor(tflite::kLstmInputGateBiasTensor));
- const int32_t* forget_gate_bias =
- tflite::micro::GetOptionalTensorData<int32_t>(
+ const BiasType* forget_gate_bias =
+ tflite::micro::GetOptionalTensorData<BiasType>(
kernel_content.GetInternalTensor(tflite::kLstmForgetGateBiasTensor));
- const int32_t* cell_gate_bias = tflite::micro::GetOptionalTensorData<int32_t>(
- kernel_content.GetInternalTensor(tflite::kLstmCellGateBiasTensor));
- const int32_t* output_gate_bias =
- tflite::micro::GetOptionalTensorData<int32_t>(
+ const BiasType* cell_gate_bias =
+ tflite::micro::GetOptionalTensorData<BiasType>(
+ kernel_content.GetInternalTensor(tflite::kLstmCellGateBiasTensor));
+ const BiasType* output_gate_bias =
+ tflite::micro::GetOptionalTensorData<BiasType>(
kernel_content.GetInternalTensor(tflite::kLstmOutputGateBiasTensor));
const int8_t* input_to_input_weights =
@@ -90,72 +111,72 @@
int32_t size_data = params_ref->size_info.input_dimension;
int32_t size_hidden = params_ref->size_info.state_dimension;
- int32_t* input_data_kernel_sum{
- static_cast<int32_t*>(context->AllocatePersistentBuffer(
- context, size_hidden * sizeof(int32_t)))};
- int32_t* forget_data_kernel_sum{
- static_cast<int32_t*>(context->AllocatePersistentBuffer(
- context, size_hidden * sizeof(int32_t)))};
- int32_t* cell_data_kernel_sum{
- static_cast<int32_t*>(context->AllocatePersistentBuffer(
- context, size_hidden * sizeof(int32_t)))};
- int32_t* output_data_kernel_sum{
- static_cast<int32_t*>(context->AllocatePersistentBuffer(
- context, size_hidden * sizeof(int32_t)))};
+ BiasType* input_data_kernel_sum{
+ static_cast<BiasType*>(context->AllocatePersistentBuffer(
+ context, size_hidden * sizeof(BiasType)))};
+ BiasType* forget_data_kernel_sum{
+ static_cast<BiasType*>(context->AllocatePersistentBuffer(
+ context, size_hidden * sizeof(BiasType)))};
+ BiasType* cell_data_kernel_sum{
+ static_cast<BiasType*>(context->AllocatePersistentBuffer(
+ context, size_hidden * sizeof(BiasType)))};
+ BiasType* output_data_kernel_sum{
+ static_cast<BiasType*>(context->AllocatePersistentBuffer(
+ context, size_hidden * sizeof(BiasType)))};
- int32_t* input_hidden_kernel_sum{
- static_cast<int32_t*>(context->AllocatePersistentBuffer(
- context, size_hidden * sizeof(int32_t)))};
- int32_t* forget_hidden_kernel_sum{
- static_cast<int32_t*>(context->AllocatePersistentBuffer(
- context, size_hidden * sizeof(int32_t)))};
- int32_t* cell_hidden_kernel_sum = {
- static_cast<int32_t*>(context->AllocatePersistentBuffer(
- context, size_hidden * sizeof(int32_t)))};
- int32_t* output_hidden_kernel_sum = {
- static_cast<int32_t*>(context->AllocatePersistentBuffer(
- context, size_hidden * sizeof(int32_t)))};
+ BiasType* input_hidden_kernel_sum{
+ static_cast<BiasType*>(context->AllocatePersistentBuffer(
+ context, size_hidden * sizeof(BiasType)))};
+ BiasType* forget_hidden_kernel_sum{
+ static_cast<BiasType*>(context->AllocatePersistentBuffer(
+ context, size_hidden * sizeof(BiasType)))};
+ BiasType* cell_hidden_kernel_sum = {
+ static_cast<BiasType*>(context->AllocatePersistentBuffer(
+ context, size_hidden * sizeof(BiasType)))};
+ BiasType* output_hidden_kernel_sum = {
+ static_cast<BiasType*>(context->AllocatePersistentBuffer(
+ context, size_hidden * sizeof(BiasType)))};
// Compute effective biases
- arm_vector_sum_s8(
+ CMSIS_NN_VectorSum(
input_data_kernel_sum, size_data, size_hidden, input_to_input_weights,
params_ref->input_gate_parameters.input_fc_params.input_offset,
input_gate_bias);
- arm_vector_sum_s8(
+ CMSIS_NN_VectorSum(
forget_data_kernel_sum, size_data, size_hidden, input_to_forget_weights,
params_ref->forget_gate_parameters.input_fc_params.input_offset,
forget_gate_bias);
- arm_vector_sum_s8(
+ CMSIS_NN_VectorSum(
cell_data_kernel_sum, size_data, size_hidden, input_to_cell_weights,
params_ref->cell_gate_parameters.input_fc_params.input_offset,
cell_gate_bias);
- arm_vector_sum_s8(
+ CMSIS_NN_VectorSum(
output_data_kernel_sum, size_data, size_hidden, input_to_output_weights,
params_ref->output_gate_parameters.input_fc_params.input_offset,
output_gate_bias);
- arm_vector_sum_s8(
+ CMSIS_NN_VectorSum(
input_hidden_kernel_sum, size_hidden, size_hidden,
recurrent_to_input_weights,
-params_ref->inter_gate_parameters.output_mul_params.output_offset,
nullptr);
- arm_vector_sum_s8(
+ CMSIS_NN_VectorSum(
forget_hidden_kernel_sum, size_hidden, size_hidden,
recurrent_to_forget_weights,
-params_ref->inter_gate_parameters.output_mul_params.output_offset,
nullptr);
- arm_vector_sum_s8(
+ CMSIS_NN_VectorSum(
cell_hidden_kernel_sum, size_hidden, size_hidden,
recurrent_to_cell_weights,
-params_ref->inter_gate_parameters.output_mul_params.output_offset,
nullptr);
- arm_vector_sum_s8(
+ CMSIS_NN_VectorSum(
output_hidden_kernel_sum, size_hidden, size_hidden,
recurrent_to_output_weights,
-params_ref->inter_gate_parameters.output_mul_params.output_offset,
@@ -242,10 +263,9 @@
return kTfLiteOk;
}
-template <typename CellType>
TfLiteStatus CMSIS_NN_EvalInteger8x8_16Lstm(
const OpData& op_data, const LSTMKernelContents& kernel_content,
- const LSTMBuffers<CellType>& buffers) {
+ const LSTMBuffers<int16_t>& buffers) {
TFLITE_DCHECK(
kernel_content.GetInternalTensor(tflite::kLstmInputTensor)->dims->size >=
2 &&
@@ -270,7 +290,6 @@
}
/*Kernel functions*/
-
void* UnidirectionalSequenceLstmInit(TfLiteContext* context, const char* buffer,
size_t length) {
TFLITE_DCHECK(context->AllocatePersistentBuffer != nullptr);
@@ -327,20 +346,15 @@
}
size_t number_of_buffers;
- if (activation_type != kTfLiteInt8) {
- number_of_buffers = 4;
+ if (activation_type == kTfLiteInt8 && cell_state_type == kTfLiteInt16) {
+ auto kernel_content = CreateLSTMKernelContent(context, node);
+ number_of_buffers = 3;
+ CMSIS_NN_PortOpData<int32_t>(context, op_data_lstm, kernel_content,
+ &op_data->params_cmsis_nn);
} else {
- bool cmsis_nn_used = (cell_state_type == kTfLiteInt16);
- if (cmsis_nn_used) {
- auto kernel_content = CreateLSTMKernelContent(context, node);
- PortOpData_s8(context, op_data_lstm, kernel_content,
- &op_data->params_cmsis_nn);
-
- number_of_buffers = 3;
- } else {
- number_of_buffers = 4;
- }
+ number_of_buffers = 4;
}
+
for (size_t i = 0; i < number_of_buffers; i++) {
TF_LITE_ENSURE_OK(context, context->RequestScratchBufferInArena(
context,
@@ -379,9 +393,9 @@
case kTfLiteInt8: {
// 8(activation)x8(weight)->16(cell) LSTM with 32 bits bias
LSTMBuffers<int16_t> buffers =
- CreateLSTMBuffers<int16_t>(context, op_data_lstm.buffer_indices);
- return CMSIS_NN_EvalInteger8x8_16Lstm<int16_t>(
- op_data, kernel_content, buffers);
+ CMSIS_NN_CreateLSTMBuffers(context, op_data_lstm.buffer_indices);
+ return CMSIS_NN_EvalInteger8x8_16Lstm(op_data, kernel_content,
+ buffers);
break;
}
default: {
@@ -435,10 +449,9 @@
if (activation_type == kTfLiteInt8) {
LSTMBuffers<int16_t> buffers =
- CreateLSTMBuffers<int16_t>(context, op_data_lstm.buffer_indices);
+ CMSIS_NN_CreateLSTMBuffers(context, op_data_lstm.buffer_indices);
- return CMSIS_NN_EvalInteger8x8_16Lstm<int16_t>(op_data, kernel_content,
- buffers);
+ return CMSIS_NN_EvalInteger8x8_16Lstm(op_data, kernel_content, buffers);
} else {
MicroPrintf("Input type %s (%d) not supported.",
TfLiteTypeGetName(activation_type), activation_type);
diff --git a/tensorflow/lite/micro/kernels/unidirectional_sequence_lstm_test.cc b/tensorflow/lite/micro/kernels/unidirectional_sequence_lstm_test.cc
index 06e8e73..1e5a868 100644
--- a/tensorflow/lite/micro/kernels/unidirectional_sequence_lstm_test.cc
+++ b/tensorflow/lite/micro/kernels/unidirectional_sequence_lstm_test.cc
@@ -28,7 +28,6 @@
namespace {
constexpr int kLstmMaxNumInputOutputTensors = 24 + 1;
-constexpr int kLstmIntermediateTensorBase = kLstmMaxNumInputOutputTensors + 1;
// Validate the output result array with golden values
template <typename T>
@@ -50,42 +49,20 @@
LstmNodeContent<ActivationType, WeightType, BiasType, CellType, batch_size,
time_steps, input_dimension, state_dimension>&
node_contents) {
- TfLiteTensor tensors[kLstmMaxNumInputOutputTensors + 1 + 5];
- memcpy(tensors, node_contents.GetTensors(),
- kLstmMaxNumInputOutputTensors * sizeof(TfLiteTensor));
-
- // Provide also intermediate tensors needed by older LSTM implementations
- int intermediate_array_data[6] = {5,
- kLstmIntermediateTensorBase,
- kLstmIntermediateTensorBase + 1,
- kLstmIntermediateTensorBase + 2,
- kLstmIntermediateTensorBase + 3,
- kLstmIntermediateTensorBase + 4};
- int input_zero_points[2] = {1, -21};
- float input_scales[2] = {1, 0.004705882165580988};
- TfLiteAffineQuantization input_quant = {
- tflite::testing::FloatArrayFromFloats(input_scales),
- tflite::testing::IntArrayFromInts(input_zero_points), 0};
- int intermediate_dim[2] = {1, 0};
- for (int i = 0; i < 5; ++i) {
- tensors[kLstmIntermediateTensorBase + i] =
- CreateTensor<int16_t>(nullptr, IntArrayFromInts(intermediate_dim));
- tensors[kLstmIntermediateTensorBase + i].quantization = {
- kTfLiteAffineQuantization, &input_quant};
- }
-
const TFLMRegistration registration = Register_UNIDIRECTIONAL_SEQUENCE_LSTM();
auto buildin_data = node_contents.BuiltinData();
micro::KernelRunner runner(
- registration, tensors, kLstmMaxNumInputOutputTensors + 1 + 5,
+ registration, node_contents.GetTensors(), kLstmMaxNumInputOutputTensors,
node_contents.KernelInputs(), node_contents.KernelOutputs(),
- reinterpret_cast<void*>(&buildin_data),
- IntArrayFromInts(intermediate_array_data));
+ reinterpret_cast<void*>(&buildin_data));
TF_LITE_MICRO_EXPECT_EQ(kTfLiteOk, runner.InitAndPrepare());
TF_LITE_MICRO_EXPECT_EQ(kTfLiteOk, runner.Invoke());
const auto& quantization_settings = node_contents.QuantizationSettings();
+// CMSIS-NN does not use the hidden state and cell state tensors so these tests
+// fail.
+#if !defined(CMSIS_NN)
float dequantized_hidden_state[batch_size * state_dimension] = {};
Dequantize(node_contents.GetHiddenStateData(), batch_size * state_dimension,
quantization_settings.hidden_state.scale,
@@ -104,6 +81,7 @@
ValidateResultGoldens(eval_check_data.expected_cell_state,
dequantized_cell_state, batch_size * state_dimension,
cell_state_tolerance);
+#endif
float dequantized_output[batch_size * state_dimension * time_steps] = {};
Dequantize(node_contents.GetOutputData(),
@@ -162,9 +140,6 @@
tolerance, float_node_contents);
}
-// TODO(#2249) Unidirectional_sequence_lstm_test fails for new CMSIS-NN lstm
-// implementation
-#if !defined(CMSIS_NN)
TF_LITE_MICRO_TEST(TestUnidirectionalLSTMInt8) {
const tflite::testing::LstmEvalCheckData<12, 4, 12> kernel_eval_data =
tflite::testing::Get2X2LstmEvalCheckData();
@@ -179,7 +154,6 @@
kernel_eval_data, hidden_state_tolerance, cell_state_tolerance,
int8_node_contents);
}
-#endif
TF_LITE_MICRO_TEST(TestUnidirectionalLSTMInt16) {
const tflite::testing::LstmEvalCheckData<12, 4, 12> kernel_eval_data =
diff --git a/tensorflow/lite/micro/tools/make/ext_libs/cmsis_nn_download.sh b/tensorflow/lite/micro/tools/make/ext_libs/cmsis_nn_download.sh
index 601c4d4..fae77aa 100755
--- a/tensorflow/lite/micro/tools/make/ext_libs/cmsis_nn_download.sh
+++ b/tensorflow/lite/micro/tools/make/ext_libs/cmsis_nn_download.sh
@@ -47,9 +47,9 @@
echo >&2 "${DOWNLOADED_CMSIS_NN_PATH} already exists, skipping the download."
else
- ZIP_PREFIX_NN="72e1ebf623ab1660a3e14e4e36fdcddce46f1991"
- CMSIS_NN_URL="http://github.com/ARM-software/CMSIS-NN/archive/${ZIP_PREFIX_NN}.zip"
- CMSIS_NN_MD5="23a623f4eca6c8f11ee5366c2cf61a44"
+ ZIP_PREFIX_NN="6cc31fb36fa330325b2bb0ffde3a7288384e58ab"
+ CMSIS_NN_URL="http://github.com/ARM-software/CMSIS-NN/archive/6cc31fb36fa330325b2bb0ffde3a7288384e58ab.zip"
+ CMSIS_NN_MD5="42000f264b93b7b6cd60c1b507792daf"
# wget is much faster than git clone of the entire repo. So we wget a specific
# version and can then apply a patch, as needed.
diff --git a/tensorflow/lite/micro/tools/make/targets/cortex_m_corstone_300_makefile.inc b/tensorflow/lite/micro/tools/make/targets/cortex_m_corstone_300_makefile.inc
index 653afad..64d0662 100644
--- a/tensorflow/lite/micro/tools/make/targets/cortex_m_corstone_300_makefile.inc
+++ b/tensorflow/lite/micro/tools/make/targets/cortex_m_corstone_300_makefile.inc
@@ -211,9 +211,4 @@
endif
MICROLITE_TEST_SRCS := $(filter-out $(EXCLUDED_TESTS), $(MICROLITE_TEST_SRCS))
-# TODO(#2449) Examine why this test fails here.
-EXCLUDED_EXAMPLE_TESTS := \
- tensorflow/lite/micro/examples/dtln/Makefile.inc
-MICRO_LITE_EXAMPLE_TESTS := $(filter-out $(EXCLUDED_EXAMPLE_TESTS), $(MICRO_LITE_EXAMPLE_TESTS))
-
TEST_SCRIPT := $(TENSORFLOW_ROOT)tensorflow/lite/micro/testing/test_with_arm_corstone_300.sh