Enable hifi5 optimized kernels (#2555)
Several of the Xtensa optimized kernels were guarded with #ifdefs that checked for HIFI3 and HIFI4 but not HIFI5. The same kernels are now provided by the HiFi5 NN library (nnlib), so HIFI5 can safely be added to those guards.
BUG=none
diff --git a/tensorflow/lite/micro/kernels/xtensa/conv.cc b/tensorflow/lite/micro/kernels/xtensa/conv.cc
index 0955b12..384dba9 100644
--- a/tensorflow/lite/micro/kernels/xtensa/conv.cc
+++ b/tensorflow/lite/micro/kernels/xtensa/conv.cc
@@ -91,7 +91,7 @@
#endif
}
case kTfLiteInt16: {
-#if defined(HIFI3) || defined(HIFI4)
+#if defined(HIFI3) || defined(HIFI4) || defined(HIFI5)
// Note that int32 bias is not widely supported and might be risky (e.g.
// http://b/262003750). As such, while we have a fallback to the reference
// implementation, production use-cases should only have int64 bias.
diff --git a/tensorflow/lite/micro/kernels/xtensa/conv_hifi.cc b/tensorflow/lite/micro/kernels/xtensa/conv_hifi.cc
index 45c8f4b..1d2d7ec 100644
--- a/tensorflow/lite/micro/kernels/xtensa/conv_hifi.cc
+++ b/tensorflow/lite/micro/kernels/xtensa/conv_hifi.cc
@@ -55,14 +55,14 @@
/* TODO(b/277112516): Dilation is currently not supported on HiFi 4 NN Library
*/
bool inputs_and_bias_ok = bias != nullptr;
-#if defined(HIFI3) || defined(HIFI4)
+#if defined(HIFI3) || defined(HIFI4) || defined(HIFI5)
inputs_and_bias_ok =
inputs_and_bias_ok &&
(input->type == kTfLiteInt8 ||
(input->type == kTfLiteInt16 && bias->type == kTfLiteInt64));
#else
inputs_and_bias_ok = inputs_and_bias_ok && (input->type == kTfLiteInt8);
-#endif // defined(HIFI3) || defined(HIFI4)
+#endif // defined(HIFI3) || defined(HIFI4) || defined(HIFI5)
if (!(inputs_and_bias_ok && params->dilation_width_factor == 1 &&
params->dilation_height_factor == 1 &&
input_shape.Dims(1) >= filter_shape.Dims(1) &&
@@ -115,7 +115,7 @@
return kTfLiteOk;
}
-#if defined(HIFI3) || defined(HIFI4)
+#if defined(HIFI3) || defined(HIFI4) || defined(HIFI5)
TfLiteStatus ConvEvalHifiInt16(TfLiteContext* context, TfLiteNode* node,
const TfLiteConvParams& params,
const XtensaConvOpData& data,
@@ -210,7 +210,7 @@
return kTfLiteOk;
}
-#endif // defined(HIFI3) || defined(HIFI4)
+#endif // defined(HIFI3) || defined(HIFI4) || defined(HIFI5)
TfLiteStatus ConvEvalHifiInt8(TfLiteContext* context, TfLiteNode* node,
const TfLiteConvParams& params,
diff --git a/tensorflow/lite/micro/kernels/xtensa/conv_int8_int16.cc b/tensorflow/lite/micro/kernels/xtensa/conv_int8_int16.cc
index d46546d..ed64f01 100644
--- a/tensorflow/lite/micro/kernels/xtensa/conv_int8_int16.cc
+++ b/tensorflow/lite/micro/kernels/xtensa/conv_int8_int16.cc
@@ -54,7 +54,7 @@
}
TfLiteStatus EvalInt16(TfLiteContext* context, TfLiteNode* node) {
-#if defined(HIFI3) || defined(HIFI4)
+#if defined(HIFI3) || defined(HIFI4) || defined(HIFI5)
const auto& op_data = *(reinterpret_cast<XtensaConvOpData*>(node->user_data));
const auto& params =
*(reinterpret_cast<TfLiteConvParams*>(node->builtin_data));
diff --git a/tensorflow/lite/micro/kernels/xtensa/leaky_relu.cc b/tensorflow/lite/micro/kernels/xtensa/leaky_relu.cc
index 61ab985..c1ed1d6 100644
--- a/tensorflow/lite/micro/kernels/xtensa/leaky_relu.cc
+++ b/tensorflow/lite/micro/kernels/xtensa/leaky_relu.cc
@@ -76,7 +76,7 @@
return kTfLiteOk;
} break;
case kTfLiteInt16: {
-#if defined(HIFI3) || defined(HIFI4)
+#if defined(HIFI3) || defined(HIFI4) || defined(HIFI5)
const RuntimeShape& input_shape = tflite::micro::GetTensorShape(input);
const RuntimeShape& output_shape = tflite::micro::GetTensorShape(output);
const int flat_size = MatchingFlatSize(input_shape, output_shape);
@@ -89,7 +89,7 @@
if (err != 0) return kTfLiteError;
#else
QuantizeLeakyRelu<int16_t>(data, input, output);
-#endif // defined(HIFI3) || defined(HIFI4)
+#endif // defined(HIFI3) || defined(HIFI4) || defined(HIFI5)
return kTfLiteOk;
} break;
default:
diff --git a/tensorflow/lite/micro/kernels/xtensa/pad.cc b/tensorflow/lite/micro/kernels/xtensa/pad.cc
index 86d0ab3..d822c28 100644
--- a/tensorflow/lite/micro/kernels/xtensa/pad.cc
+++ b/tensorflow/lite/micro/kernels/xtensa/pad.cc
@@ -215,7 +215,7 @@
constant_values == nullptr
? 0
: *tflite::micro::GetTensorData<int16_t>(constant_values);
-#if defined(HIFI3) || defined(HIFI4)
+#if defined(HIFI3) || defined(HIFI4) || defined(HIFI5)
/* NNLib currently only supports up to 4D input tensors */
if (tflite::micro::GetTensorShape(input).DimensionsCount() == 4) {
const TfLiteEvalTensor* paddings =
@@ -233,14 +233,14 @@
pad_value);
if (err != 0) return kTfLiteError;
} else {
-#endif // defined(HIFI3) || defined(HIFI4)
+#endif // defined(HIFI3) || defined(HIFI4) || defined(HIFI5)
reference_ops::Pad(data->params, tflite::micro::GetTensorShape(input),
tflite::micro::GetTensorData<int16_t>(input),
&pad_value, tflite::micro::GetTensorShape(output),
tflite::micro::GetTensorData<int16_t>(output));
-#if defined(HIFI3) || defined(HIFI4)
+#if defined(HIFI3) || defined(HIFI4) || defined(HIFI5)
}
-#endif // defined(HIFI3) || defined(HIFI4)
+#endif // defined(HIFI3) || defined(HIFI4) || defined(HIFI5)
} break;
case kTfLiteInt32: {
int32_t pad_value =
diff --git a/tensorflow/lite/micro/kernels/xtensa/strided_slice.cc b/tensorflow/lite/micro/kernels/xtensa/strided_slice.cc
index 4b5fe92..8ebf724 100644
--- a/tensorflow/lite/micro/kernels/xtensa/strided_slice.cc
+++ b/tensorflow/lite/micro/kernels/xtensa/strided_slice.cc
@@ -30,7 +30,7 @@
namespace tflite {
namespace {
-#if defined(HIFI3) || defined(HIFI4)
+#if defined(HIFI3) || defined(HIFI4) || defined(HIFI5)
void StridedSlice_int16_hifi4opt(const tflite::StridedSliceParams& op_params,
const RuntimeShape& unextended_input_shape,
const int16_t* input_data,
@@ -77,7 +77,7 @@
input_shape.Dims(1), input_shape.Dims(2),
input_shape.Dims(3), input_shape.Dims(4));
}
-#endif // defined(HIFI3) || defined(HIFI4)
+#endif // defined(HIFI3) || defined(HIFI4) || defined(HIFI5)
TfLiteStatus Eval(TfLiteContext* context, TfLiteNode* node) {
TFLITE_DCHECK(node->user_data != nullptr);
@@ -104,7 +104,7 @@
tflite::micro::GetTensorData<int8_t>(output));
break;
case kTfLiteInt16:
-#if defined(HIFI3) || defined(HIFI4)
+#if defined(HIFI3) || defined(HIFI4) || defined(HIFI5)
StridedSlice_int16_hifi4opt(
op_params, tflite::micro::GetTensorShape(input),
tflite::micro::GetTensorData<int16_t>(input),
@@ -116,7 +116,7 @@
tflite::micro::GetTensorData<int16_t>(input),
tflite::micro::GetTensorShape(output),
tflite::micro::GetTensorData<int16_t>(output));
-#endif // defined(HIFI3) || defined(HIFI4)
+#endif // defined(HIFI3) || defined(HIFI4) || defined(HIFI5)
break;
case kTfLiteInt32:
reference_ops::StridedSlice(
diff --git a/tensorflow/lite/micro/kernels/xtensa/xtensa_conv.h b/tensorflow/lite/micro/kernels/xtensa/xtensa_conv.h
index 16fbc9f..f804a6d 100644
--- a/tensorflow/lite/micro/kernels/xtensa/xtensa_conv.h
+++ b/tensorflow/lite/micro/kernels/xtensa/xtensa_conv.h
@@ -51,7 +51,6 @@
const TfLiteEvalTensor* bias,
TfLiteEvalTensor* output);
-#if defined(HIFI3) || defined(HIFI4)
TfLiteStatus ConvEvalHifiInt16(TfLiteContext* context, TfLiteNode* node,
const TfLiteConvParams& params,
const XtensaConvOpData& data,
@@ -59,7 +58,6 @@
const TfLiteEvalTensor* filter,
const TfLiteEvalTensor* bias,
TfLiteEvalTensor* output);
-#endif // defined(HIFI3) || defined(HIFI4)
#endif // defined(HIFI3) || defined(HIFI4) || defined(HIFI5)