Fail per-channel quantized FullyConnected layers (#2602)
The TFLiteConverter recently switched to using per-channel quantization for all Dense/FullyConnected layers. TFLite-Micro does not yet support this, and was applying incorrect quantization parameters to FullyConnected layers in newly converted models. Unsurprisingly, this leads to invalid output.
While we intend to add per-channel quantization support for FullyConnected, this PR adds a runtime check that rejects per-channel-quantized FullyConnected layers until the individual kernels support them. If you encounter this runtime error, you can disable the new converter behavior by setting:
`TfLiteConverter._experimental_disable_per_channel_quantization_for_dense_layers = True` https://github.com/tensorflow/tensorflow/blob/377f47694fa790e98db6665b9adecde00b5e0d68/tensorflow/lite/python/lite.py#L674
BUG=b/324385802
diff --git a/tensorflow/lite/micro/kernels/fully_connected_common.cc b/tensorflow/lite/micro/kernels/fully_connected_common.cc
index 5a8d312..66f8787 100644
--- a/tensorflow/lite/micro/kernels/fully_connected_common.cc
+++ b/tensorflow/lite/micro/kernels/fully_connected_common.cc
@@ -57,6 +57,24 @@
TfLiteType data_type, const TfLiteTensor* input, const TfLiteTensor* filter,
const TfLiteTensor* bias, TfLiteTensor* output,
OpDataFullyConnected* data) {
+ // TODO(b/324385802): Support per-channel quantization for FullyConnected.
+ // If you have hit this failure message, you will need to disable this
+ // behavior. This can be done by setting the following flag to true:
+ // TfLiteConverter._experimental_disable_per_channel_quantization_for_dense_layers
+ // https://github.com/tensorflow/tensorflow/blob/377f47694fa790e98db6665b9adecde00b5e0d68/tensorflow/lite/python/lite.py#L674
+ if (filter->quantization.type == kTfLiteAffineQuantization &&
+ filter->quantization.params != nullptr) {
+ TfLiteAffineQuantization* affine_quantization =
+ reinterpret_cast<TfLiteAffineQuantization*>(
+ filter->quantization.params);
+ TF_LITE_ENSURE(context, affine_quantization->scale);
+ TF_LITE_ENSURE_MSG(
+ context, affine_quantization->scale->size == 1,
+ "FullyConnected per-channel quantization not yet supported. Please set "
+ "converter._experimental_disable_per_channel_quantization_for_dense_"
+ "layers = True.");
+ }
+
if (data_type != kTfLiteFloat32) {
double real_multiplier = 0.0;
TF_LITE_ENSURE_STATUS(GetQuantizedConvolutionMultipler(
diff --git a/tensorflow/lite/micro/kernels/testdata/lstm_test_data.h b/tensorflow/lite/micro/kernels/testdata/lstm_test_data.h
index 3edf420..932b832 100644
--- a/tensorflow/lite/micro/kernels/testdata/lstm_test_data.h
+++ b/tensorflow/lite/micro/kernels/testdata/lstm_test_data.h
@@ -390,9 +390,9 @@
int state_size_[3] = {2, batch_size, state_dimension};
// see lstm_shared.h for tensor names, the last tensor is the output tensor
- TfLiteTensor tensors_[24 + 1];
+ TfLiteTensor tensors_[24 + 1] = {};
// Use for internel kernel testing
- TfLiteEvalTensor eval_tensors_[24 + 1];
+ TfLiteEvalTensor eval_tensors_[24 + 1] = {};
// indices for the tensors inside the node (required by kernel runner)
int input_tensor_indices_[1 + 24] = {};
// single output (last in the tensors array)
diff --git a/tensorflow/lite/micro/tools/requantize_flatbuffer_test.py b/tensorflow/lite/micro/tools/requantize_flatbuffer_test.py
index 4d80991..342f893 100644
--- a/tensorflow/lite/micro/tools/requantize_flatbuffer_test.py
+++ b/tensorflow/lite/micro/tools/requantize_flatbuffer_test.py
@@ -24,7 +24,7 @@
from tflite_micro.tensorflow.lite.tools import flatbuffer_utils
-#TODO(b/248061370): replace the keras model creation process with flatbuffer manipulation to speed up test
+# TODO(b/248061370): replace the keras model creation process with flatbuffer manipulation to speed up test
def create_simple_fc_model():
'''Create a simple model with two fully connected(fc) layers'''
model = tf.keras.models.Sequential([
@@ -60,6 +60,8 @@
EXPERIMENTAL_TFLITE_BUILTINS_ACTIVATIONS_INT16_WEIGHTS_INT8
]
converter.representative_dataset = representative_dataset_gen
+ # TODO(b/324385802): Support per-channel quantization for FullyConnected.
+ converter._experimental_disable_per_channel_quantization_for_dense_layers = True
return converter.convert()