Use the 1x1 d=32n kernel more aggressively.
Enable it for output_depth < 8 (oc < 8) when input_depth is a multiple of 32.
Change-Id: I10b07e77c8e4bd4b07245ee08e661d20be1cfd64
diff --git a/tflm/opt/conv_s8.cc b/tflm/opt/conv_s8.cc
index 079e1f8..64e22e1 100644
--- a/tflm/opt/conv_s8.cc
+++ b/tflm/opt/conv_s8.cc
@@ -209,6 +209,11 @@
RUN_KERNEL(kelvin::opt::ConvS8K1x1D32);
}
+ // TODO: Relax this kernel for all output_depths
+ if ((output_depth < 8) && (input_depth % 32) == 0) {
+ RUN_KERNEL(kelvin::opt::ConvS8K1x1D32);
+ }
+
if ((output_depth % 8) == 0 && (input_depth == 16)) {
RUN_KERNEL(kelvin::opt::ConvS8K1x1D16);
}