depthwise conv for 2d kernels

* Draft version of the kernel is working
* Kernel triggers for pad = 0 and int8
* Need to add more Kelvin instructions

Change-Id: Ifb16c7f7508458cede8bd078c657b0106e348a99
diff --git a/tensorflow/lite/micro/kernels/kelvin/depthwise_conv.cc b/tensorflow/lite/micro/kernels/kelvin/depthwise_conv.cc
index 489fa66..6e4a28d 100644
--- a/tensorflow/lite/micro/kernels/kelvin/depthwise_conv.cc
+++ b/tensorflow/lite/micro/kernels/kelvin/depthwise_conv.cc
@@ -88,9 +88,11 @@
           break;
         }
         case kTfLiteInt8: {
-          reference_integer_ops::DepthwiseConvPerChannel(
-              DepthwiseConvParamsQuantized(params, data),
-              data.per_channel_output_multiplier, data.per_channel_output_shift,
+          tflite::DepthwiseParams dw_params =
+              DepthwiseConvParamsQuantized(params, data);
+          kelvin::opt::DepthwiseConv2DKelvin(
+              dw_params, data.per_channel_output_multiplier,
+              data.per_channel_output_shift,
               tflite::micro::GetTensorShape(input),
               tflite::micro::GetTensorData<int8_t>(input),
               tflite::micro::GetTensorShape(filter),
@@ -133,14 +135,13 @@
                 data.per_channel_output_shift,
                 dw_params.quantized_activation_min,
                 dw_params.quantized_activation_max,
-                tflite::micro::GetTensorData<int16_t>(output)
-            );
+                tflite::micro::GetTensorData<int16_t>(output));
             break;
           }
 
           reference_integer_ops::DepthwiseConvPerChannel(
-              dw_params,
-              data.per_channel_output_multiplier, data.per_channel_output_shift,
+              dw_params, data.per_channel_output_multiplier,
+              data.per_channel_output_shift,
               tflite::micro::GetTensorShape(input),
               tflite::micro::GetTensorData<int16_t>(input),
               tflite::micro::GetTensorShape(filter),