Add the ssd post-processing block
Add the ssd post-processing block, including box decoding and
extractions, and non-max suppression (NMS).
This code is refactored based on
https://spacebeaker-review.googlesource.com/c/shodan/sw/vec-iree/+/24503.
The libraries can be built successfully, and the results have been validated.
Change-Id: I30cf18982def7a6926f40fb7b5831038d58604af
diff --git a/CMakeLists.txt b/CMakeLists.txt
index 314fda5..74835f8 100644
--- a/CMakeLists.txt
+++ b/CMakeLists.txt
@@ -102,6 +102,7 @@
add_subdirectory(risp4ml)
add_subdirectory(samples)
+add_subdirectory(ssd_postprocess)
# Add pigweed support
include($ENV{ROOTDIR}/sw/pigweed/pw_build/pigweed.cmake)
diff --git a/README.md b/README.md
index d352caa..049ca2c 100644
--- a/README.md
+++ b/README.md
@@ -27,6 +27,7 @@
* risp4ml: Vision preprocessing library (Reduced ISP for ML)
* samples: Codegen and execution of ML models based on IREE
* simple_vec_mul: Point-wise vector multiplication examples
+* ssd_postprocess: Vision postprocessing library for Single-Shot Detectors (SSD)
## Build the project
diff --git a/ssd_postprocess/CMakeLists.txt b/ssd_postprocess/CMakeLists.txt
new file mode 100644
index 0000000..94a6e52
--- /dev/null
+++ b/ssd_postprocess/CMakeLists.txt
@@ -0,0 +1,31 @@
+# SSD box decoding and extraction library: box.c with its public headers.
+iree_cc_library(
+ NAME
+ box
+ HDRS
+ "box.h"
+ "common.h"
+ SRCS
+ "box.c"
+)
+
+# Non-maximum suppression library: nms.c with its public headers.
+iree_cc_library(
+ NAME
+ nms
+ HDRS
+ "common.h"
+ "nms.h"
+ SRCS
+ "nms.c"
+)
+
+# Full post-processing pipeline: pipeline.c, built on the box and nms targets.
+iree_cc_library(
+ NAME
+ pipeline
+ HDRS
+ "pipeline.h"
+ SRCS
+ "pipeline.c"
+ DEPS
+ ::box
+ ::nms
+)
diff --git a/ssd_postprocess/box.c b/ssd_postprocess/box.c
new file mode 100644
index 0000000..ff0e7a8
--- /dev/null
+++ b/ssd_postprocess/box.c
@@ -0,0 +1,185 @@
+/*
+ * Copyright 2022 Google LLC
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+// SSD box decoding and extracting
+
+#include "ssd_postprocess/box.h"
+
+#include <math.h>
+#include <stdio.h>
+#include <stdlib.h>
+#include <string.h>
+
+// Parameter set used by every function in this file. It starts out with the
+// defaults below (320x320 input, 4 layers, 1602 anchor boxes) and is replaced
+// as a whole by set_params().
+static SsdParams params = {
+ .num_layers = 4,
+ .num_boxes = 1602,
+ .input_height = 320,
+ .input_width = 320,
+ .global_scales = {10, 10, 5, 5}, // y, x, h, w
+ // Quantization zero points and scales below hold one entry per layer.
+ .box_zero_points = {115, 129, 125, 119},
+ .box_scales = {0.0813235, 0.0786732, 0.0687513, 0.0522251},
+ .score_zero_points = {211, 195, 200, 225},
+ .score_scales = {0.177373, 0.121247, 0.100491, 0.0550178},
+ .score_threshold = 0.5,
+ .anchors_per_cell = 3,
+ // anchors_per_cell base sizes for each layer, stored layer after layer.
+ .anchor_base_size = {24.0, 32.0, 40.0, 48.0, 64.0, 80.0, 96.0, 128.0, 160.0,
+ 192.0, 256.0, 320.0},
+ .anchor_stride = {16, 32, 64, 128}};
+
+// Set SSD parameters.
+// Copies *params_in over the file-level parameter set that the decoding
+// functions read. A NULL params_in is ignored and the current parameters are
+// left untouched.
+void set_params(SsdParams* params_in) {
+  if (params_in == NULL) {
+    return;
+  }
+  params = *params_in;
+}
+
+// Map a quantized value back to a real number: scale * (val - zero_point).
+static inline float dequantize(int val, int zero_point, float scale) {
+  const int centered = val - zero_point;
+  return scale * centered;
+}
+
+// Logistic function 1 / (1 + e^-val). The sum and division are evaluated in
+// double precision and narrowed to float on return.
+static inline float sigmoid(float val) {
+  const double denominator = 1.0 + expf(-val);
+  return 1.0 / denominator;
+}
+
+// Generate model anchors.
+// Each layer tiles the input into ceil(input_size / stride) cells per axis and
+// emits anchors_per_cell anchors for every cell, in row-major cell order.
+// Anchor y/x are the cell offset (stride * index) divided by the input size;
+// anchor h/w are the layer's base sizes divided by the input size.
+// With the default parameters:
+//   layer0: 20 * 20 * 3 = 1200
+//   layer1: 10 * 10 * 3 = 300
+//   layer2: 5 * 5 * 3 = 75
+//   layer3: 3 * 3 * 3 = 27
+//   total sum: 1602
+// `anchors` must have room for every anchor generated.
+static void generate_anchors(BoxCenterEncode* anchors) {
+  BoxCenterEncode* out = anchors;
+  for (int layer = 0; layer < params.num_layers; ++layer) {
+    const int stride = params.anchor_stride[layer];
+    const int rows = (params.input_height + stride - 1) / stride;
+    const int cols = (params.input_width + stride - 1) / stride;
+    // First base-size slot belonging to this layer.
+    const int base_offset = layer * params.anchors_per_cell;
+    for (int row = 0; row < rows; row++) {
+      for (int col = 0; col < cols; col++) {
+        for (int k = 0; k < params.anchors_per_cell; ++k) {
+          const float base_size = params.anchor_base_size[base_offset + k];
+          out->y = (float)stride * row / params.input_height;
+          out->x = (float)stride * col / params.input_width;
+          out->h = base_size / params.input_height;
+          out->w = base_size / params.input_width;
+          out++;
+        }
+      }
+    }
+  }
+}
+
+// Decode boxes (with score) from model inference outputs.
+// model_out stores two buffers per layer: box data at the even index
+// (2 * layer) and scores at the odd index (2 * layer + 1).
+// The locations channel dim is 16 x 3.
+// Each 16 is composed of (4 box coordinates + 6 * 2 landmarks coordinates).
+// Only the first 4 values of every group of 16 are read (y, x, h, w), i.e.
+// indices:
+//    0,  1,  2,  3
+//   16, 17, 18, 19
+//   32, 33, 34, 35
+// Box values and scores are dequantized with the layer's zero point and
+// scale; the score is additionally passed through sigmoid().
+static void decode_boxes(uint8_t** model_out, BoxCenterEncode* boxes) {
+  const int coords_per_anchor = 16;
+  BoxCenterEncode* out = boxes;
+  for (int layer = 0; layer < params.num_layers; layer++) {
+    const int stride = params.anchor_stride[layer];
+    const int rows = (params.input_height + stride - 1) / stride;
+    const int cols = (params.input_width + stride - 1) / stride;
+    const uint8_t* box_data = model_out[2 * layer];
+    const uint8_t* score_data = model_out[2 * layer + 1];
+    const int box_zero = params.box_zero_points[layer];
+    const float box_scale = params.box_scales[layer];
+    const int score_zero = params.score_zero_points[layer];
+    const float score_scale = params.score_scales[layer];
+
+    // Running anchor index within this layer (cell * anchors_per_cell + a).
+    int anchor_idx = 0;
+    for (int cell = 0; cell < rows * cols; cell++) {
+      for (int a = 0; a < params.anchors_per_cell; a++) {
+        const uint8_t* coords = box_data + coords_per_anchor * anchor_idx;
+        out->y = dequantize(coords[0], box_zero, box_scale);
+        out->x = dequantize(coords[1], box_zero, box_scale);
+        out->h = dequantize(coords[2], box_zero, box_scale);
+        out->w = dequantize(coords[3], box_zero, box_scale);
+        out->score = sigmoid(
+            dequantize(score_data[anchor_idx], score_zero, score_scale));
+        anchor_idx++;
+        out++;
+      }
+    }
+  }
+}
+
+// Convert box from center encoding to corner encoding format.
+// The offsets in box_in are divided by the global scales (y, x, h, w) and
+// applied relative to the anchor: the center is shifted by the anchor size,
+// the half extents are the anchor size scaled by exp(offset). The score is
+// copied through unchanged.
+static void convert_box(const BoxCenterEncode* box_in, BoxCenterEncode* anchor,
+                        BoxCornerEncode* box_out) {
+  const float center_y =
+      box_in->y / params.global_scales[0] * anchor->h + anchor->y;
+  const float center_x =
+      box_in->x / params.global_scales[1] * anchor->w + anchor->x;
+  const float half_height =
+      0.5 * expf(box_in->h / params.global_scales[2]) * anchor->h;
+  const float half_width =
+      0.5 * expf(box_in->w / params.global_scales[3]) * anchor->w;
+
+  *box_out = (BoxCornerEncode){.ymin = center_y - half_height,
+                               .xmin = center_x - half_width,
+                               .ymax = center_y + half_height,
+                               .xmax = center_x + half_width,
+                               .score = box_in->score};
+}
+
+// Detect boxes by score thresholding.
+// Counts the entries of boxes_in whose score exceeds params.score_threshold
+// and writes them, converted to corner encoding, to boxes_out->box.
+// If boxes_out->box is NULL on entry, a buffer of exactly the required size is
+// allocated with malloc(); it is then owned by the caller, who must free() it.
+// A buffer supplied by the caller is used as-is and must be large enough for
+// every detection.
+// When nothing passes the threshold, or the allocation fails,
+// boxes_out->num_boxes is set to 0 and no buffer is allocated.
+static void detect_boxes(const BoxCenterEncode* boxes_in,
+                         BoxCenterEncode* anchors, Boxes* boxes_out) {
+  int num_detected_boxes = 0;
+  for (int i = 0; i < params.num_boxes; ++i) {
+    if (boxes_in[i].score > params.score_threshold) {
+      num_detected_boxes++;
+    }
+  }
+
+  boxes_out->num_boxes = 0;
+  if (num_detected_boxes == 0) {
+    // Avoid malloc(0), whose result is implementation-defined.
+    return;
+  }
+  if (!(boxes_out->box)) {
+    boxes_out->box =
+        (BoxCornerEncode*)malloc(sizeof(BoxCornerEncode) * num_detected_boxes);
+    if (!(boxes_out->box)) {
+      return;
+    }
+  }
+
+  int out_idx = 0;
+  for (int i = 0; i < params.num_boxes; ++i) {
+    if (boxes_in[i].score > params.score_threshold) {
+      convert_box(&(boxes_in[i]), &(anchors[i]), &(boxes_out->box[out_idx]));
+      out_idx++;
+    }
+  }
+  boxes_out->num_boxes = out_idx;
+}
+
+// Decode and extract detected boxes.
+// Generates the anchors, dequantizes the model outputs into center-encoded
+// boxes, then keeps the boxes whose score passes the threshold.
+// model_out holds two buffers per layer (boxes, then scores).
+// boxes_out->box is allocated by detect_boxes() when it is NULL on entry; the
+// caller owns and frees it.
+// If params.num_boxes is not positive or a scratch buffer cannot be allocated,
+// boxes_out->num_boxes is set to 0 and nothing else is touched.
+void get_detected_boxes(uint8_t** model_out, Boxes* boxes_out) {
+  if (params.num_boxes <= 0) {
+    boxes_out->num_boxes = 0;
+    return;
+  }
+
+  // calloc so that any entry the generators do not reach reads as zero instead
+  // of being indeterminate.
+  BoxCenterEncode* boxes_in = (BoxCenterEncode*)calloc(
+      (size_t)params.num_boxes, sizeof(BoxCenterEncode));
+  BoxCenterEncode* anchors = (BoxCenterEncode*)calloc(
+      (size_t)params.num_boxes, sizeof(BoxCenterEncode));
+  if (!boxes_in || !anchors) {
+    free(anchors);
+    free(boxes_in);
+    boxes_out->num_boxes = 0;
+    return;
+  }
+
+  generate_anchors(anchors);
+
+  decode_boxes(model_out, boxes_in);
+
+  detect_boxes(boxes_in, anchors, boxes_out);
+
+  free(anchors);
+  free(boxes_in);
+}
diff --git a/ssd_postprocess/box.h b/ssd_postprocess/box.h
new file mode 100644
index 0000000..68ac936
--- /dev/null
+++ b/ssd_postprocess/box.h
@@ -0,0 +1,64 @@
+/*
+ * Copyright 2022 Google LLC
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#ifndef SSD_POSTPROCESS_BOX_H_
+#define SSD_POSTPROCESS_BOX_H_
+
+#include <stdint.h>
+
+#include "ssd_postprocess/common.h"
+
+#ifdef __cplusplus
+extern "C" {
+#endif // __cplusplus
+
+// Number of coordinates describing a box (y, x, h, w).
+#ifndef SSD_BOX_DIMS
+#define SSD_BOX_DIMS 4
+#endif
+// Upper bound on the number of layers a parameter set can describe.
+#ifndef SSD_MAX_LAYERS
+#define SSD_MAX_LAYERS 8
+#endif
+// Upper bound on the number of anchors generated per cell.
+#ifndef SSD_MAX_ANCHORS_PER_CELL
+#define SSD_MAX_ANCHORS_PER_CELL 6
+#endif
+
+// SSD post-processing parameters.
+// Arrays indexed by layer are sized by SSD_MAX_LAYERS so that any num_layers
+// up to that bound stays in range; only global_scales is indexed by box
+// coordinate.
+typedef struct {
+  int num_layers;    // layers in use, at most SSD_MAX_LAYERS
+  int num_boxes;     // total anchors/boxes across all layers
+  int input_height;  // model input height
+  int input_width;   // model input width
+  int global_scales[SSD_BOX_DIMS];        // divisors for y, x, h, w offsets
+  int box_zero_points[SSD_MAX_LAYERS];    // per-layer box zero point
+  float box_scales[SSD_MAX_LAYERS];       // per-layer box scale
+  int score_zero_points[SSD_MAX_LAYERS];  // per-layer score zero point
+  float score_scales[SSD_MAX_LAYERS];     // per-layer score scale
+  float score_threshold;  // boxes must score above this to be kept
+  int anchors_per_cell;   // at most SSD_MAX_ANCHORS_PER_CELL
+  // anchors_per_cell base sizes per layer, stored layer after layer.
+  float anchor_base_size[SSD_MAX_LAYERS * SSD_MAX_ANCHORS_PER_CELL];
+  int anchor_stride[SSD_MAX_LAYERS];  // per-layer cell stride
+} SsdParams;
+
+// Set SSD parameters
+// Replaces the parameter set used by get_detected_boxes() with a copy of
+// *params; the caller keeps ownership of the struct it passes in.
+void set_params(SsdParams* params);
+
+// Decode and extract detected boxes from model outputs
+// model_out holds two buffers per layer: box data at index 2 * layer and
+// scores at index 2 * layer + 1.
+// On return boxes->num_boxes is the number of detections. If boxes->box is
+// NULL on entry it is allocated with malloc() and must be freed by the caller.
+void get_detected_boxes(uint8_t** model_out, Boxes* boxes);
+
+#ifdef __cplusplus
+}  // extern "C"
+#endif  // __cplusplus
+
+#endif // SSD_POSTPROCESS_BOX_H_
diff --git a/ssd_postprocess/common.h b/ssd_postprocess/common.h
new file mode 100644
index 0000000..a88592b
--- /dev/null
+++ b/ssd_postprocess/common.h
@@ -0,0 +1,41 @@
+/*
+ * Copyright 2022 Google LLC
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#ifndef SSD_POSTPROCESS_COMMON_H_
+#define SSD_POSTPROCESS_COMMON_H_
+
+// Box in center encoding: a y/x position, an h/w size and a score.
+// Used both for anchors and for the raw boxes decoded from the model outputs.
+typedef struct {
+ float y;
+ float x;
+ float h;
+ float w;
+ float score;
+} BoxCenterEncode;
+
+// Box in corner encoding: minimum and maximum y/x corners and a score.
+typedef struct {
+ float ymin;
+ float xmin;
+ float ymax;
+ float xmax;
+ float score;
+} BoxCornerEncode;
+
+// A list of corner-encoded boxes: `box` points to `num_boxes` valid entries.
+typedef struct {
+ int num_boxes;
+ BoxCornerEncode* box;
+} Boxes;
+
+#endif // SSD_POSTPROCESS_COMMON_H_
diff --git a/ssd_postprocess/nms.c b/ssd_postprocess/nms.c
new file mode 100644
index 0000000..4a28302
--- /dev/null
+++ b/ssd_postprocess/nms.c
@@ -0,0 +1,95 @@
+/*
+ * Copyright 2022 Google LLC
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+// NMS (Non-Maximum Suppression) algorithm
+
+#include "ssd_postprocess/nms.h"
+
+#include <stdint.h>
+#include <stdio.h>
+#include <stdlib.h>
+#include <string.h>
+
+// Fallback MIN/MAX, defined only if no definition is already visible.
+// Each argument can be evaluated twice, so pass expressions without side
+// effects.
+#ifndef MIN
+#define MIN(a, b) (((a) < (b)) ? (a) : (b))
+#endif
+#ifndef MAX
+#define MAX(a, b) (((a) > (b)) ? (a) : (b))
+#endif
+
+// Compute the area of one box.
+// Returns 0 when the width or the height is not positive. Each extent is
+// checked on its own, because multiplying two negative extents would yield a
+// positive "area" for an inverted (or empty-intersection) box.
+static float compute_box_area(const BoxCornerEncode* box) {
+  const float width = box->xmax - box->xmin;
+  const float height = box->ymax - box->ymin;
+  if (width <= 0.0f || height <= 0.0f) {
+    return 0.0f;
+  }
+  return width * height;
+}
+
+// Compute the IOU (intersection over union) of two boxes.
+// Returns 0 when either box has no area or when the boxes do not overlap.
+// The overlap is tested per axis: boxes that are disjoint along both axes have
+// two negative overlap extents, whose product must not be taken as an area.
+static float compute_two_boxes_iou(const BoxCornerEncode* box1,
+                                   const BoxCornerEncode* box2) {
+  const float area1 = compute_box_area(box1);
+  const float area2 = compute_box_area(box2);
+  if (area1 <= 0 || area2 <= 0) return 0.0;
+
+  const float overlap_h =
+      MIN(box1->ymax, box2->ymax) - MAX(box1->ymin, box2->ymin);
+  const float overlap_w =
+      MIN(box1->xmax, box2->xmax) - MAX(box1->xmin, box2->xmin);
+  if (overlap_h <= 0 || overlap_w <= 0) return 0.0;
+
+  const float intersection_area = overlap_h * overlap_w;
+  return intersection_area / (area1 + area2 - intersection_area);
+}
+
+// qsort comparator ordering boxes by descending score: negative when p scores
+// higher than q, positive when it scores lower, zero otherwise.
+static int comparator(const void* p, const void* q) {
+  const float score_p = ((const BoxCornerEncode*)p)->score;
+  const float score_q = ((const BoxCornerEncode*)q)->score;
+  if (score_p > score_q) {
+    return -1;
+  }
+  if (score_p < score_q) {
+    return 1;
+  }
+  return 0;
+}
+
+// Perform non-maximum suppression algorithm to remove "similar" bounding boxes.
+// boxes_in->box is sorted in place by descending score. Boxes are then taken
+// in that order; each kept box suppresses every later box whose IOU with it
+// exceeds iou_threshold. At most max_boxes kept boxes are copied to
+// boxes_out->box, which the caller must provide with room for max_boxes
+// entries, and boxes_out->num_boxes receives the number copied.
+// If boxes_in is empty or the scratch buffer cannot be allocated,
+// boxes_out->num_boxes is set to 0 and boxes_in is left unsorted.
+void nms(Boxes* boxes_in, Boxes* boxes_out, const int max_boxes,
+         const float iou_threshold) {
+  const int num_boxes = boxes_in->num_boxes;
+  boxes_out->num_boxes = 0;
+  if (num_boxes <= 0) {
+    // Nothing to do; also avoids malloc(0) and qsort() on a NULL array.
+    return;
+  }
+
+  uint8_t* is_suppressed =
+      (uint8_t*)calloc((size_t)num_boxes, sizeof(uint8_t));
+  if (!is_suppressed) {
+    return;
+  }
+
+  // quick sort from greatest to smallest
+  qsort(boxes_in->box, num_boxes, sizeof(BoxCornerEncode), comparator);
+
+  // A box's fate depends only on higher-scoring kept boxes, so each survivor
+  // can be emitted as soon as it is reached and the scan can stop once
+  // max_boxes have been emitted.
+  int ind_out = 0;
+  for (int i = 0; i < num_boxes && ind_out < max_boxes; i++) {
+    if (is_suppressed[i]) {
+      continue;
+    }
+    boxes_out->box[ind_out++] = boxes_in->box[i];
+    for (int j = i + 1; j < num_boxes; j++) {
+      if (!is_suppressed[j] &&
+          compute_two_boxes_iou(&(boxes_in->box[i]), &(boxes_in->box[j])) >
+              iou_threshold) {
+        is_suppressed[j] = 1;
+      }
+    }
+  }
+  boxes_out->num_boxes = ind_out;
+
+  free(is_suppressed);
+}
diff --git a/ssd_postprocess/nms.h b/ssd_postprocess/nms.h
new file mode 100644
index 0000000..183fc23
--- /dev/null
+++ b/ssd_postprocess/nms.h
@@ -0,0 +1,34 @@
+/*
+ * Copyright 2022 Google LLC
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#ifndef SSD_POSTPROCESS_NMS_H_
+#define SSD_POSTPROCESS_NMS_H_
+
+#include "ssd_postprocess/common.h"
+
+#ifdef __cplusplus
+extern "C" {
+#endif // __cplusplus
+
+// Perform non-maximum suppression algorithm to remove "similar" bounding boxes
+// boxes_in->box is sorted in place by descending score. Up to max_boxes
+// surviving boxes are copied into boxes_out->box, which the caller must
+// allocate, and boxes_out->num_boxes is set to the number copied.
+// A box is dropped when its IOU with a higher-scoring kept box exceeds
+// iou_threshold.
+void nms(Boxes* boxes_in, Boxes* boxes_out, const int max_boxes,
+ const float iou_threshold);
+
+#ifdef __cplusplus
+}  // extern "C"
+#endif  // __cplusplus
+
+#endif // SSD_POSTPROCESS_NMS_H_
diff --git a/ssd_postprocess/pipeline.c b/ssd_postprocess/pipeline.c
new file mode 100644
index 0000000..ebe251b
--- /dev/null
+++ b/ssd_postprocess/pipeline.c
@@ -0,0 +1,37 @@
+/*
+ * Copyright 2022 Google LLC
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+// SSD post-processing pipeline
+
+#include "ssd_postprocess/pipeline.h"
+
+#include <stdlib.h>
+
+#include "ssd_postprocess/box.h"
+#include "ssd_postprocess/nms.h"
+
+// Run the whole SSD post-processing chain on the raw model outputs: decode
+// and threshold the boxes, then apply non-max suppression. The surviving
+// boxes (at most max_faces) are written to `boxes`.
+void ssd_postprocess_pipeline(uint8_t** model_out, Boxes* boxes,
+                              const int max_faces, const float iou_threshold) {
+  // box starts as NULL so that get_detected_boxes() allocates the buffer.
+  Boxes candidates;
+  candidates.num_boxes = 0;
+  candidates.box = NULL;
+
+  get_detected_boxes(model_out, &candidates);
+  nms(&candidates, boxes, max_faces, iou_threshold);
+
+  // free(NULL) is a no-op, so the pointer needs no guard.
+  free(candidates.box);
+}
diff --git a/ssd_postprocess/pipeline.h b/ssd_postprocess/pipeline.h
new file mode 100644
index 0000000..85719a6
--- /dev/null
+++ b/ssd_postprocess/pipeline.h
@@ -0,0 +1,36 @@
+/*
+ * Copyright 2022 Google LLC
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#ifndef SSD_POSTPROCESS_PIPELINE_H_
+#define SSD_POSTPROCESS_PIPELINE_H_
+
+#include <stdint.h>
+
+#include "ssd_postprocess/common.h"
+
+#ifdef __cplusplus
+extern "C" {
+#endif // __cplusplus
+
+// SSD post-processing pipeline
+// Decodes the detected boxes from model_out, runs non-max suppression with
+// iou_threshold and stores up to max_faces boxes in boxes->box, setting
+// boxes->num_boxes to the number stored. boxes->box must be provided by the
+// caller.
+void ssd_postprocess_pipeline(uint8_t** model_out, Boxes* boxes,
+ const int max_faces, const float iou_threshold);
+
+#ifdef __cplusplus
+}  // extern "C"
+#endif  // __cplusplus
+
+#endif // SSD_POSTPROCESS_PIPELINE_H_