Add the ssd post-processing block Add the ssd post-processing block, including box decoding and extractions, and non-max suppression (NMS). This code is refactored based on https://spacebeaker-review.googlesource.com/c/shodan/sw/vec-iree/+/24503. The libraries can be built successfully, and the results have been validated. Change-Id: I30cf18982def7a6926f40fb7b5831038d58604af
diff --git a/CMakeLists.txt b/CMakeLists.txt index 314fda5..74835f8 100644 --- a/CMakeLists.txt +++ b/CMakeLists.txt
@@ -102,6 +102,7 @@ add_subdirectory(risp4ml) add_subdirectory(samples) +add_subdirectory(ssd_postprocess) # Add pigweed support include($ENV{ROOTDIR}/sw/pigweed/pw_build/pigweed.cmake)
diff --git a/README.md b/README.md index d352caa..049ca2c 100644 --- a/README.md +++ b/README.md
@@ -27,6 +27,7 @@ * risp4ml: Vision preprocessing library (Reduced ISP for ML) * samples: Codegen and execution of ML models based on IREE * simple_vec_mul: Point-wise vector multiplication examples +* ssd_postprocess: Vision postprocessing library for Single-Shot Detectors (SSD) ## Build the project
diff --git a/ssd_postprocess/CMakeLists.txt b/ssd_postprocess/CMakeLists.txt new file mode 100644 index 0000000..94a6e52 --- /dev/null +++ b/ssd_postprocess/CMakeLists.txt
@@ -0,0 +1,31 @@ +iree_cc_library( + NAME + box + HDRS + "box.h" + "common.h" + SRCS + "box.c" +) + +iree_cc_library( + NAME + nms + HDRS + "common.h" + "nms.h" + SRCS + "nms.c" +) + +iree_cc_library( + NAME + pipeline + HDRS + "pipeline.h" + SRCS + "pipeline.c" + DEPS + ::box + ::nms +)
diff --git a/ssd_postprocess/box.c b/ssd_postprocess/box.c new file mode 100644 index 0000000..ff0e7a8 --- /dev/null +++ b/ssd_postprocess/box.c
@@ -0,0 +1,185 @@ +/* + * Copyright 2022 Google LLC + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +// SSD box decoding and extracting + +#include "ssd_postprocess/box.h" + +#include <math.h> +#include <stdio.h> +#include <stdlib.h> +#include <string.h> + +static SsdParams params = { + .num_layers = 4, + .num_boxes = 1602, + .input_height = 320, + .input_width = 320, + .global_scales = {10, 10, 5, 5}, // y, x, h, w + .box_zero_points = {115, 129, 125, 119}, + .box_scales = {0.0813235, 0.0786732, 0.0687513, 0.0522251}, + .score_zero_points = {211, 195, 200, 225}, + .score_scales = {0.177373, 0.121247, 0.100491, 0.0550178}, + .score_threshold = 0.5, + .anchors_per_cell = 3, + .anchor_base_size = {24.0, 32.0, 40.0, 48.0, 64.0, 80.0, 96.0, 128.0, 160.0, + 192.0, 256.0, 320.0}, + .anchor_stride = {16, 32, 64, 128}}; + +// Set SSD parameters +void set_params(SsdParams* params_in) { params = *params_in; } + +static inline float dequantize(int val, int zero_point, float scale) { + return scale * (val - zero_point); +} + +static inline float sigmoid(float val) { return 1.0 / (1.0 + expf(-val)); } + +// Generate model anchors +// layer0: 20 * 20 * 3 = 1200 +// layer1: 10 * 10 * 3 = 300 +// layer2: 5 * 5 * 3 = 75 +// layer3: 3 * 3 * 3 = 27 +// total sum: 1602 +static void generate_anchors(BoxCenterEncode* anchors) { + int idx = 0; + for (int layer = 0; layer < params.num_layers; ++layer) { + int height_size = (params.input_height + params.anchor_stride[layer] - 1) / + params.anchor_stride[layer]; + int width_size = (params.input_width + params.anchor_stride[layer] - 1) / + params.anchor_stride[layer]; + for (int h = 0; h < height_size; h++) { + for (int w = 0; w < width_size; w++) { + for (int base = 0; base < params.anchors_per_cell; ++base) { + anchors[idx].y = + (float)params.anchor_stride[layer] * h / params.input_height; + anchors[idx].x = + (float)params.anchor_stride[layer] * w / params.input_width; + anchors[idx].h = + params.anchor_base_size[layer * params.anchors_per_cell + base] / + params.input_height; + anchors[idx].w = + params.anchor_base_size[layer * params.anchors_per_cell + base] / + params.input_width; + idx++; + } + } + } + } +} + +// Decode boxes (with score) from model inference outputs +// The locations channel dim is 16 x 3. +// Each 16 is composed of (4 box coordinates + 6 * 2 landmarks coordinates). +// We need only the first 4 box coordinates - so want to keep only indexes: +// 0, 1, 2, 3 +// 16,17,18,19 +// 32,33,34,35 +static void decode_boxes(uint8_t** model_out, BoxCenterEncode* boxes) { + const int num_coordinates = 16; + int box_idx = 0; + for (int layer = 0; layer < params.num_layers; layer++) { + int height_size = (params.input_height + params.anchor_stride[layer] - 1) / + params.anchor_stride[layer]; + int width_size = (params.input_width + params.anchor_stride[layer] - 1) / + params.anchor_stride[layer]; + // Boxes at even indicees; scores at odd indices + uint8_t* boxes_out = model_out[2 * layer]; + uint8_t* scores_out = model_out[2 * layer + 1]; + for (int i = 0; i < height_size * width_size; i++) { + for (int j = 0; j < params.anchors_per_cell; j++) { + int score_idx = i * params.anchors_per_cell + j; + int chan_idx = num_coordinates * score_idx; + // dequantize box + boxes[box_idx].y = + dequantize(boxes_out[chan_idx], params.box_zero_points[layer], + params.box_scales[layer]); + boxes[box_idx].x = + dequantize(boxes_out[chan_idx + 1], params.box_zero_points[layer], + params.box_scales[layer]); + boxes[box_idx].h = + dequantize(boxes_out[chan_idx + 2], params.box_zero_points[layer], + params.box_scales[layer]); + boxes[box_idx].w = + dequantize(boxes_out[chan_idx + 3], params.box_zero_points[layer], + params.box_scales[layer]); + // dequantize score + float dequant_score = + dequantize(scores_out[score_idx], params.score_zero_points[layer], + params.score_scales[layer]); + boxes[box_idx].score = sigmoid(dequant_score); + box_idx++; + } + } + } +} + +// Convert box from center encoding to corner encoding format +static void convert_box(const BoxCenterEncode* box_in, BoxCenterEncode* anchor, + BoxCornerEncode* box_out) { + float y_center = box_in->y / params.global_scales[0] * anchor->h + anchor->y; + float x_center = box_in->x / params.global_scales[1] * anchor->w + anchor->x; + float half_h = 0.5 * expf(box_in->h / params.global_scales[2]) * anchor->h; + float half_w = 0.5 * expf(box_in->w / params.global_scales[3]) * anchor->w; + + box_out->ymin = y_center - half_h; + box_out->xmin = x_center - half_w; + box_out->ymax = y_center + half_h; + box_out->xmax = x_center + half_w; + box_out->score = box_in->score; +} + +// Detect boxes by score thresholding +static void detect_boxes(const BoxCenterEncode* boxes_in, + BoxCenterEncode* anchors, Boxes* boxes_out) { + int num_detected_boxes = 0; + for (int i = 0; i < params.num_boxes; ++i) { + if (boxes_in[i].score > params.score_threshold) { + num_detected_boxes++; + } + } + if (!(boxes_out->box)) { + boxes_out->box = + (BoxCornerEncode*)malloc(sizeof(BoxCornerEncode) * num_detected_boxes); + } + + num_detected_boxes = 0; + for (int i = 0; i < params.num_boxes; ++i) { + if (boxes_in[i].score > params.score_threshold) { + convert_box(&(boxes_in[i]), &(anchors[i]), + &(boxes_out->box[num_detected_boxes])); + num_detected_boxes++; + } + } + boxes_out->num_boxes = num_detected_boxes; +} + +// Decode and extract detected boxes +void get_detected_boxes(uint8_t** model_out, Boxes* boxes_out) { + BoxCenterEncode* boxes_in = + (BoxCenterEncode*)malloc(sizeof(BoxCenterEncode) * params.num_boxes); + BoxCenterEncode* anchors = + (BoxCenterEncode*)malloc(sizeof(BoxCenterEncode) * params.num_boxes); + + generate_anchors(anchors); + + decode_boxes(model_out, boxes_in); + + detect_boxes(boxes_in, anchors, boxes_out); + + free(anchors); + free(boxes_in); +}
diff --git a/ssd_postprocess/box.h b/ssd_postprocess/box.h new file mode 100644 index 0000000..68ac936 --- /dev/null +++ b/ssd_postprocess/box.h
@@ -0,0 +1,64 @@ +/* + * Copyright 2022 Google LLC + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +#ifndef SSD_POSTPROCESS_BOX_H_ +#define SSD_POSTPROCESS_BOX_H_ + +#include <stdint.h> + +#include "ssd_postprocess/common.h" + +#ifdef __cplusplus +extern "C" { +#endif // __cplusplus + +#ifndef SSD_BOX_DIMS +#define SSD_BOX_DIMS 4 +#endif +#ifndef SSD_MAX_LAYERS +#define SSD_MAX_LAYERS 8 +#endif +#ifndef SSD_MAX_ANCHORS_PER_CELL +#define SSD_MAX_ANCHORS_PER_CELL 6 +#endif + +typedef struct { + int num_layers; + int num_boxes; + int input_height; + int input_width; + int global_scales[SSD_BOX_DIMS]; + int box_zero_points[SSD_BOX_DIMS]; + float box_scales[SSD_BOX_DIMS]; + int score_zero_points[SSD_BOX_DIMS]; + float score_scales[SSD_BOX_DIMS]; + float score_threshold; + int anchors_per_cell; + float anchor_base_size[SSD_MAX_LAYERS * SSD_MAX_ANCHORS_PER_CELL]; + int anchor_stride[SSD_MAX_LAYERS]; +} SsdParams; + +// Set SSD parameters +void set_params(SsdParams* params); + +// Decode and extract detected boxes from model outputs +void get_detected_boxes(uint8_t** model_out, Boxes* boxes); + +#ifdef __cplusplusS +} // extern "C" +#endif // __cplusplus + +#endif // SSD_POSTPROCESS_BOX_H_
diff --git a/ssd_postprocess/common.h b/ssd_postprocess/common.h new file mode 100644 index 0000000..a88592b --- /dev/null +++ b/ssd_postprocess/common.h
@@ -0,0 +1,41 @@ +/* + * Copyright 2022 Google LLC + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +#ifndef SSD_POSTPROCESS_COMMON_H_ +#define SSD_POSTPROCESS_COMMON_H_ + +typedef struct { + float y; + float x; + float h; + float w; + float score; +} BoxCenterEncode; + +typedef struct { + float ymin; + float xmin; + float ymax; + float xmax; + float score; +} BoxCornerEncode; + +typedef struct { + int num_boxes; + BoxCornerEncode* box; +} Boxes; + +#endif // SSD_POSTPROCESS_COMMON_H_
diff --git a/ssd_postprocess/nms.c b/ssd_postprocess/nms.c new file mode 100644 index 0000000..4a28302 --- /dev/null +++ b/ssd_postprocess/nms.c
@@ -0,0 +1,95 @@ +/* + * Copyright 2022 Google LLC + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +// NMS (Non-Maximum Suppression) algorithm + +#include "ssd_postprocess/nms.h" + +#include <stdint.h> +#include <stdio.h> +#include <stdlib.h> +#include <string.h> + +#ifndef MIN +#define MIN(a, b) (((a) < (b)) ? (a) : (b)) +#endif +#ifndef MAX +#define MAX(a, b) (((a) > (b)) ? (a) : (b)) +#endif + +// compute area of one box +static float compute_box_area(const BoxCornerEncode* box) { + const float width = box->xmax - box->xmin; + const float height = box->ymax - box->ymin; + return MAX(0.0, width * height); +} + +// compute IOU (intersection over union) of two boxes +static float compute_two_boxes_iou(const BoxCornerEncode* box1, + const BoxCornerEncode* box2) { + const float area1 = compute_box_area(box1); + const float area2 = compute_box_area(box2); + if (area1 <= 0 || area2 <= 0) return 0.0; + + BoxCornerEncode intersection_box = {.ymin = MAX(box1->ymin, box2->ymin), + .xmin = MAX(box1->xmin, box2->xmin), + .ymax = MIN(box1->ymax, box2->ymax), + .xmax = MIN(box1->xmax, box2->xmax)}; + float intersection_area = compute_box_area(&intersection_box); + return intersection_area / (area1 + area2 - intersection_area); +} + +// comparator for qsort +static int comparator(const void* p, const void* q) { + float x = ((BoxCornerEncode*)p)->score; + float y = ((BoxCornerEncode*)q)->score; + return (y > x) - (y < x); +} + +// Perform non-maximum suppression algorithm to remove "similar" bounding boxes +void nms(Boxes* boxes_in, Boxes* boxes_out, const int max_boxes, + const float iou_threshold) { + int num_boxes = boxes_in->num_boxes; + uint8_t* is_suppressed = (uint8_t*)malloc(num_boxes * sizeof(uint8_t)); + memset(is_suppressed, 0, num_boxes * sizeof(uint8_t)); + + // quick sort from greatest to smallest + qsort(boxes_in->box, num_boxes, sizeof(BoxCornerEncode), comparator); + + for (int i = 0; i < num_boxes; i++) { + if (!is_suppressed[i]) { + for (int j = i + 1; j < num_boxes; j++) { + if (!is_suppressed[j]) { + if (compute_two_boxes_iou(&(boxes_in->box[i]), &(boxes_in->box[j])) > + iou_threshold) { + is_suppressed[j] = 1; + } + } + } + } + } + + int ind_out = 0; + for (int i = 0; i < num_boxes; i++) { + if (ind_out >= max_boxes) break; + if (!is_suppressed[i]) { + boxes_out->box[ind_out++] = boxes_in->box[i]; + } + } + boxes_out->num_boxes = ind_out; + + free(is_suppressed); +}
diff --git a/ssd_postprocess/nms.h b/ssd_postprocess/nms.h new file mode 100644 index 0000000..183fc23 --- /dev/null +++ b/ssd_postprocess/nms.h
@@ -0,0 +1,34 @@ +/* + * Copyright 2022 Google LLC + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +#ifndef SSD_POSTPROCESS_NMS_H_ +#define SSD_POSTPROCESS_NMS_H_ + +#include "ssd_postprocess/common.h" + +#ifdef __cplusplus +extern "C" { +#endif // __cplusplus + +// Perform non-maximum suppression algorithm to remove "similar" bounding boxes +void nms(Boxes* boxes_in, Boxes* boxes_out, const int max_boxes, + const float iou_threshold); + +#ifdef __cplusplusS +} // extern "C" +#endif // __cplusplus + +#endif // SSD_POSTPROCESS_NMS_H_
diff --git a/ssd_postprocess/pipeline.c b/ssd_postprocess/pipeline.c new file mode 100644 index 0000000..ebe251b --- /dev/null +++ b/ssd_postprocess/pipeline.c
@@ -0,0 +1,37 @@ +/* + * Copyright 2022 Google LLC + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +// SSD post-processing pipeline + +#include "ssd_postprocess/pipeline.h" + +#include <stdlib.h> + +#include "ssd_postprocess/box.h" +#include "ssd_postprocess/nms.h" + +void ssd_postprocess_pipeline(uint8_t** model_out, Boxes* boxes, + const int max_faces, const float iou_threshold) { + Boxes boxes_before_nms = {.num_boxes = 0, .box = NULL}; + // decode and extract detected boxes + get_detected_boxes(model_out, &boxes_before_nms); + // non-max suppression + nms(&boxes_before_nms, boxes, max_faces, iou_threshold); + + if (boxes_before_nms.box) { + free(boxes_before_nms.box); + } +}
diff --git a/ssd_postprocess/pipeline.h b/ssd_postprocess/pipeline.h new file mode 100644 index 0000000..85719a6 --- /dev/null +++ b/ssd_postprocess/pipeline.h
@@ -0,0 +1,36 @@ +/* + * Copyright 2022 Google LLC + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +#ifndef SSD_POSTPROCESS_PIPELINE_H_ +#define SSD_POSTPROCESS_PIPELINE_H_ + +#include <stdint.h> + +#include "ssd_postprocess/common.h" + +#ifdef __cplusplus +extern "C" { +#endif // __cplusplus + +// SSD post-processing pipeline +void ssd_postprocess_pipeline(uint8_t** model_out, Boxes* boxes, + const int max_faces, const float iou_threshold); + +#ifdef __cplusplusS +} // extern "C" +#endif // __cplusplus + +#endif // SSD_POSTPROCESS_PIPELINE_H_