Line data Source code
1 : /**
2 : * GStreamer / NNStreamer tensor_decoder subplugin, "bounding boxes"
3 : * Copyright (C) 2018 Samsung Electronics Co. Ltd.
4 : * Copyright (C) 2018 MyungJoo Ham <myungjoo.ham@samsung.com>
5 : * Copyright 2021 NXP
6 : *
7 : * This library is free software; you can redistribute it and/or
8 : * modify it under the terms of the GNU Library General Public
9 : * License as published by the Free Software Foundation;
10 : * version 2.1 of the License.
11 : *
12 : * This library is distributed in the hope that it will be useful,
13 : * but WITHOUT ANY WARRANTY; without even the implied warranty of
14 : * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
15 : * Library General Public License for more details.
16 : *
17 : */
18 : /**
19 : * @file tensordec-boundingbox.cc
20 : * @date 15 Nov 2018
21 : * @brief NNStreamer tensor-decoder subplugin, "bounding boxes",
22 : * which converts tensors to video stream w/ boxes on
23 : * transparent background.
24 : * Some parts of this code are still NYI/WIP.
25 : *
26 : * @see https://github.com/nnstreamer/nnstreamer
27 : * @author MyungJoo Ham <myungjoo.ham@samsung.com>
28 : * @bug No known bugs except for NYI items
29 : */
30 :
31 : #include <glib.h>
32 :
33 : #include <nnstreamer_plugin_api.h>
34 : #include <nnstreamer_plugin_api_decoder.h>
35 :
36 : #include <stdint.h>
37 : #include <stdlib.h>
38 : #include <string.h>
39 : #include "tensordec-boundingbox.h"
40 :
41 : #ifdef __cplusplus
42 : extern "C" {
43 : #endif /* __cplusplus */
44 : void init_bb (void) __attribute__ ((constructor));
45 : void fini_bb (void) __attribute__ ((destructor));
46 : #ifdef __cplusplus
47 : }
48 : #endif /* __cplusplus */
49 :
50 : /**
51 : * @brief mutex for box properties table.
52 : */
53 : G_LOCK_DEFINE_STATIC (box_properties_table);
54 :
55 : /* font.c */
56 : extern uint8_t rasters[][13];
57 :
58 : /**
59 : * @todo Fill in the value at build time or hardcode this. It's a const value.
60 : * @brief The bitmap of characters
61 : * [Character (ASCII)][Height][Width]
62 : */
63 : static singleLineSprite_t singleLineSprite;
64 :
65 : /**
66 : * @brief List of bounding-box decoding schemes in string
67 : */
68 : static const char *bb_modes[] = {
69 : [MOBILENET_SSD_BOUNDING_BOX] = "mobilenet-ssd",
70 : [MOBILENET_SSD_PP_BOUNDING_BOX] = "mobilenet-ssd-postprocess",
71 : [OV_PERSON_DETECTION_BOUNDING_BOX] = "ov-person-detection",
72 : [OV_FACE_DETECTION_BOUNDING_BOX] = "ov-face-detection",
73 : [OLDNAME_MOBILENET_SSD_BOUNDING_BOX] = "tflite-ssd",
74 : [OLDNAME_MOBILENET_SSD_PP_BOUNDING_BOX] = "tf-ssd",
75 : [YOLOV5_BOUNDING_BOX] = "yolov5",
76 : [MP_PALM_DETECTION_BOUNDING_BOX] = "mp-palm-detection",
77 : [YOLOV8_BOUNDING_BOX] = "yolov8",
78 : [YOLOV8_ORIENTED_BOUNDING_BOX] = "yolov8-obb",
79 : NULL,
80 : };
81 :
82 : /**
83 : * @brief Change deprecated mode name
84 : */
85 : static const char *
86 15 : updateDecodingMode (const char *param)
87 : {
88 15 : if (g_strcmp0 (param, bb_modes[OLDNAME_MOBILENET_SSD_BOUNDING_BOX]) == 0) {
89 2 : return bb_modes[MOBILENET_SSD_BOUNDING_BOX];
90 : }
91 :
92 13 : if (g_strcmp0 (param, bb_modes[OLDNAME_MOBILENET_SSD_PP_BOUNDING_BOX]) == 0) {
93 2 : return bb_modes[MOBILENET_SSD_PP_BOUNDING_BOX];
94 : }
95 :
96 11 : return param;
97 : }
98 :
99 : /** @brief tensordec-plugin's GstTensorDecoderDef callback */
100 : static int
101 15 : bb_init (void **pdata)
102 : {
103 : /** @todo check if we need to ensure plugin_data is not yet allocated */
104 : try {
105 15 : BoundingBox *bdata = new BoundingBox ();
106 15 : *pdata = bdata;
107 0 : } catch (...) {
108 0 : GST_ERROR ("Failed to allocate memory for decoder subplugin.");
109 0 : return FALSE;
110 0 : }
111 :
112 15 : initSingleLineSprite (singleLineSprite, rasters, PIXEL_VALUE);
113 :
114 15 : return TRUE;
115 : }
116 :
117 : /** @brief tensordec-plugin's GstTensorDecoderDef callback */
118 : static void
119 15 : bb_exit (void **pdata)
120 : {
121 15 : BoundingBox *bdata = static_cast<BoundingBox *> (*pdata);
122 15 : delete bdata;
123 15 : *pdata = NULL;
124 15 : }
125 :
126 : /** @brief tensordec-plugin's GstTensorDecoderDef callback */
127 : static int
128 77 : bb_setOption (void **pdata, int opNum, const char *param)
129 : {
130 77 : BoundingBox *bdata = static_cast<BoundingBox *> (*pdata);
131 77 : BoundingBoxOption option = static_cast<BoundingBoxOption> (opNum);
132 77 : return bdata->setOption (option, param);
133 : }
134 :
135 : /**
136 : * @brief tensordec-plugin's GstTensorDecoderDef callback
137 : *
138 : * [Mobilenet SSD Model]
139 : * The first tensor is boxes. BOX_SIZE : 1 : #MaxDetection, ANY-TYPE
140 : * The second tensor is labels. #MaxLabel : #MaxDetection, ANY-TYPE
141 : * Both tensors are MANDATORY!
142 : *
143 : * [Mobilenet SSD Postprocess Model]
144 : * The tensor mapping is defined through option-3, with the following syntax:
145 : * LOCATIONS_IDX:CLASSES_IDX:SCORES_IDX:NUM_DETECTION_IDX
146 : *
147 : * Default configuration is: 3:1:2:0
148 : *
149 : * num_detection (default 1st tensor). 1, ANY-TYPE
150 : * detection_classes (default 2nd tensor). #MaxDetection, ANY-TYPE
151 : * detection_scores (default 3rd tensor). #MaxDetection, ANY-TYPE
152 : * detection_boxes (default 4th tensor). BOX_SIZE : #MaxDetection, ANY-TYPE
153 : *
154 : * All of these tensors are MANDATORY!
155 : *
156 : * Any tensors beyond those listed above are ignored.
157 : */
158 : static GstCaps *
159 144 : bb_getOutCaps (void **pdata, const GstTensorsConfig *config)
160 : {
161 : /** @todo this is compatible with "SSD" only. expand the capability! */
162 144 : BoundingBox *bdata = static_cast<BoundingBox *> (*pdata);
163 144 : return bdata->getOutCaps (config);
164 : }
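/**
 * A worked reading of the default mapping documented above, for reference
 * (the values here are illustrative only): with "option3=3:1:2:0,50",
 * detection boxes are read from tensor 3, classes from tensor 1, scores from
 * tensor 2, and the number of detections from tensor 0, and detections
 * scoring below 50 % are dropped.
 */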
165 :
166 : /** @brief tensordec-plugin's GstTensorDecoderDef callback */
167 : static GstFlowReturn
168 26 : bb_decode (void **pdata, const GstTensorsConfig *config,
169 : const GstTensorMemory *input, GstBuffer *outbuf)
170 : {
171 26 : BoundingBox *bdata = static_cast<BoundingBox *> (*pdata);
172 26 : return bdata->decode (config, input, outbuf);
173 : }
174 :
175 : /** @brief tensordec-plugin's GstTensorDecoderDef callback */
176 : static size_t
177 10 : bb_getTransformSize (void **pdata, const GstTensorsConfig *config,
178 : GstCaps *caps, size_t size, GstCaps *othercaps, GstPadDirection direction)
179 : {
180 : UNUSED (pdata);
181 : UNUSED (config);
182 : UNUSED (caps);
183 : UNUSED (size);
184 : UNUSED (othercaps);
185 : UNUSED (direction);
186 :
187 10 : return 0;
188 : /** @todo Use appropriate values */
189 : }
190 :
191 : static gchar decoder_subplugin_bounding_box[] = "bounding_boxes";
192 :
193 : /** @brief Bounding box tensordec-plugin GstTensorDecoderDef instance */
194 : static GstTensorDecoderDef boundingBox = { .modename = decoder_subplugin_bounding_box,
195 : .init = bb_init,
196 : .exit = bb_exit,
197 : .setOption = bb_setOption,
198 : .getOutCaps = bb_getOutCaps,
199 : .decode = bb_decode,
200 : .getTransformSize = bb_getTransformSize };
201 :
202 : static gchar *custom_prop_desc = NULL;
203 :
204 : /** @brief Initialize this object for tensordec-plugin */
205 : void
206 15 : init_bb (void)
207 : {
208 15 : nnstreamer_decoder_probe (&boundingBox);
209 :
210 : {
211 15 : g_autofree gchar *sub_desc = g_strjoinv ("|", (GStrv) bb_modes);
212 :
213 15 : g_free (custom_prop_desc);
214 15 : custom_prop_desc = g_strdup_printf ("Decoder mode of bounding box: [%s]", sub_desc);
215 :
216 15 : nnstreamer_decoder_set_custom_property_desc (decoder_subplugin_bounding_box,
217 : "option1", custom_prop_desc, "option2",
218 : "Location of the label file. This is independent from option1.", "option3",
219 : "Sub-option values that depend on option1;\n"
220 : "\tfor yolov5 and yolov8 mode:\n"
221 : "\t\tThe option3 requires up to 3 numbers, which tell\n"
222 : "\t\t- whether the output values are scaled or not\n"
223 : "\t\t 0: not scaled (default), 1: scaled (e.g., 0.0 ~ 1.0)\n"
224 : "\t\t- the threshold of confidence (optional, default set to 0.25)\n"
225 : "\t\t- the threshold of IOU (optional, default set to 0.45)\n"
226 : "\t\tAn example of option3 is option3 = 0: 0.65:0.6 \n"
227 : "\tfor yolov8 obb mode:\n"
228 : "\t\tThe option3 requires up to 4 numbers, which tell\n"
229 : "\t\t- whether the output values are scaled or not\n"
230 : "\t\t 0: not scaled (default), 1: scaled (e.g., 0.0 ~ 1.0)\n"
231 : "\t\t- the threshold of confidence (optional, default set to 0.25)\n"
232 : "\t\t- the threshold of IOU (optional, default set to 0.45)\n"
233 : "\t\t- whether to output oriented bounding boxes\n"
234 : "\t\t 0: axis-aligned boxes, 1: oriented boxes (default)\n"
235 : "\t\tAn example of option3 is option3=1:0.5:0.5:1\n"
236 : "\tfor mobilenet-ssd mode:\n"
237 : "\t\tThe option3 definition scheme is, in order, as follows\n"
238 : "\t\t- box priors location file (mandatory)\n"
239 : "\t\t- detection threshold (optional, default set to 0.5)box priors location file (mandatory)\n"
240 : "\t\t- Y box scale (optional, default set to 10.0)\n"
241 : "\t\t- X box scale (optional, default set to 10.0)\n"
242 : "\t\t- H box scale (optional, default set to 5.0)\n"
243 : "\t\t- W box scale (optional, default set to 5.0)\n"
244 : "\t\tThe default parameters value could be set in the following ways:\n"
245 : "\t\t option3=box-priors.txt:0.5:10.0:10.0:5.0:5.0:0.5\n"
246 : "\t\t option3=box-priors.txt\n"
247 : "\t\t option3=box-priors.txt::::::\n"
248 : "\t\tIt's possible to set only few values, using the default values for those not specified through the command line.\n"
249 : "\t\tYou could specify respectively the detection and IOU thresholds to 0.65 and 0.6 with the option3 parameter as follow:\n"
250 : "\t\t option3=box-priors.txt:0.65:::::0.6\n"
251 : "\tfor mobilenet-ssd-postprocess mode:\n"
252 : "\t\tThe option3 is required to have 5 integer numbers, which tell the tensor-dec how to interpret the given tensor inputs.\n"
253 : "\t\tThe first 4 numbers separated by colon, \':\', designate which are location:class:score:number of the tensors.\n"
254 : "\t\tThe last number separated by comma, ',\' from the first 4 numbers designate the threshold in percent.\n"
255 : "\t\tIn other words, \"option3=%i:%i:%i:%i,%i\"\n"
256 : "\tfor mp-palm-detection mode:\n"
257 : "\t\tThe option3 is required to have five float numbers, as follows;\n"
258 : "\t\t- box score threshold (mandatory)\n"
259 : "\t\t- number of layers for anchor generation (optional, default set to 4)\n"
260 : "\t\t- minimum scale factor for anchor generation (optional, default set to 1.0)\n"
261 : "\t\t- maximum scale factor for anchor generation (optional, default set to 1.0)\n"
262 : "\t\t- X offset (optional, default set to 0.5)\n"
263 : "\t\t- Y offset (optional, default set to 0.5)\n"
264 : "\t\t- strides for each layer for anchor generation (optional, default set to 8:16:16:16)\n"
265 : "\t\tThe default parameter value could be set in the following ways:\n"
266 : "\t\t option3=0.5\n"
267 : "\t\t option3=0.5:4:0.2:0.8\n"
268 : "\t\t option3=0.5:4:1.0:1.0:0.5:0.5:8:16:16:16",
269 : "option4", "Video Output Dimension (WIDTH:HEIGHT). This is independent from option1.",
270 : "option5", "Input Dimension (WIDTH:HEIGHT). This is independent from option1.", "option6",
271 : "Whether to track result bounding boxes or not\n"
272 : "\t\t 0 (default, do not track)\n"
273 : "\t\t 1 (track result bounding boxes, with naive centroid based algorithm)",
274 : "option7",
275 : "Whether to log the result bounding boxes or not\n"
276 : "\t\t 0 (default, do not log)\n"
277 : "\t\t 1 (log result bounding boxes)"
278 : "\tThis is independent from option1",
279 : "option8", "Box Style (NYI)", NULL);
280 15 : }
281 15 : }
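/**
 * @brief Illustrative-only sketch of how the options described above are
 * usually supplied from an application. It is not part of this plugin: the
 * model, label, and box-prior file names and the dimensions are placeholders,
 * and the RGBA overlay produced by this decoder would normally be blended
 * onto the original video with a compositor element rather than shown alone.
 */
static GstElement *
example_build_pipeline (void)
{
  /* Assumes the GStreamer parse API is available via the headers included above. */
  return gst_parse_launch (
      "videotestsrc ! videoconvert ! videoscale ! "
      "video/x-raw,width=300,height=300,format=RGB ! "
      "tensor_converter ! "
      "tensor_filter framework=tensorflow-lite model=ssd_mobilenet.tflite ! "
      "tensor_decoder mode=bounding_boxes option1=mobilenet-ssd "
      "option2=labels.txt option3=box-priors.txt option4=640:480 option5=300:300 ! "
      "videoconvert ! autovideosink",
      NULL);
}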
282 :
283 : /** @brief Destruct this object for tensordec-plugin */
284 : void
285 15 : fini_bb (void)
286 : {
287 15 : g_free (custom_prop_desc);
288 15 : custom_prop_desc = NULL;
289 15 : nnstreamer_decoder_exit (boundingBox.modename);
290 15 : }
291 :
292 : /** @brief Compare function for sorting distances. */
293 : static int
294 96 : distance_compare (const void *a, const void *b)
295 : {
296 96 : const distanceArrayData *da = (const distanceArrayData *) a;
297 96 : const distanceArrayData *db = (const distanceArrayData *) b;
298 :
299 96 : if (da->distance < db->distance)
300 34 : return -1;
301 62 : if (da->distance > db->distance)
302 42 : return 1;
303 20 : return 0;
304 : }
305 :
306 : /**
307 : * @brief Compare Function for g_array_sort with detectedObject.
308 : */
309 : static gint
310 1748 : compare_detection (gconstpointer _a, gconstpointer _b)
311 : {
312 1748 : const detectedObject *a = static_cast<const detectedObject *> (_a);
313 1748 : const detectedObject *b = static_cast<const detectedObject *> (_b);
314 :
315 : /* Larger comes first */
316 1748 : return (a->prob > b->prob) ? -1 : ((a->prob == b->prob) ? 0 : 1);
317 : }
318 :
319 : /**
320 : * @brief Calculate the intersection over union (IoU) of two boxes
321 : */
322 : static gfloat
323 850 : iou (detectedObject *a, detectedObject *b)
324 : {
325 850 : int x1 = MAX (a->x, b->x);
326 850 : int y1 = MAX (a->y, b->y);
327 850 : int x2 = MIN (a->x + a->width, b->x + b->width);
328 850 : int y2 = MIN (a->y + a->height, b->y + b->height);
329 850 : int w = MAX (0, (x2 - x1 + 1));
330 850 : int h = MAX (0, (y2 - y1 + 1));
331 850 : float inter = w * h;
332 850 : float areaA = a->width * a->height;
333 850 : float areaB = b->width * b->height;
334 850 : float o = inter / (areaA + areaB - inter);
335 850 : return (o >= 0) ? o : 0;
336 : }
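/**
 * @brief Illustrative-only sketch walking through the arithmetic of iou()
 * above. It is never called by the plugin, and it assumes detectedObject is
 * the plain struct used throughout this file.
 */
static void
example_iou (void)
{
  detectedObject a = { }, b = { };

  a.x = 0; a.y = 0; a.width = 10; a.height = 10;
  b.x = 5; b.y = 5; b.width = 10; b.height = 10;

  /* Intersection: 6 x 6 = 36 (the +1 makes coordinates inclusive).
   * Union: 100 + 100 - 36 = 164, so iou() returns roughly 0.22. */
  g_assert_cmpfloat (iou (&a, &b), >, 0.2f);
  g_assert_cmpfloat (iou (&a, &b), <, 0.25f);
}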
337 :
338 : /**
339 : * @brief Calculate the corners using center position and angle.
340 : * @note The angle (obj->angle) must be in radians.
341 : */
342 : static void
343 0 : get_rotated_rect_corners (detectedObject *obj, Point corners[4])
344 : {
345 0 : float cx = obj->x;
346 0 : float cy = obj->y;
347 0 : float angle = obj->angle;
348 :
349 0 : float cos_a = cos (angle);
350 0 : float sin_a = sin (angle);
351 :
352 0 : float half_w = static_cast<float> (obj->width) / 2;
353 0 : float half_h = static_cast<float> (obj->height) / 2;
354 :
355 0 : float dx[4] = { -half_w, half_w, half_w, -half_w };
356 0 : float dy[4] = { -half_h, -half_h, half_h, half_h };
357 :
358 0 : for (int i = 0; i < 4; i++) {
359 0 : corners[i].x = cx + dx[i] * cos_a - dy[i] * sin_a;
360 0 : corners[i].y = cy + dx[i] * sin_a + dy[i] * cos_a;
361 : }
362 0 : }
363 :
364 : /**
365 : * @brief Check whether a point is inside the polygon (even-odd ray-casting test)
366 : */
367 : static bool
368 0 : is_point_inside (Point point, const Point *polygon, int n)
369 : {
370 0 : int intersections = 0;
371 :
372 0 : for (int i = 0; i < n; i++) {
373 0 : Point p1 = polygon[i];
374 0 : Point p2 = polygon[(i + 1) % n];
375 :
376 0 : if ((p1.y > point.y) != (p2.y > point.y)) {
377 0 : float intersect_x = p1.x + (point.y - p1.y) * (p2.x - p1.x) / (p2.y - p1.y);
378 :
379 0 : if (intersect_x > point.x) {
380 0 : intersections++;
381 : }
382 : }
383 : }
384 :
385 0 : return (intersections % 2 == 1);
386 : }
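/**
 * @brief Illustrative-only sanity check of the even-odd (ray-casting) test
 * above. Not called by the plugin; Point is the struct used elsewhere here.
 */
static void
example_point_in_square (void)
{
  Point square[4];
  Point centre;

  square[0].x = 0.0f; square[0].y = 0.0f;
  square[1].x = 1.0f; square[1].y = 0.0f;
  square[2].x = 1.0f; square[2].y = 1.0f;
  square[3].x = 0.0f; square[3].y = 1.0f;

  centre.x = 0.5f;
  centre.y = 0.5f;

  /* A horizontal ray from the centre crosses the square's edges once (odd
   * count), so the point is reported as inside. */
  g_assert (is_point_inside (centre, square, 4));
}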
387 :
388 : /**
389 : * @brief Approximate IoU for oriented bounding boxes (OBB), estimated by grid sampling
390 : */
391 : static float
392 0 : iou_obb (detectedObject *a, detectedObject *b)
393 : {
394 : Point corners_a[4], corners_b[4];
395 0 : get_rotated_rect_corners (a, corners_a);
396 0 : get_rotated_rect_corners (b, corners_b);
397 :
398 0 : float area_a = a->width * a->height;
399 0 : float area_b = b->width * b->height;
400 :
401 0 : Point *small_corners = (area_a < area_b) ? corners_a : corners_b;
402 0 : float small_width = (area_a < area_b) ? a->width : b->width;
403 0 : float small_height = (area_a < area_b) ? a->height : b->height;
404 0 : float small_angle = (area_a < area_b) ? a->angle : b->angle;
405 :
406 0 : int grid_x = 5, grid_y = 5;
407 0 : int overlap_count = 0, total_count = 0;
408 :
409 0 : float step_x = small_width / (grid_x + 1);
410 0 : float step_y = small_height / (grid_y + 1);
411 :
412 0 : for (int i = 0; i <= grid_x; i++) {
413 0 : for (int j = 0; j <= grid_y; j++) {
414 : Point test_point;
415 0 : test_point.x = small_corners[0].x + (i + 0.5) * step_x * cos (small_angle)
416 0 : - (j + 0.5) * step_y * sin (small_angle);
417 0 : test_point.y = small_corners[0].y + (i + 0.5) * step_x * sin (small_angle)
418 0 : + (j + 0.5) * step_y * cos (small_angle);
419 :
420 0 : total_count++;
421 :
422 0 : if (is_point_inside (test_point, corners_a, 4)
423 0 : && is_point_inside (test_point, corners_b, 4)) {
424 0 : overlap_count++;
425 : }
426 : }
427 : }
428 :
429 0 : float overlap_area = (float) overlap_count / total_count * small_width * small_height;
430 0 : float union_area = area_a + area_b - overlap_area;
431 :
432 0 : return overlap_area / union_area;
433 : }
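/**
 * @brief Illustrative-only sketch of iou_obb() above, which estimates the
 * overlap by sampling a 6 x 6 grid of points inside the smaller box instead
 * of intersecting the two polygons exactly. Not called by the plugin.
 */
static void
example_iou_obb (void)
{
  detectedObject a = { };

  a.x = 50;       /* centre x: oriented boxes store centre coordinates */
  a.y = 40;       /* centre y */
  a.width = 20;
  a.height = 10;
  a.angle = 0.3f; /* radians */

  detectedObject b = a;

  /* Every sample point lies inside both (identical) boxes, so the estimate
   * comes out at 1. */
  g_assert_cmpfloat (iou_obb (&a, &b), >=, 0.99f);
}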
434 :
435 : /**
436 : * @brief Apply NMS to the given results (objects[DETECTION_MAX])
437 : */
438 : void
439 17 : nms (GArray *results, gfloat threshold, bounding_box_modes mode)
440 : {
441 : guint boxes_size;
442 : guint i, j;
443 :
444 17 : boxes_size = results->len;
445 17 : if (boxes_size == 0U)
446 0 : return;
447 :
448 17 : g_array_sort (results, compare_detection);
449 :
450 474 : for (i = 0; i < boxes_size; i++) {
451 457 : detectedObject *a = &g_array_index (results, detectedObject, i);
452 457 : if (a->valid == TRUE) {
453 1225 : for (j = i + 1; j < boxes_size; j++) {
454 1174 : detectedObject *b = &g_array_index (results, detectedObject, j);
455 1174 : if (b->valid == TRUE) {
456 850 : float iou_value = 0.0f;
457 850 : if (mode == YOLOV8_ORIENTED_BOUNDING_BOX) {
458 0 : iou_value = iou_obb (a, b);
459 : } else {
460 850 : iou_value = iou (a, b);
461 : }
462 850 : if (iou_value > threshold) {
463 406 : b->valid = FALSE;
464 : }
465 : }
466 : }
467 : }
468 : }
469 :
470 17 : i = 0;
471 : do {
472 457 : detectedObject *a = &g_array_index (results, detectedObject, i);
473 457 : if (a->valid == FALSE)
474 406 : g_array_remove_index (results, i);
475 : else
476 51 : i++;
477 457 : } while (i < results->len);
478 : }
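/**
 * @brief Illustrative-only usage sketch for nms() above. Not called by the
 * plugin; the per-mode decoders fill such arrays in their own decode paths.
 */
static void
example_nms (void)
{
  GArray *boxes = g_array_new (FALSE, TRUE, sizeof (detectedObject));
  detectedObject d = { };

  d.valid = TRUE;
  d.class_id = 0;

  /* Two heavily overlapping candidates for the same object ... */
  d.x = 10; d.y = 10; d.width = 50; d.height = 50; d.prob = 0.9f;
  g_array_append_val (boxes, d);
  d.x = 12; d.y = 11; d.prob = 0.6f;
  g_array_append_val (boxes, d);

  /* ... and one far away from both. */
  d.x = 200; d.y = 200; d.prob = 0.8f;
  g_array_append_val (boxes, d);

  /* Keep only the most probable box of each overlapping cluster. */
  nms (boxes, 0.45f, YOLOV5_BOUNDING_BOX);
  g_assert_cmpuint (boxes->len, ==, 2);

  g_array_free (boxes, TRUE);
}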
479 :
480 : /**
481 : * @brief Check whether the number of tensors is valid
482 : */
483 : int
484 144 : check_tensors (const GstTensorsConfig *config, const unsigned int limit)
485 : {
486 : unsigned int i;
487 : GstTensorInfo *_info, *_base;
488 :
489 144 : g_return_val_if_fail (config != NULL, FALSE);
490 144 : g_return_val_if_fail (config->info.num_tensors >= limit, FALSE);
491 :
492 94 : if (config->info.num_tensors > limit) {
493 0 : GST_WARNING ("tensor-decoder:boundingbox accepts %d or less tensors. "
494 : "You are wasting the bandwidth by supplying %d tensors.",
495 : limit, config->info.num_tensors);
496 : }
497 :
498 : /* tensor-type of the tensors should be the same */
499 94 : if (config->info.num_tensors > 0) {
500 94 : _base = gst_tensors_info_get_nth_info ((GstTensorsInfo *) &config->info, 0);
501 :
502 166 : for (i = 1; i < config->info.num_tensors; ++i) {
503 72 : _info = gst_tensors_info_get_nth_info ((GstTensorsInfo *) &config->info, i);
504 72 : g_return_val_if_fail (_base->type == _info->type, FALSE);
505 : }
506 : }
507 :
508 94 : return TRUE;
509 : }
510 :
511 : /** @brief Constructor of BoundingBox */
512 15 : BoundingBox::BoundingBox ()
513 : {
514 15 : mode = BOUNDING_BOX_UNKNOWN;
515 15 : width = 0;
516 15 : height = 0;
517 15 : flag_use_label = FALSE;
518 15 : do_log = 0;
519 :
520 : /* for track */
521 15 : is_track = 0;
522 15 : centroids_last_id = 0U;
523 15 : max_centroids_num = 100U;
524 15 : consecutive_disappear_threshold = 100U;
525 15 : centroids = g_array_sized_new (TRUE, TRUE, sizeof (centroid), max_centroids_num);
526 30 : distanceArray = g_array_sized_new (TRUE, TRUE, sizeof (distanceArrayData),
527 15 : max_centroids_num * max_centroids_num);
528 :
529 15 : label_path = nullptr;
530 15 : labeldata.labels = nullptr;
531 15 : labeldata.max_word_length = 0;
532 15 : labeldata.total_labels = 0;
533 15 : bdata = nullptr;
534 15 : }
535 :
536 : /** @brief destructor of BoundingBox */
537 15 : BoundingBox::~BoundingBox ()
538 : {
539 15 : _free_labels (&labeldata);
540 :
541 15 : if (label_path)
542 13 : g_free (label_path);
543 :
544 15 : g_array_free (centroids, TRUE);
545 15 : g_array_free (distanceArray, TRUE);
546 :
547 15 : G_LOCK (box_properties_table);
548 15 : g_hash_table_destroy (properties_table);
549 15 : properties_table = nullptr;
550 15 : G_UNLOCK (box_properties_table);
551 15 : }
552 :
553 : /**
554 : * @brief Update centroids with given bounding boxes.
555 : */
556 : void
557 3 : BoundingBox::updateCentroids (GArray *boxes)
558 : {
559 : guint i, j;
560 3 : if (boxes->len > max_centroids_num) {
561 0 : nns_logw ("updateCentroids: too many detected objects");
562 0 : return;
563 : }
564 : /* remove disappeared centroids */
565 3 : i = 0;
566 11 : while (i < centroids->len) {
567 8 : centroid *c = &g_array_index (centroids, centroid, i);
568 8 : if (c->consecutive_disappeared_frames >= consecutive_disappear_threshold) {
569 0 : g_array_remove_index (centroids, i);
570 : } else {
571 8 : i++;
572 : }
573 : }
574 :
575 3 : if (centroids->len > max_centroids_num) {
576 0 : nns_logw ("update_centroids: too many detected centroids");
577 0 : return;
578 : }
579 : /* if boxes is empty */
580 3 : if (boxes->len == 0U) {
581 : guint i;
582 0 : for (i = 0; i < centroids->len; i++) {
583 0 : centroid *c = &g_array_index (centroids, centroid, i);
584 :
585 0 : if (c->id > 0)
586 0 : c->consecutive_disappeared_frames++;
587 : }
588 :
589 0 : return;
590 : }
591 : /* initialize centroids with given boxes */
592 3 : if (centroids->len == 0U) {
593 : guint i;
594 5 : for (i = 0; i < boxes->len; i++) {
595 4 : detectedObject *box = &g_array_index (boxes, detectedObject, i);
596 : centroid c;
597 :
598 4 : centroids_last_id++;
599 4 : c.id = centroids_last_id;
600 4 : c.consecutive_disappeared_frames = 0;
601 4 : c.cx = box->x + box->width / 2;
602 4 : c.cy = box->y + box->height / 2;
603 4 : c.matched_box_idx = i;
604 :
605 4 : g_array_append_val (centroids, c);
606 :
607 4 : box->tracking_id = c.id;
608 : }
609 :
610 1 : return;
611 : }
612 : /* calculate the distance among centroids and boxes */
613 2 : g_array_set_size (distanceArray, centroids->len * boxes->len);
614 :
615 10 : for (i = 0; i < centroids->len; i++) {
616 8 : centroid *c = &g_array_index (centroids, centroid, i);
617 8 : c->matched_box_idx = G_MAXUINT32;
618 :
619 40 : for (j = 0; j < boxes->len; j++) {
620 32 : detectedObject *box = &g_array_index (boxes, detectedObject, j);
621 32 : distanceArrayData *d
622 32 : = &g_array_index (distanceArray, distanceArrayData, i * boxes->len + j);
623 :
624 32 : d->centroid_idx = i;
625 32 : d->box_idx = j;
626 :
627 : /* invalid centroid */
628 32 : if (c->id == 0) {
629 0 : d->distance = G_MAXUINT64;
630 : } else {
631 : /* calculate euclidean distance */
632 32 : int bcx = box->x + box->width / 2;
633 32 : int bcy = box->y + box->height / 2;
634 :
635 32 : d->distance = (guint64) (c->cx - bcx) * (c->cx - bcx)
636 32 : + (guint64) (c->cy - bcy) * (c->cy - bcy);
637 : }
638 : }
639 : }
640 :
641 2 : g_array_sort (distanceArray, distance_compare);
642 :
643 : {
644 : /* Starting from the least distance pair (centroid, box), matching each other */
645 : guint dIdx, cIdx, bIdx;
646 :
647 34 : for (dIdx = 0; dIdx < distanceArray->len; dIdx++) {
648 32 : distanceArrayData *d = &g_array_index (distanceArray, distanceArrayData, dIdx);
649 32 : centroid *c = &g_array_index (centroids, centroid, d->centroid_idx);
650 32 : detectedObject *box = &g_array_index (boxes, detectedObject, d->box_idx);
651 :
652 32 : bIdx = d->box_idx;
653 :
654 : /* the centroid is invalid */
655 32 : if (c->id == 0) {
656 0 : continue;
657 : }
658 : /* the box is already assigned to a centroid */
659 32 : if (box->tracking_id != 0) {
660 24 : continue;
661 : }
662 : /* the centroid is already assigned to a box */
663 8 : if (c->matched_box_idx != G_MAXUINT32) {
664 0 : continue;
665 : }
666 : /* now match the box with the centroid */
667 8 : c->matched_box_idx = bIdx;
668 8 : box->tracking_id = c->id;
669 8 : c->consecutive_disappeared_frames = 0;
670 : }
671 :
672 : /* increase consecutive_disappeared_frames of unmatched centroids */
673 10 : for (cIdx = 0; cIdx < centroids->len; cIdx++) {
674 8 : centroid *c = &g_array_index (centroids, centroid, cIdx);
675 :
676 8 : if (c->id == 0) {
677 0 : continue;
678 : }
679 :
680 8 : if (c->matched_box_idx == G_MAXUINT32) {
681 0 : c->consecutive_disappeared_frames++;
682 : }
683 : }
684 :
685 : /* for those unmatched boxes - register as new centroids */
686 10 : for (bIdx = 0; bIdx < boxes->len; bIdx++) {
687 8 : detectedObject *box = &g_array_index (boxes, detectedObject, bIdx);
688 : centroid c;
689 :
690 8 : if (box->tracking_id != 0) {
691 8 : continue;
692 : }
693 :
694 0 : centroids_last_id++;
695 0 : c.id = centroids_last_id;
696 0 : c.consecutive_disappeared_frames = 0;
697 0 : c.cx = box->x + box->width / 2;
698 0 : c.cy = box->y + box->height / 2;
699 0 : c.matched_box_idx = bIdx;
700 :
701 0 : g_array_append_val (centroids, c);
702 :
703 0 : box->tracking_id = c.id;
704 : }
705 : }
706 : }
707 :
708 : /**
709 : * @brief Draw a line on the frame buffer using Bresenham's line algorithm.
710 : */
711 : static void
712 0 : draw_line (uint32_t *frame, int width, int height, int x0, int y0, int x1, int y1, uint32_t color)
713 : {
714 0 : int dx = abs (x1 - x0), sx = x0 < x1 ? 1 : -1;
715 0 : int dy = -abs (y1 - y0), sy = y0 < y1 ? 1 : -1;
716 0 : int err = dx + dy, e2;
717 :
718 : while (1) {
719 0 : if (x0 >= 0 && x0 < width && y0 >= 0 && y0 < height) {
720 0 : frame[y0 * width + x0] = color;
721 : }
722 0 : if (x0 == x1 && y0 == y1)
723 0 : break;
724 0 : e2 = 2 * err;
725 0 : if (e2 >= dy) {
726 0 : err += dy;
727 0 : x0 += sx;
728 : }
729 0 : if (e2 <= dx) {
730 0 : err += dx;
731 0 : y0 += sy;
732 : }
733 : }
734 0 : }
735 :
736 : /**
737 : * @brief Draw with the given results (objects[DETECTION_MAX]) to the output buffer
738 : * @param[out] out_info The output buffer (RGBA plain)
739 : * @param[in] bdata The bounding-box internal data.
740 : * @param[in] results The final results to be drawn.
741 : */
742 : void
743 26 : BoundingBox::draw (GstMapInfo *out_info, GArray *results)
744 : {
745 26 : uint32_t *frame = (uint32_t *) out_info->data; /* Let's draw per pixel (4bytes) */
746 : unsigned int i;
747 : guint i_width, i_height;
748 :
749 26 : i_width = bdata->getInputWidth ();
750 26 : i_height = bdata->getInputHeight ();
751 :
752 110 : for (i = 0; i < results->len; i++) {
753 84 : detectedObject *a = &g_array_index (results, detectedObject, i);
754 :
755 84 : if ((flag_use_label)
756 80 : && ((a->class_id < 0 || a->class_id >= (int) labeldata.total_labels))) {
757 : /** @todo make it "logw_once" after we get logw_once API. */
758 0 : ml_logw ("Invalid class found with tensordec-boundingbox.\n");
759 0 : continue;
760 : }
761 :
762 84 : if (mode == YOLOV8_ORIENTED_BOUNDING_BOX) {
763 : /* For rotated boxes */
764 : Point corners[4];
765 0 : get_rotated_rect_corners (a, corners);
766 :
767 : /* Scale the corners to output image size */
768 0 : for (int j = 0; j < 4; j++) {
769 0 : corners[j].x = (width * corners[j].x) / i_width;
770 0 : corners[j].y = (height * corners[j].y) / i_height;
771 : }
772 :
773 : /* Draw lines between the corners */
774 0 : for (int j = 0; j < 4; j++) {
775 0 : int x_start = (int) corners[j].x;
776 0 : int y_start = (int) corners[j].y;
777 0 : int x_end = (int) corners[(j + 1) % 4].x;
778 0 : int y_end = (int) corners[(j + 1) % 4].y;
779 :
780 0 : draw_line (frame, width, height, x_start, y_start, x_end, y_end, PIXEL_VALUE);
781 : }
782 : } else {
783 : int x1, x2, y1, y2; /* Box positions on the output surface */
784 : int j;
785 : uint32_t *pos1, *pos2;
786 : /* 1. Draw Boxes */
787 84 : x1 = (width * a->x) / i_width;
788 84 : x2 = MIN (width - 1, (width * (a->x + a->width)) / i_width);
789 84 : y1 = (height * a->y) / i_height;
790 84 : y2 = MIN (height - 1, (height * (a->y + a->height)) / i_height);
791 :
792 : /* 1-1. Horizontal */
793 84 : pos1 = &frame[y1 * width + x1];
794 84 : pos2 = &frame[y2 * width + x1];
795 5174 : for (j = x1; j <= x2; j++) {
796 5090 : *pos1 = PIXEL_VALUE;
797 5090 : *pos2 = PIXEL_VALUE;
798 5090 : pos1++;
799 5090 : pos2++;
800 : }
801 :
802 : /* 1-2. Vertical */
803 84 : pos1 = &frame[(y1 + 1) * width + x1];
804 84 : pos2 = &frame[(y1 + 1) * width + x2];
805 5950 : for (j = y1 + 1; j < y2; j++) {
806 5866 : *pos1 = PIXEL_VALUE;
807 5866 : *pos2 = PIXEL_VALUE;
808 5866 : pos1 += width;
809 5866 : pos2 += width;
810 : }
811 :
812 : /* 2. Write Labels + tracking ID */
813 84 : if (flag_use_label) {
814 80 : g_autofree gchar *label = NULL;
815 : gsize k, label_len;
816 :
817 80 : if (is_track != 0) {
818 12 : label = g_strdup_printf ("%s-%d", labeldata.labels[a->class_id], a->tracking_id);
819 : } else {
820 68 : label = g_strdup_printf ("%s", labeldata.labels[a->class_id]);
821 : }
822 :
823 80 : label_len = label ? strlen (label) : 0;
824 :
825 : /* x1 is the same: x1 = MAX (0, (width * a->x) / i_width); */
826 80 : y1 = MAX (0, (y1 - 14));
827 80 : pos1 = &frame[y1 * width + x1];
828 475 : for (k = 0; k < label_len; k++) {
829 395 : unsigned int char_index = label[k];
830 395 : if ((x1 + 8) > (int) width)
831 0 : break; /* Stop drawing if it may overfill */
832 395 : pos2 = pos1;
833 5530 : for (y2 = 0; y2 < 13; y2++) {
834 : /* 13 : character height */
835 46215 : for (x2 = 0; x2 < 8; x2++) {
836 : /* 8: character width */
837 41080 : *(pos2 + x2) = singleLineSprite[char_index][y2][x2];
838 : }
839 5135 : pos2 += width;
840 : }
841 395 : x1 += 9;
842 395 : pos1 += 9; /* character width + 1px */
843 : }
844 80 : }
845 : }
846 : }
847 26 : }
848 :
849 : /**
850 : * @brief Log the given results
851 : */
852 : void
853 6 : BoundingBox::logBoxes (GArray *results)
854 : {
855 : guint i;
856 :
857 6 : nns_logi ("Detect %u boxes in %u x %u input image", results->len,
858 : bdata->getInputWidth (), bdata->getInputHeight ());
859 30 : for (i = 0; i < results->len; i++) {
860 24 : detectedObject *b = &g_array_index (results, detectedObject, i);
861 24 : if (labeldata.total_labels > 0) {
862 24 : if (mode == YOLOV8_ORIENTED_BOUNDING_BOX) {
863 0 : nns_logi ("[%s] x:%d y:%d w:%d h:%d angle:%.2f prob:%.4f",
864 : labeldata.labels[b->class_id], b->x, b->y, b->width, b->height,
865 : b->angle, b->prob);
866 : } else {
867 24 : nns_logi ("[%s] x:%d y:%d w:%d h:%d prob:%.4f",
868 : labeldata.labels[b->class_id], b->x, b->y, b->width, b->height, b->prob);
869 : }
870 : } else {
871 0 : if (mode == YOLOV8_ORIENTED_BOUNDING_BOX) {
872 0 : nns_logi ("x:%d y:%d w:%d h:%d angle:%.2f prob:%.4f", b->x, b->y,
873 : b->width, b->height, b->angle, b->prob);
874 : } else {
875 0 : nns_logi ("x:%d y:%d w:%d h:%d prob:%.4f", b->x, b->y, b->width, b->height, b->prob);
876 : }
877 : }
878 : }
879 6 : }
880 :
881 : /**
882 : * @brief Check that the label-related properties are valid
883 : */
884 : gboolean
885 26 : BoundingBox::checkLabelProps ()
886 : {
887 26 : if ((!label_path) || (!labeldata.labels) || (labeldata.total_labels <= 0))
888 4 : return FALSE;
889 22 : return TRUE;
890 : }
891 :
892 : /**
893 : * @brief Set mode of bounding box
894 : */
895 : int
896 15 : BoundingBox::setBoxDecodingMode (const char *param)
897 : {
898 15 : if (NULL == param || *param == '\0') {
899 0 : GST_ERROR ("Please set the valid mode at option1 to set box decoding mode");
900 0 : return FALSE;
901 : }
902 :
903 15 : const char *mode_name = updateDecodingMode (param);
904 :
905 15 : if (g_strcmp0 (mode_name, "yolov8-obb") == 0) {
906 0 : mode = YOLOV8_ORIENTED_BOUNDING_BOX;
907 : }
908 15 : bdata = getProperties (mode_name);
909 :
910 15 : if (bdata == nullptr) {
911 0 : nns_loge ("Could not find box properties name %s", param);
912 0 : return FALSE;
913 : }
914 :
915 15 : return TRUE;
916 : }
917 :
918 : /**
919 : * @brief Set label path of bounding box
920 : */
921 : int
922 13 : BoundingBox::setLabelPath (const char *param)
923 : {
924 13 : if (mode == MP_PALM_DETECTION_BOUNDING_BOX) {
925 : /* palm detection does not need label information */
926 0 : return TRUE;
927 : }
928 :
929 13 : if (NULL != label_path)
930 0 : g_free (label_path);
931 13 : label_path = g_strdup (param);
932 :
933 13 : if (NULL != label_path)
934 13 : loadImageLabels (label_path, &labeldata);
935 :
936 13 : if (labeldata.total_labels > 0) {
937 13 : bdata->setTotalLabels (labeldata.total_labels);
938 13 : return TRUE;
939 : } else
940 0 : return FALSE;
941 : /** @todo Do not die for this */
942 : }
943 :
944 : /**
945 : * @brief Set video size of bounding box
946 : */
947 : int
948 15 : BoundingBox::setVideoSize (const char *param)
949 : {
950 : tensor_dim dim;
951 : int rank;
952 :
953 15 : if (param == NULL || *param == '\0')
954 0 : return TRUE;
955 15 : rank = gst_tensor_parse_dimension (param, dim);
956 15 : width = 0;
957 15 : height = 0;
958 :
959 15 : if (rank < 2) {
960 0 : GST_ERROR ("mode-option-2 of boundingbox is video output dimension (WIDTH:HEIGHT). The given parameter, \"%s\", is not acceptable.",
961 : param);
962 0 : return TRUE; /* Ignore this param */
963 : }
964 15 : if (rank > 2) {
965 0 : GST_WARNING ("mode-option-2 of boundingbox is video output dimension (WIDTH:HEIGHT). The third and later elements of the given parameter, \"%s\", are ignored.",
966 : param);
967 : }
968 15 : width = dim[0];
969 15 : height = dim[1];
970 15 : return TRUE;
971 : }
972 :
973 : /**
974 : * @brief Set input model size of bounding box
975 : */
976 : int
977 15 : BoundingBox::setInputModelSize (const char *param)
978 : {
979 : tensor_dim dim;
980 : int rank;
981 15 : if (param == NULL || *param == '\0')
982 0 : return TRUE;
983 :
984 15 : rank = gst_tensor_parse_dimension (param, dim);
985 15 : bdata->setInputWidth (0);
986 15 : bdata->setInputHeight (0);
987 :
988 15 : if (rank < 2) {
989 0 : GST_ERROR ("mode-option-3 of boundingbox is input video dimension (WIDTH:HEIGHT). The given parameter, \"%s\", is not acceptable.",
990 : param);
991 0 : return TRUE; /* Ignore this param */
992 : }
993 15 : if (rank > 2) {
994 0 : GST_WARNING ("mode-option-3 of boundingbox is input video dimension (WIDTH:HEIGHT). The third and later elements of the given parameter, \"%s\", are ignored.",
995 : param);
996 : }
997 15 : bdata->setInputWidth (dim[0]);
998 15 : bdata->setInputHeight (dim[1]);
999 15 : return TRUE;
1000 : }
1001 :
1002 : /**
1003 : * @brief Set option of bounding box
1004 : */
1005 : int
1006 77 : BoundingBox::setOption (BoundingBoxOption option, const char *param)
1007 : {
1008 77 : if (option == BoundingBoxOption::MODE) {
1009 15 : return setBoxDecodingMode (param);
1010 62 : } else if (option == BoundingBoxOption::LABEL_PATH) {
1011 13 : return setLabelPath (param);
1012 49 : } else if (option == BoundingBoxOption::INTERNAL) {
1013 : /* option3 = per-decoding-mode option */
1014 11 : return bdata->setOptionInternal (param);
1015 38 : } else if (option == BoundingBoxOption::VIDEO_SIZE) {
1016 15 : return setVideoSize (param);
1017 23 : } else if (option == BoundingBoxOption::INPUT_MODEL_SIZE) {
1018 15 : return setInputModelSize (param);
1019 8 : } else if (option == BoundingBoxOption::TRACK) {
1020 4 : is_track = (int) g_ascii_strtoll (param, NULL, 10);
1021 4 : return TRUE;
1022 4 : } else if (option == BoundingBoxOption::LOG) {
1023 4 : do_log = (int) g_ascii_strtoll (param, NULL, 10);
1024 4 : return TRUE;
1025 : }
1026 :
1027 : /**
1028 : * @todo Accept color / border-width / ... with option-2
1029 : */
1030 0 : GST_INFO ("Property mode-option-%d is ignored", static_cast<int> (option) + 1);
1031 0 : return TRUE;
1032 : }
1033 :
1034 : /**
1035 : * @brief Get out caps of bounding box
1036 : */
1037 : GstCaps *
1038 144 : BoundingBox::getOutCaps (const GstTensorsConfig *config)
1039 : {
1040 : GstCaps *caps;
1041 : char *str;
1042 :
1043 144 : int ret = bdata->checkCompatible (config);
1044 144 : if (!ret)
1045 88 : return NULL;
1046 :
1047 56 : str = g_strdup_printf ("video/x-raw, format = RGBA, " /* Use alpha channel to make the background transparent */
1048 : "width = %u, height = %u",
1049 : width, height);
1050 56 : caps = gst_caps_from_string (str);
1051 56 : setFramerateFromConfig (caps, config);
1052 56 : g_free (str);
1053 :
1054 56 : return caps;
1055 : }
1056 :
1057 : /**
1058 : * @brief Decode input memory to out buffer
1059 : * @param[in] config The structure of input tensor info.
1060 : * @param[in] input The array of input tensor data. The maximum array size of input data is NNS_TENSOR_SIZE_LIMIT.
1061 : * @param[out] outbuf A sub-plugin should update or append proper memory for the negotiated media type.
1062 : */
1063 : GstFlowReturn
1064 26 : BoundingBox::decode (const GstTensorsConfig *config,
1065 : const GstTensorMemory *input, GstBuffer *outbuf)
1066 : {
1067 26 : const size_t size = (size_t) width * height * 4; /* RGBA */
1068 : GstMapInfo out_info;
1069 : GstMemory *out_mem;
1070 26 : GArray *results = NULL;
1071 : gboolean need_output_alloc;
1072 :
1073 26 : g_assert (outbuf);
1074 26 : need_output_alloc = gst_buffer_get_size (outbuf) == 0;
1075 :
1076 26 : if (checkLabelProps ())
1077 22 : flag_use_label = TRUE;
1078 : else
1079 4 : flag_use_label = FALSE;
1080 :
1081 : /* Ensure we have outbuf properly allocated */
1082 26 : if (need_output_alloc) {
1083 10 : out_mem = gst_allocator_alloc (NULL, size, NULL);
1084 : } else {
1085 16 : if (gst_buffer_get_size (outbuf) < size) {
1086 0 : gst_buffer_set_size (outbuf, size);
1087 : }
1088 16 : out_mem = gst_buffer_get_all_memory (outbuf);
1089 : }
1090 26 : if (!gst_memory_map (out_mem, &out_info, GST_MAP_WRITE)) {
1091 0 : ml_loge ("Cannot map output memory / tensordec-bounding_boxes.\n");
1092 0 : goto error_free;
1093 : }
1094 :
1095 : /* reset the buffer with alpha 0 / black */
1096 26 : memset (out_info.data, 0, size);
1097 :
1098 26 : results = bdata->decode (config, input);
1099 26 : if (results == NULL) {
1100 0 : GST_ERROR ("Failed to get output buffer, unknown mode %d.", mode);
1101 0 : goto error_unmap;
1102 : }
1103 :
1104 26 : if (do_log != 0) {
1105 6 : logBoxes (results);
1106 : }
1107 :
1108 26 : if (is_track != 0) {
1109 3 : updateCentroids (results);
1110 : }
1111 :
1112 26 : draw (&out_info, results);
1113 26 : g_array_free (results, TRUE);
1114 :
1115 26 : gst_memory_unmap (out_mem, &out_info);
1116 :
1117 26 : if (need_output_alloc)
1118 10 : gst_buffer_append_memory (outbuf, out_mem);
1119 : else
1120 16 : gst_buffer_replace_all_memory (outbuf, out_mem);
1121 :
1122 26 : return GST_FLOW_OK;
1123 :
1124 0 : error_unmap:
1125 0 : gst_memory_unmap (out_mem, &out_info);
1126 0 : error_free:
1127 0 : gst_memory_unref (out_mem);
1128 :
1129 0 : return GST_FLOW_ERROR;
1130 : }
1131 :
1132 : /**
1133 : * @brief Get bounding box properties from hash table
1134 : */
1135 : BoxProperties *
1136 135 : BoundingBox::getProperties (const gchar *properties_name)
1137 : {
1138 : gpointer data;
1139 135 : G_LOCK (box_properties_table);
1140 135 : if (properties_table == nullptr) {
1141 15 : properties_table = g_hash_table_new (g_str_hash, g_str_equal);
1142 : }
1143 135 : data = g_hash_table_lookup (properties_table, properties_name);
1144 135 : G_UNLOCK (box_properties_table);
1145 :
1146 135 : return static_cast<BoxProperties *> (data);
1147 : }
1148 :
1149 : /**
1150 : * @brief Add bounding box properties into hash table
1151 : */
1152 : gboolean
1153 120 : BoundingBox::addProperties (BoxProperties *boxProperties)
1154 : {
1155 : BoxProperties *data;
1156 : gboolean ret;
1157 :
1158 120 : data = getProperties (boxProperties->name);
1159 120 : if (NULL != data) {
1160 0 : return TRUE;
1161 : }
1162 :
1163 120 : G_LOCK (box_properties_table);
1164 120 : ret = g_hash_table_insert (properties_table, boxProperties->name, boxProperties);
1165 120 : G_UNLOCK (box_properties_table);
1166 :
1167 120 : return ret;
1168 : }