Line data Source code
1 : /**
2 : * GStreamer / NNStreamer tensor_decoder subplugin, "bounding boxes"
3 : * Copyright (C) 2018 Samsung Electronics Co. Ltd.
4 : * Copyright (C) 2018 MyungJoo Ham <myungjoo.ham@samsung.com>
5 : * Copyright 2021 NXP
6 : *
7 : * This library is free software; you can redistribute it and/or
8 : * modify it under the terms of the GNU Library General Public
9 : * License as published by the Free Software Foundation;
10 : * version 2.1 of the License.
11 : *
12 : * This library is distributed in the hope that it will be useful,
13 : * but WITHOUT ANY WARRANTY; without even the implied warranty of
14 : * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
15 : * Library General Public License for more details.
16 : *
17 : */
18 : /**
19 : * @file tensordec-boundingbox.cc
20 : * @date 15 Nov 2018
21 : * @brief NNStreamer tensor-decoder subplugin, "bounding boxes",
22 : * which converts tensors to video stream w/ boxes on
23 : * transparent background.
24 : * This code is NYI/WIP and not compilable.
25 : *
26 : * @see https://github.com/nnstreamer/nnstreamer
27 : * @author MyungJoo Ham <myungjoo.ham@samsung.com>
28 : * @bug No known bugs except for NYI items
29 : */
30 :
31 : #include <glib.h>
32 :
33 : #include <nnstreamer_plugin_api.h>
34 : #include <nnstreamer_plugin_api_decoder.h>
35 :
36 : #include <stdint.h>
37 : #include <stdlib.h>
38 : #include <string.h>
39 : #include "tensordec-boundingbox.h"
40 :
41 : #ifdef __cplusplus
42 : extern "C" {
43 : #endif /* __cplusplus */
44 : void init_bb (void) __attribute__ ((constructor));
45 : void fini_bb (void) __attribute__ ((destructor));
46 : #ifdef __cplusplus
47 : }
48 : #endif /* __cplusplus */
49 :
50 : /**
51 : * @brief mutex for box properties table.
52 : */
53 : G_LOCK_DEFINE_STATIC (box_properties_table);
54 :
55 : /* font.c */
56 : extern uint8_t rasters[][13];
57 :
58 : /**
59 : * @todo Fill in the value at build time or hardcode this. It's const value
60 : * @brief The bitmap of characters
61 : * [Character (ASCII)][Height][Width]
62 : */
63 : static singleLineSprite_t singleLineSprite;
64 :
/**
 * @brief List of bounding-box decoding schemes in string
 * @details Every name is stored at the index of its mode enumerator through a
 *          designated initializer, and the table ends with a NULL sentinel so
 *          that init_bb () can pass it to g_strjoinv () as a string vector.
 *          The two OLDNAME_* entries are deprecated aliases that
 *          updateDecodingMode () replaces with the current names.
 * @note NOTE(review): the NULL-terminated use presumes the enumerators are
 *       contiguous from 0; a gap would leave a NULL hole in the middle of
 *       the table -- confirm against the header that declares them.
 */
static const char *bb_modes[] = {
  [MOBILENET_SSD_BOUNDING_BOX] = "mobilenet-ssd",
  [MOBILENET_SSD_PP_BOUNDING_BOX] = "mobilenet-ssd-postprocess",
  [OV_PERSON_DETECTION_BOUNDING_BOX] = "ov-person-detection",
  [OV_FACE_DETECTION_BOUNDING_BOX] = "ov-face-detection",
  [OLDNAME_MOBILENET_SSD_BOUNDING_BOX] = "tflite-ssd",
  [OLDNAME_MOBILENET_SSD_PP_BOUNDING_BOX] = "tf-ssd",
  [YOLOV5_BOUNDING_BOX] = "yolov5",
  [MP_PALM_DETECTION_BOUNDING_BOX] = "mp-palm-detection",
  [YOLOV8_BOUNDING_BOX] = "yolov8",
  [YOLOV8_ORIENTED_BOUNDING_BOX] = "yolov8-obb",
  NULL,
};
81 :
82 : /**
83 : * @brief Change deprecated mode name
84 : */
85 : static const char *
86 15 : updateDecodingMode (const char *param)
87 : {
88 15 : if (g_strcmp0 (param, bb_modes[OLDNAME_MOBILENET_SSD_BOUNDING_BOX]) == 0) {
89 2 : return bb_modes[MOBILENET_SSD_BOUNDING_BOX];
90 : }
91 :
92 13 : if (g_strcmp0 (param, bb_modes[OLDNAME_MOBILENET_SSD_PP_BOUNDING_BOX]) == 0) {
93 2 : return bb_modes[MOBILENET_SSD_PP_BOUNDING_BOX];
94 : }
95 :
96 11 : return param;
97 : }
98 :
99 : /** @brief tensordec-plugin's GstTensorDecoderDef callback */
100 : static int
101 15 : bb_init (void **pdata)
102 : {
103 : /** @todo check if we need to ensure plugin_data is not yet allocated */
104 : try {
105 15 : BoundingBox *bdata = new BoundingBox ();
106 15 : *pdata = bdata;
107 0 : } catch (...) {
108 0 : GST_ERROR ("Failed to allocate memory for decoder subplugin.");
109 0 : return FALSE;
110 0 : }
111 :
112 15 : initSingleLineSprite (singleLineSprite, rasters, PIXEL_VALUE);
113 :
114 15 : return TRUE;
115 : }
116 :
117 : /** @brief tensordec-plugin's GstTensorDecoderDef callback */
118 : static void
119 15 : bb_exit (void **pdata)
120 : {
121 15 : BoundingBox *bdata = static_cast<BoundingBox *> (*pdata);
122 15 : delete bdata;
123 15 : *pdata = NULL;
124 15 : }
125 :
126 : /** @brief tensordec-plugin's GstTensorDecoderDef callback */
127 : static int
128 77 : bb_setOption (void **pdata, int opNum, const char *param)
129 : {
130 77 : BoundingBox *bdata = static_cast<BoundingBox *> (*pdata);
131 77 : BoundingBoxOption option = static_cast<BoundingBoxOption> (opNum);
132 77 : return bdata->setOption (option, param);
133 : }
134 :
135 : /**
136 : * @brief tensordec-plugin's GstTensorDecoderDef callback
137 : *
138 : * [Mobilenet SSD Model]
139 : * The first tensor is boxes. BOX_SIZE : 1 : #MaxDetection, ANY-TYPE
140 : * The second tensor is labels. #MaxLabel : #MaxDetection, ANY-TYPE
141 : * Both tensors are MANDATORY!
142 : *
143 : * [Mobilenet SSD Postprocess Model]
144 : * Tensors mapping is defined through option-3, with following syntax:
145 : * LOCATIONS_IDX:CLASSES_IDX:SCORES_IDX:NUM_DETECTION_IDX
146 : *
147 : * Default configuration is: 3:1:2:0
148 : *
149 : * num_detection (default 1st tensor). 1, ANY-TYPE
150 : * detection_classes (default 2nd tensor). #MaxDetection, ANY-TYPE
151 : * detection_scores (default 3rd tensor). #MaxDetection, ANY-TYPE
152 : * detection_boxes (default 4th tensor). BOX_SIZE : #MaxDetection, ANY-TYPE
153 : *
154 : * all of tensors are MANDATORY!
155 : *
156 : * If there are third or more tensors, such tensors will be ignored.
157 : */
158 : static GstCaps *
159 72 : bb_getOutCaps (void **pdata, const GstTensorsConfig *config)
160 : {
161 : /** @todo this is compatible with "SSD" only. expand the capability! */
162 72 : BoundingBox *bdata = static_cast<BoundingBox *> (*pdata);
163 72 : return bdata->getOutCaps (config);
164 : }
165 :
166 : /** @brief tensordec-plugin's GstTensorDecoderDef callback */
167 : static GstFlowReturn
168 26 : bb_decode (void **pdata, const GstTensorsConfig *config,
169 : const GstTensorMemory *input, GstBuffer *outbuf)
170 : {
171 26 : BoundingBox *bdata = static_cast<BoundingBox *> (*pdata);
172 26 : return bdata->decode (config, input, outbuf);
173 : }
174 :
175 : /** @brief tensordec-plugin's GstTensorDecoderDef callback */
176 : static size_t
177 10 : bb_getTransformSize (void **pdata, const GstTensorsConfig *config,
178 : GstCaps *caps, size_t size, GstCaps *othercaps, GstPadDirection direction)
179 : {
180 : UNUSED (pdata);
181 : UNUSED (config);
182 : UNUSED (caps);
183 : UNUSED (size);
184 : UNUSED (othercaps);
185 : UNUSED (direction);
186 :
187 10 : return 0;
188 : /** @todo Use appropriate values */
189 : }
190 :
/** @brief Mode name under which this decoder subplugin is probed and described */
static gchar decoder_subplugin_bounding_box[] = "bounding_boxes";

/**
 * @brief Bounding box tensordec-plugin GstTensorDecoderDef instance
 * @details Wires the bb_* callbacks of this file into the decoder definition
 *          that init_bb () passes to nnstreamer_decoder_probe ().
 */
static GstTensorDecoderDef boundingBox = { .modename = decoder_subplugin_bounding_box,
  .init = bb_init,
  .exit = bb_exit,
  .setOption = bb_setOption,
  .getOutCaps = bb_getOutCaps,
  .decode = bb_decode,
  .getTransformSize = bb_getTransformSize };

/** @brief Description of option1; allocated in init_bb () and freed in fini_bb () */
static gchar *custom_prop_desc = NULL;
203 :
204 : /** @brief Initialize this object for tensordec-plugin */
205 : void
206 31 : init_bb (void)
207 : {
208 31 : nnstreamer_decoder_probe (&boundingBox);
209 :
210 : {
211 31 : g_autofree gchar *sub_desc = g_strjoinv ("|", (GStrv) bb_modes);
212 :
213 31 : g_free (custom_prop_desc);
214 31 : custom_prop_desc = g_strdup_printf ("Decoder mode of bounding box: [%s]", sub_desc);
215 :
216 31 : nnstreamer_decoder_set_custom_property_desc (decoder_subplugin_bounding_box,
217 : "option1", custom_prop_desc, "option2",
218 : "Location of the label file. This is independent from option1.", "option3",
219 : "Sub-option values that depend on option1;\n"
220 : "\tfor yolov5 and yolov8 mode:\n"
221 : "\t\tThe option3 requires up to 3 numbers, which tell\n"
222 : "\t\t- whether the output values are scaled or not\n"
223 : "\t\t 0: not scaled (default), 1: scaled (e.g., 0.0 ~ 1.0)\n"
224 : "\t\t- the threshold of confidence (optional, default set to 0.25)\n"
225 : "\t\t- the threshold of IOU (optional, default set to 0.45)\n"
226 : "\t\tAn example of option3 is option3 = 0: 0.65:0.6 \n"
227 : "\tfor yolov8 obb mode:\n"
228 : "\t\tThe option3 requires up to 4 numbers, which tell\n"
229 : "\t\t- whether the output values are scaled or not\n"
230 : "\t\t 0: not scaled (default), 1: scaled (e.g., 0.0 ~ 1.0)\n"
231 : "\t\t- the threshold of confidence (optional, default set to 0.25)\n"
232 : "\t\t- the threshold of IOU (optional, default set to 0.45)\n"
233 : "\t\t- whether to output oriented bounding boxes\n"
234 : "\t\t 0: axis-aligned boxes, 1: oriented boxes (default)\n"
235 : "\t\tAn example of option3 is option3=1:0.5:0.5:1\n"
236 : "\tfor mobilenet-ssd mode:\n"
237 : "\t\tThe option3 definition scheme is, in order, as follows\n"
238 : "\t\t- box priors location file (mandatory)\n"
239 : "\t\t- detection threshold (optional, default set to 0.5)box priors location file (mandatory)\n"
240 : "\t\t- Y box scale (optional, default set to 10.0)\n"
241 : "\t\t- X box scale (optional, default set to 10.0)\n"
242 : "\t\t- H box scale (optional, default set to 5.0)\n"
243 : "\t\t- W box scale (optional, default set to 5.0)\n"
244 : "\t\tThe default parameters value could be set in the following ways:\n"
245 : "\t\t option3=box-priors.txt:0.5:10.0:10.0:5.0:5.0:0.5\n"
246 : "\t\t option3=box-priors.txt\n"
247 : "\t\t option3=box-priors.txt::::::\n"
248 : "\t\tIt's possible to set only few values, using the default values for those not specified through the command line.\n"
249 : "\t\tYou could specify respectively the detection and IOU thresholds to 0.65 and 0.6 with the option3 parameter as follow:\n"
250 : "\t\t option3=box-priors.txt:0.65:::::0.6\n"
251 : "\tfor mobilenet-ssd-postprocess mode:\n"
252 : "\t\tThe option3 is required to have 5 integer numbers, which tell the tensor-dec how to interpret the given tensor inputs.\n"
253 : "\t\tThe first 4 numbers separated by colon, \':\', designate which are location:class:score:number of the tensors.\n"
254 : "\t\tThe last number separated by comma, ',\' from the first 4 numbers designate the threshold in percent.\n"
255 : "\t\tIn other words, \"option3=%i:%i:%i:%i,%i\"\n"
256 : "\tfor mp-palm-detection mode:\n"
257 : "\t\tThe option3 is required to have five float numbers, as follows;\n"
258 : "\t\t- box score threshold (mandatory)\n"
259 : "\t\t- number of layers for anchor generation (optional, default set to 4)\n"
260 : "\t\t- minimum scale factor for anchor generation (optional, default set to 1.0)\n"
261 : "\t\t- maximum scale factor for anchor generation (optional, default set to 1.0)\n"
262 : "\t\t- X offset (optional, default set to 0.5)\n"
263 : "\t\t- Y offset (optional, default set to 0.5)\n"
264 : "\t\t- strides for each layer for anchor generation (optional, default set to 8:16:16:16)\n"
265 : "\t\tThe default parameter value could be set in the following ways:\n"
266 : "\t\t option3=0.5\n"
267 : "\t\t option3=0.5:4:0.2:0.8\n"
268 : "\t\t option3=0.5:4:1.0:1.0:0.5:0.5:8:16:16:16",
269 : "option4", "Video Output Dimension (WIDTH:HEIGHT). This is independent from option1.",
270 : "option5", "Input Dimension (WIDTH:HEIGHT). This is independent from option1.", "option6",
271 : "Whether to track result bounding boxes or not\n"
272 : "\t\t 0 (default, do not track)\n"
273 : "\t\t 1 (track result bounding boxes, with naive centroid based algorithm)",
274 : "option7",
275 : "Whether to log the result bounding boxes or not\n"
276 : "\t\t 0 (default, do not log)\n"
277 : "\t\t 1 (log result bounding boxes)"
278 : "\tThis is independent from option1",
279 : "option8", "Box Style (NYI)", NULL);
280 31 : }
281 31 : }
282 :
283 : /** @brief Destruct this object for tensordec-plugin */
284 : void
285 31 : fini_bb (void)
286 : {
287 31 : g_free (custom_prop_desc);
288 31 : custom_prop_desc = NULL;
289 31 : nnstreamer_decoder_exit (boundingBox.modename);
290 31 : }
291 :
292 : /** @brief Compare function for sorting distances. */
293 : static int
294 96 : distance_compare (const void *a, const void *b)
295 : {
296 96 : const distanceArrayData *da = (const distanceArrayData *) a;
297 96 : const distanceArrayData *db = (const distanceArrayData *) b;
298 :
299 96 : if (da->distance < db->distance)
300 34 : return -1;
301 62 : if (da->distance > db->distance)
302 42 : return 1;
303 20 : return 0;
304 : }
305 :
306 : /**
307 : * @brief Compare Function for g_array_sort with detectedObject.
308 : */
309 : static gint
310 1748 : compare_detection (gconstpointer _a, gconstpointer _b)
311 : {
312 1748 : const detectedObject *a = static_cast<const detectedObject *> (_a);
313 1748 : const detectedObject *b = static_cast<const detectedObject *> (_b);
314 :
315 : /* Larger comes first */
316 1748 : return (a->prob > b->prob) ? -1 : ((a->prob == b->prob) ? 0 : 1);
317 : }
318 :
319 : /**
320 : * @brief Calculate the intersected surface
321 : */
322 : static gfloat
323 850 : iou (detectedObject *a, detectedObject *b)
324 : {
325 850 : int x1 = MAX (a->x, b->x);
326 850 : int y1 = MAX (a->y, b->y);
327 850 : int x2 = MIN (a->x + a->width, b->x + b->width);
328 850 : int y2 = MIN (a->y + a->height, b->y + b->height);
329 850 : int w = MAX (0, (x2 - x1 + 1));
330 850 : int h = MAX (0, (y2 - y1 + 1));
331 850 : float inter = w * h;
332 850 : float areaA = a->width * a->height;
333 850 : float areaB = b->width * b->height;
334 850 : float o = inter / (areaA + areaB - inter);
335 850 : return (o >= 0) ? o : 0;
336 : }
337 :
338 : /**
339 : * @brief Calculate the corners using center position and angle.
340 : * @note The angle (obj->angle) must be radian.
341 : */
342 : static void
343 0 : get_rotated_rect_corners (detectedObject *obj, Point corners[4])
344 : {
345 0 : float cx = obj->x;
346 0 : float cy = obj->y;
347 0 : float angle = obj->angle;
348 :
349 0 : float cos_a = cos (angle);
350 0 : float sin_a = sin (angle);
351 :
352 0 : float half_w = static_cast<float> (obj->width) / 2;
353 0 : float half_h = static_cast<float> (obj->height) / 2;
354 :
355 0 : float dx[4] = { -half_w, half_w, half_w, -half_w };
356 0 : float dy[4] = { -half_h, -half_h, half_h, half_h };
357 :
358 0 : for (int i = 0; i < 4; i++) {
359 0 : corners[i].x = cx + dx[i] * cos_a - dy[i] * sin_a;
360 0 : corners[i].y = cy + dx[i] * sin_a + dy[i] * cos_a;
361 : }
362 0 : }
363 :
364 : /**
365 : * @brief Check whether point is in the polygon
366 : */
367 : static bool
368 0 : is_point_inside (Point point, const Point *polygon, int n)
369 : {
370 0 : int intersections = 0;
371 :
372 0 : for (int i = 0; i < n; i++) {
373 0 : Point p1 = polygon[i];
374 0 : Point p2 = polygon[(i + 1) % n];
375 :
376 0 : if ((p1.y > point.y) != (p2.y > point.y)) {
377 0 : float intersect_x = p1.x + (point.y - p1.y) * (p2.x - p1.x) / (p2.y - p1.y);
378 :
379 0 : if (intersect_x > point.x) {
380 0 : intersections++;
381 : }
382 : }
383 : }
384 :
385 0 : return (intersections % 2 == 1);
386 : }
387 :
388 : /**
389 : * @brief iou for obb
390 : */
391 : static float
392 0 : iou_obb (detectedObject *a, detectedObject *b)
393 : {
394 : Point corners_a[4], corners_b[4];
395 0 : get_rotated_rect_corners (a, corners_a);
396 0 : get_rotated_rect_corners (b, corners_b);
397 :
398 0 : float area_a = a->width * a->height;
399 0 : float area_b = b->width * b->height;
400 :
401 0 : Point *small_corners = (area_a < area_b) ? corners_a : corners_b;
402 0 : float small_width = (area_a < area_b) ? a->width : b->width;
403 0 : float small_height = (area_a < area_b) ? a->height : b->height;
404 0 : float small_angle = (area_a < area_b) ? a->angle : b->angle;
405 :
406 0 : int grid_x = 5, grid_y = 5;
407 0 : int overlap_count = 0, total_count = 0;
408 :
409 0 : float step_x = small_width / (grid_x + 1);
410 0 : float step_y = small_height / (grid_y + 1);
411 :
412 0 : for (int i = 0; i <= grid_x; i++) {
413 0 : for (int j = 0; j <= grid_y; j++) {
414 : Point test_point;
415 0 : test_point.x = small_corners[0].x + (i + 0.5) * step_x * cos (small_angle)
416 0 : - (j + 0.5) * step_y * sin (small_angle);
417 0 : test_point.y = small_corners[0].y + (i + 0.5) * step_x * sin (small_angle)
418 0 : + (j + 0.5) * step_y * cos (small_angle);
419 :
420 0 : total_count++;
421 :
422 0 : if (is_point_inside (test_point, corners_a, 4)
423 0 : && is_point_inside (test_point, corners_b, 4)) {
424 0 : overlap_count++;
425 : }
426 : }
427 : }
428 :
429 0 : float overlap_area = (float) overlap_count / total_count * small_width * small_height;
430 0 : float union_area = area_a + area_b - overlap_area;
431 :
432 0 : return overlap_area / union_area;
433 : }
434 :
435 : /**
436 : * @brief Apply NMS to the given results (objects[DETECTION_MAX])
437 : */
438 : void
439 17 : nms (GArray *results, gfloat threshold, bounding_box_modes mode)
440 : {
441 : guint boxes_size;
442 : guint i, j;
443 :
444 17 : boxes_size = results->len;
445 17 : if (boxes_size == 0U)
446 0 : return;
447 :
448 17 : g_array_sort (results, compare_detection);
449 :
450 474 : for (i = 0; i < boxes_size; i++) {
451 457 : detectedObject *a = &g_array_index (results, detectedObject, i);
452 457 : if (a->valid == TRUE) {
453 1225 : for (j = i + 1; j < boxes_size; j++) {
454 1174 : detectedObject *b = &g_array_index (results, detectedObject, j);
455 1174 : if (b->valid == TRUE) {
456 850 : float iou_value = 0.0f;
457 850 : if (mode == YOLOV8_ORIENTED_BOUNDING_BOX) {
458 0 : iou_value = iou_obb (a, b);
459 : } else {
460 850 : iou_value = iou (a, b);
461 : }
462 850 : if (iou_value > threshold) {
463 406 : b->valid = FALSE;
464 : }
465 : }
466 : }
467 : }
468 : }
469 :
470 17 : i = 0;
471 : do {
472 457 : detectedObject *a = &g_array_index (results, detectedObject, i);
473 457 : if (a->valid == FALSE)
474 406 : g_array_remove_index (results, i);
475 : else
476 51 : i++;
477 457 : } while (i < results->len);
478 : }
479 :
480 : /**
481 : * @brief check the num_tensors is valid
482 : */
483 : int
484 72 : check_tensors (const GstTensorsConfig *config, const unsigned int limit)
485 : {
486 : unsigned int i;
487 : GstTensorInfo *_info, *_base;
488 :
489 72 : g_return_val_if_fail (config != NULL, FALSE);
490 72 : g_return_val_if_fail (config->info.num_tensors >= limit, FALSE);
491 :
492 72 : if (config->info.num_tensors > limit) {
493 0 : GST_WARNING ("tensor-decoder:boundingbox accepts %d or less tensors. "
494 : "You are wasting the bandwidth by supplying %d tensors.",
495 : limit, config->info.num_tensors);
496 : }
497 :
498 : /* tensor-type of the tensors should be the same */
499 72 : if (config->info.num_tensors > 0) {
500 72 : _base = gst_tensors_info_get_nth_info ((GstTensorsInfo *) &config->info, 0);
501 :
502 144 : for (i = 1; i < config->info.num_tensors; ++i) {
503 72 : _info = gst_tensors_info_get_nth_info ((GstTensorsInfo *) &config->info, i);
504 72 : g_return_val_if_fail (_base->type == _info->type, FALSE);
505 : }
506 : }
507 :
508 72 : return TRUE;
509 : }
510 :
511 : /** @brief Constructor of BoundingBox */
512 15 : BoundingBox::BoundingBox ()
513 : {
514 15 : mode = BOUNDING_BOX_UNKNOWN;
515 15 : width = 0;
516 15 : height = 0;
517 15 : flag_use_label = FALSE;
518 15 : do_log = 0;
519 :
520 : /* for track */
521 15 : is_track = 0;
522 15 : centroids_last_id = 0U;
523 15 : max_centroids_num = 100U;
524 15 : consecutive_disappear_threshold = 100U;
525 15 : centroids = g_array_sized_new (TRUE, TRUE, sizeof (centroid), max_centroids_num);
526 30 : distanceArray = g_array_sized_new (TRUE, TRUE, sizeof (distanceArrayData),
527 15 : max_centroids_num * max_centroids_num);
528 :
529 15 : label_path = nullptr;
530 15 : labeldata.labels = nullptr;
531 15 : labeldata.max_word_length = 0;
532 15 : labeldata.total_labels = 0;
533 15 : bdata = nullptr;
534 15 : }
535 :
536 : /** @brief destructor of BoundingBox */
537 15 : BoundingBox::~BoundingBox ()
538 : {
539 15 : _free_labels (&labeldata);
540 :
541 15 : if (label_path)
542 13 : g_free (label_path);
543 :
544 15 : g_array_free (centroids, TRUE);
545 15 : g_array_free (distanceArray, TRUE);
546 15 : }
547 :
548 : /**
549 : * @brief Update centroids with given bounding boxes.
550 : */
551 : void
552 3 : BoundingBox::updateCentroids (GArray *boxes)
553 : {
554 : guint i, j;
555 3 : if (boxes->len > max_centroids_num) {
556 0 : nns_logw ("updateCentroids: too many detected objects");
557 0 : return;
558 : }
559 : /* remove disappeared centroids */
560 3 : i = 0;
561 11 : while (i < centroids->len) {
562 8 : centroid *c = &g_array_index (centroids, centroid, i);
563 8 : if (c->consecutive_disappeared_frames >= consecutive_disappear_threshold) {
564 0 : g_array_remove_index (centroids, i);
565 : } else {
566 8 : i++;
567 : }
568 : }
569 :
570 3 : if (centroids->len > max_centroids_num) {
571 0 : nns_logw ("update_centroids: too many detected centroids");
572 0 : return;
573 : }
574 : /* if boxes is empty */
575 3 : if (boxes->len == 0U) {
576 : guint i;
577 0 : for (i = 0; i < centroids->len; i++) {
578 0 : centroid *c = &g_array_index (centroids, centroid, i);
579 :
580 0 : if (c->id > 0)
581 0 : c->consecutive_disappeared_frames++;
582 : }
583 :
584 0 : return;
585 : }
586 : /* initialize centroids with given boxes */
587 3 : if (centroids->len == 0U) {
588 : guint i;
589 5 : for (i = 0; i < boxes->len; i++) {
590 4 : detectedObject *box = &g_array_index (boxes, detectedObject, i);
591 : centroid c;
592 :
593 4 : centroids_last_id++;
594 4 : c.id = centroids_last_id;
595 4 : c.consecutive_disappeared_frames = 0;
596 4 : c.cx = box->x + box->width / 2;
597 4 : c.cy = box->y + box->height / 2;
598 4 : c.matched_box_idx = i;
599 :
600 4 : g_array_append_val (centroids, c);
601 :
602 4 : box->tracking_id = c.id;
603 : }
604 :
605 1 : return;
606 : }
607 : /* calculate the distance among centroids and boxes */
608 2 : g_array_set_size (distanceArray, centroids->len * boxes->len);
609 :
610 10 : for (i = 0; i < centroids->len; i++) {
611 8 : centroid *c = &g_array_index (centroids, centroid, i);
612 8 : c->matched_box_idx = G_MAXUINT32;
613 :
614 40 : for (j = 0; j < boxes->len; j++) {
615 32 : detectedObject *box = &g_array_index (boxes, detectedObject, j);
616 32 : distanceArrayData *d
617 32 : = &g_array_index (distanceArray, distanceArrayData, i * centroids->len + j);
618 :
619 32 : d->centroid_idx = i;
620 32 : d->box_idx = j;
621 :
622 : /* invalid centroid */
623 32 : if (c->id == 0) {
624 0 : d->distance = G_MAXUINT64;
625 : } else {
626 : /* calculate euclidean distance */
627 32 : int bcx = box->x + box->width / 2;
628 32 : int bcy = box->y + box->height / 2;
629 :
630 32 : d->distance = (guint64) (c->cx - bcx) * (c->cx - bcx)
631 32 : + (guint64) (c->cy - bcy) * (c->cy - bcy);
632 : }
633 : }
634 : }
635 :
636 2 : g_array_sort (distanceArray, distance_compare);
637 :
638 : {
639 : /* Starting from the least distance pair (centroid, box), matching each other */
640 : guint dIdx, cIdx, bIdx;
641 :
642 34 : for (dIdx = 0; dIdx < distanceArray->len; dIdx++) {
643 32 : distanceArrayData *d = &g_array_index (distanceArray, distanceArrayData, dIdx);
644 32 : centroid *c = &g_array_index (centroids, centroid, d->centroid_idx);
645 32 : detectedObject *box = &g_array_index (boxes, detectedObject, d->box_idx);
646 :
647 32 : bIdx = d->box_idx;
648 :
649 : /* the centroid is invalid */
650 32 : if (c->id == 0) {
651 0 : continue;
652 : }
653 : /* the box is already assigned to a centroid */
654 32 : if (box->tracking_id != 0) {
655 24 : continue;
656 : }
657 : /* the centroid is already assigned to a box */
658 8 : if (c->matched_box_idx != G_MAXUINT32) {
659 0 : continue;
660 : }
661 : /* now match the box with the centroid */
662 8 : c->matched_box_idx = bIdx;
663 8 : box->tracking_id = c->id;
664 8 : c->consecutive_disappeared_frames = 0;
665 : }
666 :
667 : /* increase consecutive_disappeared_frames of unmatched centroids */
668 10 : for (cIdx = 0; cIdx < centroids->len; cIdx++) {
669 8 : centroid *c = &g_array_index (centroids, centroid, cIdx);
670 :
671 8 : if (c->id == 0) {
672 0 : continue;
673 : }
674 :
675 8 : if (c->matched_box_idx == G_MAXUINT32) {
676 0 : c->consecutive_disappeared_frames++;
677 : }
678 : }
679 :
680 : /* for those unmatched boxes - register as new centroids */
681 10 : for (bIdx = 0; bIdx < boxes->len; bIdx++) {
682 8 : detectedObject *box = &g_array_index (boxes, detectedObject, bIdx);
683 : centroid c;
684 :
685 8 : if (box->tracking_id != 0) {
686 8 : continue;
687 : }
688 :
689 0 : centroids_last_id++;
690 0 : c.id = centroids_last_id;
691 0 : c.consecutive_disappeared_frames = 0;
692 0 : c.cx = box->x + box->width / 2;
693 0 : c.cy = box->y + box->height / 2;
694 0 : c.matched_box_idx = bIdx;
695 :
696 0 : g_array_append_val (centroids, c);
697 :
698 0 : box->tracking_id = c.id;
699 : }
700 : }
701 : }
702 :
/**
 * @brief Draw a line on the frame buffer.
 * @details Integer line stepping from (x0, y0) to (x1, y1), both end points
 *          included. Points outside of the width x height area are skipped,
 *          so the line may start or end off-frame.
 * @param[out] frame Pixel buffer, one uint32_t per pixel, width pixels per row.
 * @param[in] width Frame width in pixels.
 * @param[in] height Frame height in pixels.
 * @param[in] x0 X of the start point.
 * @param[in] y0 Y of the start point.
 * @param[in] x1 X of the end point.
 * @param[in] y1 Y of the end point.
 * @param[in] color Pixel value written for every point of the line.
 */
static void
draw_line (uint32_t *frame, int width, int height, int x0, int y0, int x1, int y1, uint32_t color)
{
  const int span_x = abs (x1 - x0);
  const int span_y = abs (y1 - y0);
  const int step_x = (x0 < x1) ? 1 : -1;
  const int step_y = (y0 < y1) ? 1 : -1;
  int error = span_x - span_y;

  for (;;) {
    const bool visible = (x0 >= 0) && (x0 < width) && (y0 >= 0) && (y0 < height);
    if (visible)
      frame[y0 * width + x0] = color;

    /* The end point has just been handled; done. */
    if (x0 == x1 && y0 == y1)
      return;

    const int doubled = 2 * error;
    if (doubled >= -span_y) {
      error -= span_y;
      x0 += step_x;
    }
    if (doubled <= span_x) {
      error += span_x;
      y0 += step_y;
    }
  }
}
730 :
731 : /**
732 : * @brief Draw with the given results (objects[DETECTION_MAX]) to the output buffer
733 : * @param[out] out_info The output buffer (RGBA plain)
734 : * @param[in] bdata The bounding-box internal data.
735 : * @param[in] results The final results to be drawn.
736 : */
737 : void
738 26 : BoundingBox::draw (GstMapInfo *out_info, GArray *results)
739 : {
740 26 : uint32_t *frame = (uint32_t *) out_info->data; /* Let's draw per pixel (4bytes) */
741 : unsigned int i;
742 : guint i_width, i_height;
743 :
744 26 : i_width = bdata->getInputWidth ();
745 26 : i_height = bdata->getInputHeight ();
746 :
747 110 : for (i = 0; i < results->len; i++) {
748 84 : detectedObject *a = &g_array_index (results, detectedObject, i);
749 :
750 84 : if ((flag_use_label)
751 80 : && ((a->class_id < 0 || a->class_id >= (int) labeldata.total_labels))) {
752 : /** @todo make it "logw_once" after we get logw_once API. */
753 0 : ml_logw ("Invalid class found with tensordec-boundingbox.\n");
754 0 : continue;
755 : }
756 :
757 84 : if (mode == YOLOV8_ORIENTED_BOUNDING_BOX) {
758 : /* For rotated boxes */
759 : Point corners[4];
760 0 : get_rotated_rect_corners (a, corners);
761 :
762 : /* Scale the corners to output image size */
763 0 : for (int j = 0; j < 4; j++) {
764 0 : corners[j].x = (width * corners[j].x) / i_width;
765 0 : corners[j].y = (height * corners[j].y) / i_height;
766 : }
767 :
768 : /* Draw lines between the corners */
769 0 : for (int j = 0; j < 4; j++) {
770 0 : int x_start = (int) corners[j].x;
771 0 : int y_start = (int) corners[j].y;
772 0 : int x_end = (int) corners[(j + 1) % 4].x;
773 0 : int y_end = (int) corners[(j + 1) % 4].y;
774 :
775 0 : draw_line (frame, width, height, x_start, y_start, x_end, y_end, PIXEL_VALUE);
776 : }
777 : } else {
778 : int x1, x2, y1, y2; /* Box positions on the output surface */
779 : int j;
780 : uint32_t *pos1, *pos2;
781 : /* 1. Draw Boxes */
782 84 : x1 = (width * a->x) / i_width;
783 84 : x2 = MIN (width - 1, (width * (a->x + a->width)) / i_width);
784 84 : y1 = (height * a->y) / i_height;
785 84 : y2 = MIN (height - 1, (height * (a->y + a->height)) / i_height);
786 :
787 : /* 1-1. Horizontal */
788 84 : pos1 = &frame[y1 * width + x1];
789 84 : pos2 = &frame[y2 * width + x1];
790 5174 : for (j = x1; j <= x2; j++) {
791 5090 : *pos1 = PIXEL_VALUE;
792 5090 : *pos2 = PIXEL_VALUE;
793 5090 : pos1++;
794 5090 : pos2++;
795 : }
796 :
797 : /* 1-2. Vertical */
798 84 : pos1 = &frame[(y1 + 1) * width + x1];
799 84 : pos2 = &frame[(y1 + 1) * width + x2];
800 5950 : for (j = y1 + 1; j < y2; j++) {
801 5866 : *pos1 = PIXEL_VALUE;
802 5866 : *pos2 = PIXEL_VALUE;
803 5866 : pos1 += width;
804 5866 : pos2 += width;
805 : }
806 :
807 : /* 2. Write Labels + tracking ID */
808 84 : if (flag_use_label) {
809 80 : g_autofree gchar *label = NULL;
810 : gsize k, label_len;
811 :
812 80 : if (is_track != 0) {
813 12 : label = g_strdup_printf ("%s-%d", labeldata.labels[a->class_id], a->tracking_id);
814 : } else {
815 68 : label = g_strdup_printf ("%s", labeldata.labels[a->class_id]);
816 : }
817 :
818 80 : label_len = label ? strlen (label) : 0;
819 :
820 : /* x1 is the same: x1 = MAX (0, (width * a->x) / i_width); */
821 80 : y1 = MAX (0, (y1 - 14));
822 80 : pos1 = &frame[y1 * width + x1];
823 475 : for (k = 0; k < label_len; k++) {
824 395 : unsigned int char_index = label[k];
825 395 : if ((x1 + 8) > (int) width)
826 0 : break; /* Stop drawing if it may overfill */
827 395 : pos2 = pos1;
828 5530 : for (y2 = 0; y2 < 13; y2++) {
829 : /* 13 : character height */
830 46215 : for (x2 = 0; x2 < 8; x2++) {
831 : /* 8: character width */
832 41080 : *(pos2 + x2) = singleLineSprite[char_index][y2][x2];
833 : }
834 5135 : pos2 += width;
835 : }
836 395 : x1 += 9;
837 395 : pos1 += 9; /* character width + 1px */
838 : }
839 80 : }
840 : }
841 : }
842 26 : }
843 :
844 : /**
845 : * @brief Log the given results
846 : */
847 : void
848 6 : BoundingBox::logBoxes (GArray *results)
849 : {
850 : guint i;
851 :
852 6 : nns_logi ("Detect %u boxes in %u x %u input image", results->len,
853 : bdata->getInputWidth (), bdata->getInputHeight ());
854 30 : for (i = 0; i < results->len; i++) {
855 24 : detectedObject *b = &g_array_index (results, detectedObject, i);
856 24 : if (labeldata.total_labels > 0) {
857 24 : if (mode == YOLOV8_ORIENTED_BOUNDING_BOX) {
858 0 : nns_logi ("[%s] x:%d y:%d w:%d h:%d angle:%.2f prob:%.4f",
859 : labeldata.labels[b->class_id], b->x, b->y, b->width, b->height,
860 : b->angle, b->prob);
861 : } else {
862 24 : nns_logi ("[%s] x:%d y:%d w:%d h:%d prob:%.4f",
863 : labeldata.labels[b->class_id], b->x, b->y, b->width, b->height, b->prob);
864 : }
865 : } else {
866 0 : if (mode == YOLOV8_ORIENTED_BOUNDING_BOX) {
867 0 : nns_logi ("x:%d y:%d w:%d h:%d angle:%.2f prob:%.4f", b->x, b->y,
868 : b->width, b->height, b->angle, b->prob);
869 : } else {
870 0 : nns_logi ("x:%d y:%d w:%d h:%d prob:%.4f", b->x, b->y, b->width, b->height, b->prob);
871 : }
872 : }
873 : }
874 6 : }
875 :
876 : /**
877 : * @brief Check the label relevant properties are valid
878 : */
879 : gboolean
880 26 : BoundingBox::checkLabelProps ()
881 : {
882 26 : if ((!label_path) || (!labeldata.labels) || (labeldata.total_labels <= 0))
883 4 : return FALSE;
884 22 : return TRUE;
885 : }
886 :
887 : /**
888 : * @brief Set mode of bounding box
889 : */
890 : int
891 15 : BoundingBox::setBoxDecodingMode (const char *param)
892 : {
893 15 : if (NULL == param || *param == '\0') {
894 0 : GST_ERROR ("Please set the valid mode at option1 to set box decoding mode");
895 0 : return FALSE;
896 : }
897 :
898 15 : const char *mode_name = updateDecodingMode (param);
899 :
900 15 : if (g_strcmp0 (mode_name, "yolov8-obb") == 0) {
901 0 : mode = YOLOV8_ORIENTED_BOUNDING_BOX;
902 : }
903 15 : bdata = getProperties (mode_name);
904 :
905 15 : if (bdata == nullptr) {
906 0 : nns_loge ("Could not find box properties name %s", param);
907 0 : return FALSE;
908 : }
909 :
910 15 : return TRUE;
911 : }
912 :
913 : /**
914 : * @brief Set label path of bounding box
915 : */
916 : int
917 13 : BoundingBox::setLabelPath (const char *param)
918 : {
919 13 : if (mode == MP_PALM_DETECTION_BOUNDING_BOX) {
920 : /* palm detection does not need label information */
921 0 : return TRUE;
922 : }
923 :
924 13 : if (NULL != label_path)
925 0 : g_free (label_path);
926 13 : label_path = g_strdup (param);
927 :
928 13 : if (NULL != label_path)
929 13 : loadImageLabels (label_path, &labeldata);
930 :
931 13 : if (labeldata.total_labels > 0) {
932 13 : bdata->setTotalLabels (labeldata.total_labels);
933 13 : return TRUE;
934 : } else
935 0 : return FALSE;
936 : /** @todo Do not die for this */
937 : }
938 :
939 : /**
940 : * @brief Set video size of bounding box
941 : */
942 : int
943 15 : BoundingBox::setVideoSize (const char *param)
944 : {
945 : tensor_dim dim;
946 : int rank;
947 :
948 15 : if (param == NULL || *param == '\0')
949 0 : return TRUE;
950 15 : rank = gst_tensor_parse_dimension (param, dim);
951 15 : width = 0;
952 15 : height = 0;
953 :
954 15 : if (rank < 2) {
955 0 : GST_ERROR ("mode-option-2 of boundingbox is video output dimension (WIDTH:HEIGHT). The given parameter, \"%s\", is not acceptable.",
956 : param);
957 0 : return TRUE; /* Ignore this param */
958 : }
959 15 : if (rank > 2) {
960 0 : GST_WARNING ("mode-option-2 of boundingbox is video output dimension (WIDTH:HEIGHT). The third and later elements of the given parameter, \"%s\", are ignored.",
961 : param);
962 : }
963 15 : width = dim[0];
964 15 : height = dim[1];
965 15 : return TRUE;
966 : }
967 :
968 : /**
969 : * @brief Set input model size of bounding box
970 : */
971 : int
972 15 : BoundingBox::setInputModelSize (const char *param)
973 : {
974 : tensor_dim dim;
975 : int rank;
976 15 : if (param == NULL || *param == '\0')
977 0 : return TRUE;
978 :
979 15 : rank = gst_tensor_parse_dimension (param, dim);
980 15 : bdata->setInputWidth (0);
981 15 : bdata->setInputHeight (0);
982 :
983 15 : if (rank < 2) {
984 0 : GST_ERROR ("mode-option-3 of boundingbox is input video dimension (WIDTH:HEIGHT). The given parameter, \"%s\", is not acceptable.",
985 : param);
986 0 : return TRUE; /* Ignore this param */
987 : }
988 15 : if (rank > 2) {
989 0 : GST_WARNING ("mode-option-3 of boundingbox is input video dimension (WIDTH:HEIGHT). The third and later elements of the given parameter, \"%s\", are ignored.",
990 : param);
991 : }
992 15 : bdata->setInputWidth (dim[0]);
993 15 : bdata->setInputHeight (dim[1]);
994 15 : return TRUE;
995 : }
996 :
997 : /**
998 : * @brief Set option of bounding box
999 : */
1000 : int
1001 77 : BoundingBox::setOption (BoundingBoxOption option, const char *param)
1002 : {
1003 77 : if (option == BoundingBoxOption::MODE) {
1004 15 : return setBoxDecodingMode (param);
1005 62 : } else if (option == BoundingBoxOption::LABEL_PATH) {
1006 13 : return setLabelPath (param);
1007 49 : } else if (option == BoundingBoxOption::INTERNAL) {
1008 : /* option3 = per-decoding-mode option */
1009 11 : return bdata->setOptionInternal (param);
1010 38 : } else if (option == BoundingBoxOption::VIDEO_SIZE) {
1011 15 : return setVideoSize (param);
1012 23 : } else if (option == BoundingBoxOption::INPUT_MODEL_SIZE) {
1013 15 : return setInputModelSize (param);
1014 8 : } else if (option == BoundingBoxOption::TRACK) {
1015 4 : is_track = (int) g_ascii_strtoll (param, NULL, 10);
1016 4 : return TRUE;
1017 4 : } else if (option == BoundingBoxOption::LOG) {
1018 4 : do_log = (int) g_ascii_strtoll (param, NULL, 10);
1019 4 : return TRUE;
1020 : }
1021 :
1022 : /**
1023 : * @todo Accept color / border-width / ... with option-2
1024 : */
1025 0 : GST_INFO ("Property mode-option-%d is ignored", static_cast<int> (option) + 1);
1026 0 : return TRUE;
1027 : }
1028 :
1029 : /**
1030 : * @brief Get out caps of bounding box
1031 : */
1032 : GstCaps *
1033 72 : BoundingBox::getOutCaps (const GstTensorsConfig *config)
1034 : {
1035 : GstCaps *caps;
1036 : char *str;
1037 :
1038 72 : int ret = bdata->checkCompatible (config);
1039 72 : if (!ret)
1040 16 : return NULL;
1041 :
1042 56 : str = g_strdup_printf ("video/x-raw, format = RGBA, " /* Use alpha channel to make the background transparent */
1043 : "width = %u, height = %u",
1044 : width, height);
1045 56 : caps = gst_caps_from_string (str);
1046 56 : setFramerateFromConfig (caps, config);
1047 56 : g_free (str);
1048 :
1049 56 : return caps;
1050 : }
1051 :
1052 : /**
1053 : * @brief Decode input memory to out buffer
1054 : * @param[in] config The structure of input tensor info.
1055 : * @param[in] input The array of input tensor data. The maximum array size of input data is NNS_TENSOR_SIZE_LIMIT.
1056 : * @param[out] outbuf A sub-plugin should update or append proper memory for the negotiated media type.
1057 : */
1058 : GstFlowReturn
1059 26 : BoundingBox::decode (const GstTensorsConfig *config,
1060 : const GstTensorMemory *input, GstBuffer *outbuf)
1061 : {
1062 26 : const size_t size = (size_t) width * height * 4; /* RGBA */
1063 : GstMapInfo out_info;
1064 : GstMemory *out_mem;
1065 26 : GArray *results = NULL;
1066 : gboolean need_output_alloc;
1067 :
1068 26 : g_assert (outbuf);
1069 26 : need_output_alloc = gst_buffer_get_size (outbuf) == 0;
1070 :
1071 26 : if (checkLabelProps ())
1072 22 : flag_use_label = TRUE;
1073 : else
1074 4 : flag_use_label = FALSE;
1075 :
1076 : /* Ensure we have outbuf properly allocated */
1077 26 : if (need_output_alloc) {
1078 10 : out_mem = gst_allocator_alloc (NULL, size, NULL);
1079 : } else {
1080 16 : if (gst_buffer_get_size (outbuf) < size) {
1081 0 : gst_buffer_set_size (outbuf, size);
1082 : }
1083 16 : out_mem = gst_buffer_get_all_memory (outbuf);
1084 : }
1085 26 : if (!gst_memory_map (out_mem, &out_info, GST_MAP_WRITE)) {
1086 0 : ml_loge ("Cannot map output memory / tensordec-bounding_boxes.\n");
1087 0 : goto error_free;
1088 : }
1089 :
1090 : /* reset the buffer with alpha 0 / black */
1091 26 : memset (out_info.data, 0, size);
1092 :
1093 26 : results = bdata->decode (config, input);
1094 26 : if (results == NULL) {
1095 0 : GST_ERROR ("Failed to get output buffer, unknown mode %d.", mode);
1096 0 : goto error_unmap;
1097 : }
1098 :
1099 26 : if (do_log != 0) {
1100 6 : logBoxes (results);
1101 : }
1102 :
1103 26 : if (is_track != 0) {
1104 3 : updateCentroids (results);
1105 : }
1106 :
1107 26 : draw (&out_info, results);
1108 26 : g_array_free (results, TRUE);
1109 :
1110 26 : gst_memory_unmap (out_mem, &out_info);
1111 :
1112 26 : if (need_output_alloc)
1113 10 : gst_buffer_append_memory (outbuf, out_mem);
1114 : else
1115 16 : gst_buffer_replace_all_memory (outbuf, out_mem);
1116 :
1117 26 : return GST_FLOW_OK;
1118 :
1119 0 : error_unmap:
1120 0 : gst_memory_unmap (out_mem, &out_info);
1121 0 : error_free:
1122 0 : gst_memory_unref (out_mem);
1123 :
1124 0 : return GST_FLOW_ERROR;
1125 : }
1126 :
1127 : /**
1128 : * @brief Get bounding box properties from hash table
1129 : */
1130 : BoxProperties *
1131 263 : BoundingBox::getProperties (const gchar *properties_name)
1132 : {
1133 : gpointer data;
1134 263 : G_LOCK (box_properties_table);
1135 263 : if (properties_table == nullptr) {
1136 31 : properties_table = g_hash_table_new (g_str_hash, g_str_equal);
1137 : }
1138 263 : data = g_hash_table_lookup (properties_table, properties_name);
1139 263 : G_UNLOCK (box_properties_table);
1140 :
1141 263 : return static_cast<BoxProperties *> (data);
1142 : }
1143 :
1144 : /**
1145 : * @brief Add bounding box properties into hash table
1146 : */
1147 : gboolean
1148 248 : BoundingBox::addProperties (BoxProperties *boxProperties)
1149 : {
1150 : BoxProperties *data;
1151 : gboolean ret;
1152 :
1153 248 : data = getProperties (boxProperties->name);
1154 248 : if (NULL != data) {
1155 0 : return TRUE;
1156 : }
1157 :
1158 248 : G_LOCK (box_properties_table);
1159 248 : ret = g_hash_table_insert (properties_table, boxProperties->name, boxProperties);
1160 248 : G_UNLOCK (box_properties_table);
1161 :
1162 248 : return ret;
1163 : }
|