Line data Source code
1 : /* SPDX-License-Identifier: LGPL-2.1-only */
2 : /**
3 : * GStreamer / NNStreamer tensor-decoder bounding box properties
4 : * Copyright (C) 2024 Yelin Jeong <yelini.jeong@samsung.com>
5 : */
6 : /**
7 : * @file yolo.cc
8 : * @date 13 May 2024
9 : * @brief NNStreamer tensor-decoder bounding box properties
10 : *
11 : * @see https://github.com/nnstreamer/nnstreamer
12 : * @author Yelin Jeong <yelini.jeong@samsung.com>
13 : * @bug No known bugs except for NYI items
14 : *
15 : */
16 :
17 : #include <nnstreamer_plugin_api_util.h>
18 : #include "../tensordec-boundingbox.h"
19 :
20 : #define YOLO_DETECTION_CONF_THRESHOLD (0.25)
21 : #define YOLO_DETECTION_IOU_THRESHOLD (0.45)
22 : #define DEFAULT_DETECTION_NUM_INFO_YOLO5 (5)
23 : #define DEFAULT_DETECTION_NUM_INFO_YOLO8 (4)
24 : #define DEFAULT_DETECTION_NUM_INFO_YOLO8_OBB (5)
25 :
26 : /**
27 : * @brief Class for YoloV5 box properties
28 : */
29 : class YoloV5 : public BoxProperties
30 : {
31 : public:
32 : YoloV5 ();
33 : ~YoloV5 ();
34 : int setOptionInternal (const char *param);
35 : int checkCompatible (const GstTensorsConfig *config);
36 : GArray *decode (const GstTensorsConfig *config, const GstTensorMemory *input);
37 :
38 : private:
39 : /* From option3, whether the output values are scaled or not */
40 : int scaled_output;
41 : gfloat conf_threshold;
42 : gfloat iou_threshold;
43 : };
44 :
45 : /**
46 : * @brief Class for YoloV8 box properties
47 : */
48 : class YoloV8 : public BoxProperties
49 : {
50 : public:
51 : YoloV8 ();
52 : ~YoloV8 ();
53 : int setOptionInternal (const char *param);
54 : int checkCompatible (const GstTensorsConfig *config);
55 : GArray *decode (const GstTensorsConfig *config, const GstTensorMemory *input);
56 :
57 : private:
58 : /* From option3, whether the output values are scaled or not */
59 : int scaled_output;
60 : gfloat conf_threshold;
61 : gfloat iou_threshold;
62 : };
63 :
64 : /**
65 : * @brief Class for YoloV10 box properties
66 : */
67 : class YoloV10 : public BoxProperties
68 : {
69 : public:
70 : YoloV10 ();
71 : ~YoloV10 ();
72 : int setOptionInternal (const char *param);
73 : int checkCompatible (const GstTensorsConfig *config);
74 : GArray *decode (const GstTensorsConfig *config, const GstTensorMemory *input);
75 :
76 : private:
77 : gfloat conf_threshold;
78 : };
79 :
80 : /**
81 : * @brief Class for YoloV8 box properties
82 : */
83 : class YoloV8_OBB : public BoxProperties
84 : {
85 : public:
86 : YoloV8_OBB ();
87 : ~YoloV8_OBB ();
88 : int setOptionInternal (const char *param);
89 : int checkCompatible (const GstTensorsConfig *config);
90 : GArray *decode (const GstTensorsConfig *config, const GstTensorMemory *input);
91 :
92 : private:
93 : /* From option3, whether the output values are scaled or not */
94 : int scaled_output;
95 : gfloat conf_threshold;
96 : gfloat iou_threshold;
97 : };
98 :
99 : static BoxProperties *yolo5 = nullptr;
100 : static BoxProperties *yolo8 = nullptr;
101 : static BoxProperties *yolo10 = nullptr;
102 : static BoxProperties *yolo8_obb = nullptr;
103 :
104 : #ifdef __cplusplus
105 : extern "C" {
106 : #endif /* __cplusplus */
107 : void init_properties_yolo5 (void) __attribute__ ((constructor));
108 : void fini_properties_yolo5 (void) __attribute__ ((destructor));
109 :
110 : void init_properties_yolo8 (void) __attribute__ ((constructor));
111 : void fini_properties_yolo8 (void) __attribute__ ((destructor));
112 :
113 : void init_properties_yolo10 (void) __attribute__ ((constructor));
114 : void fini_properties_yolo10 (void) __attribute__ ((destructor));
115 :
116 : void init_properties_yolo8_obb (void) __attribute__ ((constructor));
117 : void fini_properties_yolo8_obb (void) __attribute__ ((destructor));
118 : #ifdef __cplusplus
119 : }
120 : #endif /* __cplusplus */
121 :
122 : /** @brief Constructor of YoloV5 */
123 15 : YoloV5::YoloV5 ()
124 : {
125 15 : scaled_output = 0;
126 15 : conf_threshold = YOLO_DETECTION_CONF_THRESHOLD;
127 15 : iou_threshold = YOLO_DETECTION_IOU_THRESHOLD;
128 15 : name = g_strdup_printf ("yolov5");
129 15 : }
130 :
131 : /** @brief Destructor of YoloV5 */
132 30 : YoloV5::~YoloV5 ()
133 : {
134 15 : g_free (name);
135 30 : }
136 :
137 : /** @brief Set internal option of YoloV5
138 : * @param[in] param The option string.
139 : */
140 : int
141 2 : YoloV5::setOptionInternal (const char *param)
142 : {
143 : gchar **options;
144 : int noptions;
145 :
146 2 : options = g_strsplit (param, ":", -1);
147 2 : noptions = g_strv_length (options);
148 2 : if (noptions > 0)
149 2 : scaled_output = (int) g_ascii_strtoll (options[0], NULL, 10);
150 2 : if (noptions > 1)
151 2 : conf_threshold = (gfloat) g_ascii_strtod (options[1], NULL);
152 2 : if (noptions > 2)
153 2 : iou_threshold = (gfloat) g_ascii_strtod (options[2], NULL);
154 :
155 2 : nns_logi ("Setting YOLOV5/YOLOV8 decoder as scaled_output: %d, conf_threshold: %.2f, iou_threshold: %.2f",
156 : scaled_output, conf_threshold, iou_threshold);
157 :
158 2 : g_strfreev (options);
159 2 : return TRUE;
160 : }
161 :
162 : /** @brief Check compatibility of given tensors config
163 : * @param[in] param The option string.
164 : */
165 : int
166 18 : YoloV5::checkCompatible (const GstTensorsConfig *config)
167 : {
168 18 : GstTensorInfo *info = nullptr;
169 : const guint *dim;
170 : int i;
171 :
172 18 : info = gst_tensors_info_get_nth_info ((GstTensorsInfo *) &config->info, 0);
173 18 : dim = info->dimension;
174 18 : if (!check_tensors (config, 1U))
175 0 : return FALSE;
176 :
177 18 : max_detection = ((i_width / 32) * (i_height / 32) + (i_width / 16) * (i_height / 16)
178 18 : + (i_width / 8) * (i_height / 8))
179 18 : * 3;
180 :
181 18 : g_return_val_if_fail (dim[0] == (total_labels + DEFAULT_DETECTION_NUM_INFO_YOLO5), FALSE);
182 8 : g_return_val_if_fail (dim[1] == max_detection, FALSE);
183 120 : for (i = 2; i < NNS_TENSOR_RANK_LIMIT; ++i)
184 112 : g_return_val_if_fail (dim[i] == 0 || dim[i] == 1, FALSE);
185 8 : return TRUE;
186 : }
187 :
188 : /**
189 : * @brief Decode input memory to out buffer
190 : * @param[in] config The structure of input tensor info.
191 : * @param[in] input The array of input tensor data. The maximum array size of input data is NNS_TENSOR_SIZE_LIMIT.
192 : */
193 : GArray *
194 4 : YoloV5::decode (const GstTensorsConfig *config, const GstTensorMemory *input)
195 : {
196 4 : GArray *results = NULL;
197 :
198 : int bIdx, numTotalBox;
199 : int cIdx, numTotalClass, cStartIdx, cIdxMax;
200 : float *boxinput;
201 4 : int is_output_scaled = scaled_output;
202 4 : GstTensorInfo *info = nullptr;
203 :
204 4 : numTotalBox = max_detection;
205 4 : numTotalClass = total_labels;
206 4 : cStartIdx = DEFAULT_DETECTION_NUM_INFO_YOLO5;
207 4 : cIdxMax = numTotalClass + cStartIdx;
208 :
209 : /* boxinput[numTotalBox][cIdxMax] */
210 4 : boxinput = (float *) input[0].data;
211 :
212 : /** Only support for float type model */
213 :
214 4 : info = gst_tensors_info_get_nth_info ((GstTensorsInfo *) &config->info, 0);
215 4 : g_assert (info->type == _NNS_FLOAT32);
216 :
217 4 : results = g_array_sized_new (FALSE, TRUE, sizeof (detectedObject), numTotalBox);
218 25204 : for (bIdx = 0; bIdx < numTotalBox; ++bIdx) {
219 25200 : float maxClassConfVal = -INFINITY;
220 25200 : int maxClassIdx = -1;
221 2041200 : for (cIdx = cStartIdx; cIdx < cIdxMax; ++cIdx) {
222 2016000 : if (boxinput[bIdx * cIdxMax + cIdx] > maxClassConfVal) {
223 37920 : maxClassConfVal = boxinput[bIdx * cIdxMax + cIdx];
224 37920 : maxClassIdx = cIdx;
225 : }
226 : }
227 :
228 25200 : if (maxClassConfVal * boxinput[bIdx * cIdxMax + 4] > conf_threshold) {
229 : detectedObject object;
230 : float cx, cy, w, h;
231 224 : cx = boxinput[bIdx * cIdxMax + 0];
232 224 : cy = boxinput[bIdx * cIdxMax + 1];
233 224 : w = boxinput[bIdx * cIdxMax + 2];
234 224 : h = boxinput[bIdx * cIdxMax + 3];
235 :
236 224 : if (!is_output_scaled) {
237 224 : cx *= (float) i_width;
238 224 : cy *= (float) i_height;
239 224 : w *= (float) i_width;
240 224 : h *= (float) i_height;
241 : }
242 :
243 224 : object.x = (int) (MAX (0.f, (cx - w / 2.f)));
244 224 : object.y = (int) (MAX (0.f, (cy - h / 2.f)));
245 224 : object.width = (int) (MIN ((float) i_width, w));
246 224 : object.height = (int) (MIN ((float) i_height, h));
247 :
248 224 : object.prob = maxClassConfVal * boxinput[bIdx * cIdxMax + 4];
249 224 : object.class_id = maxClassIdx - DEFAULT_DETECTION_NUM_INFO_YOLO5;
250 224 : object.tracking_id = 0;
251 224 : object.valid = TRUE;
252 224 : g_array_append_val (results, object);
253 : }
254 : }
255 :
256 4 : nms (results, iou_threshold, YOLOV5_BOUNDING_BOX);
257 4 : return results;
258 : }
259 :
260 : /** @brief Constructor of YoloV8 */
261 15 : YoloV8::YoloV8 ()
262 : {
263 15 : scaled_output = 0;
264 15 : conf_threshold = YOLO_DETECTION_CONF_THRESHOLD;
265 15 : iou_threshold = YOLO_DETECTION_IOU_THRESHOLD;
266 15 : name = g_strdup_printf ("yolov8");
267 15 : }
268 :
269 : /** @brief Destructor of YoloV8 */
270 30 : YoloV8::~YoloV8 ()
271 : {
272 15 : g_free (name);
273 30 : }
274 :
275 : /** @brief Set internal option of YoloV8 */
276 : int
277 1 : YoloV8::setOptionInternal (const char *param)
278 : {
279 : gchar **options;
280 : int noptions;
281 :
282 1 : options = g_strsplit (param, ":", -1);
283 1 : noptions = g_strv_length (options);
284 1 : if (noptions > 0)
285 1 : scaled_output = (int) g_ascii_strtoll (options[0], NULL, 10);
286 1 : if (noptions > 1)
287 1 : conf_threshold = (gfloat) g_ascii_strtod (options[1], NULL);
288 1 : if (noptions > 2)
289 1 : iou_threshold = (gfloat) g_ascii_strtod (options[2], NULL);
290 :
291 1 : nns_logi ("Setting YOLOV5/YOLOV8 decoder as scaled_output: %d, conf_threshold: %.2f, iou_threshold: %.2f",
292 : scaled_output, conf_threshold, iou_threshold);
293 :
294 1 : g_strfreev (options);
295 1 : return TRUE;
296 : }
297 :
298 : /** @brief Check compatibility of given tensors config */
299 : int
300 9 : YoloV8::checkCompatible (const GstTensorsConfig *config)
301 : {
302 9 : const guint *dim = config->info.info[0].dimension;
303 9 : g_autofree gchar *info_str = NULL;
304 : int i;
305 9 : if (!check_tensors (config, 1U)) {
306 0 : info_str = gst_tensors_info_to_string (&config->info);
307 0 : nns_loge ("Yolov8 bounding-box decoder needs at least 1 valid tensor. The given input tensor is: %s.",
308 : info_str);
309 0 : return FALSE;
310 : }
311 : /** Only support for float type model */
312 9 : if (config->info.info[0].type != _NNS_FLOAT32) {
313 5 : info_str = gst_tensors_info_to_string (&config->info);
314 5 : nns_loge ("Yolov8 bounding-box decoder accepts float32 input tensors only. The given input tensor is: %s.",
315 : info_str);
316 5 : return FALSE;
317 : }
318 :
319 4 : max_detection = (i_width / 32) * (i_height / 32) + (i_width / 16) * (i_height / 16)
320 4 : + (i_width / 8) * (i_height / 8);
321 :
322 4 : if (dim[0] != (total_labels + DEFAULT_DETECTION_NUM_INFO_YOLO8) || dim[1] != max_detection) {
323 0 : nns_loge ("yolov8 boundingbox decoder requires the input shape to be %d:%d:1. But given shape is %d:%d:1. `tensor_transform mode=transpose` would be helpful.",
324 : total_labels + DEFAULT_DETECTION_NUM_INFO_YOLO8, max_detection, dim[0], dim[1]);
325 0 : return FALSE;
326 : }
327 :
328 60 : for (i = 2; i < NNS_TENSOR_RANK_LIMIT; ++i)
329 56 : if (dim[i] != 0 && dim[i] != 1) {
330 0 : info_str = gst_tensors_info_to_string (&config->info);
331 0 : nns_loge ("Yolov8 bounding-box decoder accepts RANK=2 tensors (3rd and later dimensions should be 1 or 0). The given input tensor is: %s.",
332 : info_str);
333 0 : return FALSE;
334 : }
335 4 : return TRUE;
336 9 : }
337 :
338 : /**
339 : * @brief Decode input memory to out buffer
340 : * @param[in] config The structure of input tensor info.
341 : * @param[in] input The array of input tensor data. The maximum array size of input data is NNS_TENSOR_SIZE_LIMIT.
342 : */
343 : GArray *
344 1 : YoloV8::decode (const GstTensorsConfig *config, const GstTensorMemory *input)
345 : {
346 1 : GArray *results = NULL;
347 : int bIdx, numTotalBox;
348 : int cIdx, numTotalClass, cStartIdx, cIdxMax;
349 : float *boxinput;
350 1 : int is_output_scaled = scaled_output;
351 : UNUSED (config);
352 :
353 1 : numTotalBox = max_detection;
354 1 : numTotalClass = total_labels;
355 1 : cStartIdx = DEFAULT_DETECTION_NUM_INFO_YOLO8;
356 1 : cIdxMax = numTotalClass + cStartIdx;
357 :
358 : /* boxinput[numTotalBox][cIdxMax] */
359 1 : boxinput = (float *) input[0].data;
360 :
361 1 : results = g_array_sized_new (FALSE, TRUE, sizeof (detectedObject), numTotalBox);
362 2101 : for (bIdx = 0; bIdx < numTotalBox; ++bIdx) {
363 2100 : float maxClassConfVal = -INFINITY;
364 2100 : int maxClassIdx = -1;
365 170100 : for (cIdx = cStartIdx; cIdx < cIdxMax; ++cIdx) {
366 168000 : if (boxinput[bIdx * cIdxMax + cIdx] > maxClassConfVal) {
367 4569 : maxClassConfVal = boxinput[bIdx * cIdxMax + cIdx];
368 4569 : maxClassIdx = cIdx;
369 : }
370 : }
371 :
372 2100 : if (maxClassConfVal > conf_threshold) {
373 : detectedObject object;
374 : float cx, cy, w, h;
375 29 : cx = boxinput[bIdx * cIdxMax + 0];
376 29 : cy = boxinput[bIdx * cIdxMax + 1];
377 29 : w = boxinput[bIdx * cIdxMax + 2];
378 29 : h = boxinput[bIdx * cIdxMax + 3];
379 :
380 29 : if (!is_output_scaled) {
381 29 : cx *= (float) i_width;
382 29 : cy *= (float) i_height;
383 29 : w *= (float) i_width;
384 29 : h *= (float) i_height;
385 : }
386 :
387 29 : object.x = (int) (MAX (0.f, (cx - w / 2.f)));
388 29 : object.y = (int) (MAX (0.f, (cy - h / 2.f)));
389 29 : object.width = (int) (MIN ((float) i_width, w));
390 29 : object.height = (int) (MIN ((float) i_height, h));
391 :
392 29 : object.prob = maxClassConfVal;
393 29 : object.class_id = maxClassIdx - DEFAULT_DETECTION_NUM_INFO_YOLO8;
394 29 : object.tracking_id = 0;
395 29 : object.valid = TRUE;
396 29 : g_array_append_val (results, object);
397 : }
398 : }
399 :
400 1 : nms (results, iou_threshold, YOLOV8_BOUNDING_BOX);
401 1 : return results;
402 : }
403 :
404 : /** @brief Constructor of YoloV10 */
405 15 : YoloV10::YoloV10 ()
406 : {
407 15 : conf_threshold = YOLO_DETECTION_CONF_THRESHOLD;
408 15 : name = g_strdup_printf ("yolov10");
409 15 : }
410 :
411 : /** @brief Destructor of YoloV10 */
412 30 : YoloV10::~YoloV10 ()
413 : {
414 15 : g_free (name);
415 30 : }
416 :
417 : /** @brief Set internal option of YoloV10 */
418 : int
419 2 : YoloV10::setOptionInternal (const char *param)
420 : {
421 : gchar **options;
422 : int noptions;
423 :
424 2 : options = g_strsplit (param, ":", -1);
425 2 : noptions = g_strv_length (options);
426 :
427 2 : if (noptions > 1)
428 2 : conf_threshold = (gfloat) g_ascii_strtod (options[1], NULL);
429 :
430 2 : nns_logi ("Setting YOLOV10 decoder as conf_threshold: %.2f", conf_threshold);
431 :
432 2 : g_strfreev (options);
433 2 : return TRUE;
434 : }
435 :
436 : /** @brief Check compatibility of given tensors config */
437 : int
438 27 : YoloV10::checkCompatible (const GstTensorsConfig *config)
439 : {
440 27 : const guint *dim = config->info.info[0].dimension;
441 27 : g_autofree gchar *info_str = NULL;
442 : int i;
443 :
444 27 : if (!check_tensors (config, 1U)) {
445 0 : info_str = gst_tensors_info_to_string (&config->info);
446 0 : nns_loge ("YoloV10 bounding-box decoder needs at least 1 valid tensor. The given input tensor is: %s.",
447 : info_str);
448 0 : return FALSE;
449 : }
450 :
451 : /** Only support for float type model */
452 27 : if (config->info.info[0].type != _NNS_FLOAT32) {
453 23 : info_str = gst_tensors_info_to_string (&config->info);
454 23 : nns_loge ("YoloV10 bounding-box decoder accepts float32 input tensors only. The given input tensor is: %s.",
455 : info_str);
456 23 : return FALSE;
457 : }
458 :
459 : /* Expected shape is 6:#MAX_DET:1 */
460 4 : if (dim[0] != 6U) {
461 0 : nns_loge ("YoloV10 boundingbox decoder requires the input shape to be 6:#MAX_DET:1. But given shape is %u:%u:1. Check the output shape of yolov10 model.",
462 : dim[0], dim[1]);
463 0 : return FALSE;
464 : }
465 :
466 4 : max_detection = dim[1];
467 :
468 60 : for (i = 2; i < NNS_TENSOR_RANK_LIMIT; ++i) {
469 56 : if (dim[i] != 0 && dim[i] != 1) {
470 0 : info_str = gst_tensors_info_to_string (&config->info);
471 0 : nns_loge ("YoloV10 bounding-box decoder accepts RANK=2 tensors (3rd and later dimensions should be 1 or 0). The given input tensor is: %s.",
472 : info_str);
473 :
474 0 : return FALSE;
475 : }
476 : }
477 :
478 4 : return TRUE;
479 27 : }
480 :
481 : /**
482 : * @brief Decode input memory to out buffer
483 : * @param[in] config The structure of input tensor info.
484 : * @param[in] input The array of input tensor data.
485 : */
486 : GArray *
487 1 : YoloV10::decode (const GstTensorsConfig *config, const GstTensorMemory *input)
488 : {
489 1 : GArray *results = NULL;
490 : guint bIdx;
491 : float *boxinput;
492 : UNUSED (config);
493 :
494 : /* boxinput[MAX_DET][6] */
495 1 : boxinput = (float *) input[0].data;
496 :
497 1 : results = g_array_sized_new (FALSE, TRUE, sizeof (detectedObject), max_detection);
498 6 : for (bIdx = 0; bIdx < max_detection; ++bIdx) {
499 : detectedObject object;
500 : float x1, x2, y1, y2, confidence, class_index;
501 :
502 : /* parse output of yolov10 */
503 6 : x1 = boxinput[bIdx * 6 + 0];
504 6 : y1 = boxinput[bIdx * 6 + 1];
505 6 : x2 = boxinput[bIdx * 6 + 2];
506 6 : y2 = boxinput[bIdx * 6 + 3];
507 6 : confidence = boxinput[bIdx * 6 + 4];
508 6 : class_index = boxinput[bIdx * 6 + 5];
509 :
510 : /* output of yolov10 is sorted */
511 6 : if (confidence < conf_threshold) {
512 : /* break once confidence value falls */
513 1 : break;
514 : }
515 :
516 : /* scale to given width and height */
517 5 : y1 *= (float) i_height;
518 5 : x1 *= (float) i_width;
519 5 : x2 *= (float) i_width;
520 5 : y2 *= (float) i_height;
521 :
522 5 : object.x = (int) (MAX (0.f, x1));
523 5 : object.y = (int) (MAX (0.f, y1));
524 5 : object.width = (int) (MIN ((float) i_width, x2 - x1));
525 5 : object.height = (int) (MIN ((float) i_height, y2 - y1));
526 5 : object.class_id = (int) class_index;
527 5 : object.prob = confidence;
528 :
529 5 : object.tracking_id = 0;
530 5 : object.valid = TRUE;
531 :
532 5 : if (object.class_id >= (int) total_labels) {
533 0 : nns_logw ("Class id %d is out of range (%u). Skip this object.",
534 : object.class_id, total_labels);
535 0 : continue;
536 : }
537 :
538 5 : g_array_append_val (results, object);
539 : }
540 :
541 1 : return results;
542 : }
543 :
544 : /** @brief Constructor of YoloV8-OBB */
545 15 : YoloV8_OBB::YoloV8_OBB ()
546 : {
547 15 : scaled_output = 0;
548 15 : conf_threshold = YOLO_DETECTION_CONF_THRESHOLD;
549 15 : iou_threshold = YOLO_DETECTION_IOU_THRESHOLD;
550 15 : name = g_strdup_printf ("yolov8-obb");
551 15 : }
552 :
553 : /** @brief Destructor of YoloV8-OBB */
554 30 : YoloV8_OBB::~YoloV8_OBB ()
555 : {
556 15 : g_free (name);
557 30 : }
558 :
559 : /** @brief Set internal option of YoloV8-OBB */
560 : int
561 0 : YoloV8_OBB::setOptionInternal (const char *param)
562 : {
563 : gchar **options;
564 : int noptions;
565 :
566 0 : options = g_strsplit (param, ":", -1);
567 0 : noptions = g_strv_length (options);
568 0 : if (noptions > 0)
569 0 : scaled_output = (int) g_ascii_strtoll (options[0], NULL, 10);
570 0 : if (noptions > 1)
571 0 : conf_threshold = (gfloat) g_ascii_strtod (options[1], NULL);
572 0 : if (noptions > 2)
573 0 : iou_threshold = (gfloat) g_ascii_strtod (options[2], NULL);
574 :
575 0 : nns_logi ("Setting YOLOV8-OBB decoder as scaled_output: %d, conf_threshold: %.2f, iou_threshold: %.2f",
576 : scaled_output, conf_threshold, iou_threshold);
577 :
578 0 : g_strfreev (options);
579 0 : return TRUE;
580 : }
581 :
582 : /** @brief Check compatibility of given tensors config */
583 : int
584 0 : YoloV8_OBB::checkCompatible (const GstTensorsConfig *config)
585 : {
586 0 : const guint *dim = config->info.info[0].dimension;
587 0 : g_autofree gchar *info_str = NULL;
588 : int i;
589 :
590 0 : if (!check_tensors (config, 1U)) {
591 0 : info_str = gst_tensors_info_to_string (&config->info);
592 0 : nns_loge ("YoloV8-OBB bounding-box decoder needs at least 1 valid tensor. The given input tensor is: %s.",
593 : info_str);
594 0 : return FALSE;
595 : }
596 :
597 : /** Only support for float type model */
598 0 : if (config->info.info[0].type != _NNS_FLOAT32) {
599 0 : info_str = gst_tensors_info_to_string (&config->info);
600 0 : nns_loge ("YoloV8-OBB bounding-box decoder accepts float32 input tensors only. The given input tensor is: %s.",
601 : info_str);
602 0 : return FALSE;
603 : }
604 :
605 0 : max_detection = (i_width / 32) * (i_height / 32) + (i_width / 16) * (i_height / 16)
606 0 : + (i_width / 8) * (i_height / 8);
607 :
608 0 : if (dim[0] != (total_labels + DEFAULT_DETECTION_NUM_INFO_YOLO8_OBB) || dim[1] != max_detection) {
609 0 : nns_loge ("yolov8-obb boundingbox decoder requires the input shape to be %d:%d:1. But given shape is %d:%d:1. `tensor_transform mode=transpose` would be helpful.",
610 : total_labels + DEFAULT_DETECTION_NUM_INFO_YOLO8_OBB, max_detection,
611 : dim[0], dim[1]);
612 0 : return FALSE;
613 : }
614 :
615 0 : for (i = 2; i < NNS_TENSOR_RANK_LIMIT; ++i) {
616 0 : if (dim[i] != 0 && dim[i] != 1) {
617 0 : info_str = gst_tensors_info_to_string (&config->info);
618 0 : nns_loge ("YoloV8-OBB bounding-box decoder accepts RANK=2 tensors (3rd and later dimensions should be 1 or 0). The given input tensor is: %s.",
619 : info_str);
620 :
621 0 : return FALSE;
622 : }
623 : }
624 :
625 0 : return TRUE;
626 0 : }
627 :
628 : /**
629 : * @brief Decode input memory to out buffer
630 : * @param[in] config The structure of input tensor info.
631 : * @param[in] input The array of input tensor data.
632 : */
633 : GArray *
634 0 : YoloV8_OBB::decode (const GstTensorsConfig *config, const GstTensorMemory *input)
635 : {
636 0 : GArray *results = NULL;
637 : int bIdx, numTotalBox;
638 : int cIdx, numTotalClass, cStartIdx, cIdxMax;
639 : float *boxinput; /* boxinput = [x,y,w,h,...class...,theta] */
640 0 : int is_output_scaled = scaled_output;
641 : UNUSED (config);
642 :
643 0 : numTotalBox = max_detection;
644 0 : numTotalClass = total_labels;
645 0 : cStartIdx = DEFAULT_DETECTION_NUM_INFO_YOLO8_OBB - 1;
646 0 : cIdxMax = numTotalClass + DEFAULT_DETECTION_NUM_INFO_YOLO8_OBB;
647 :
648 0 : boxinput = (float *) input[0].data;
649 :
650 0 : results = g_array_sized_new (FALSE, TRUE, sizeof (detectedObject), numTotalBox);
651 0 : for (bIdx = 0; bIdx < numTotalBox; ++bIdx) {
652 0 : float maxClassConfVal = -INFINITY;
653 0 : int maxClassIdx = -1;
654 :
655 0 : for (cIdx = cStartIdx; cIdx < cIdxMax - 1; ++cIdx) {
656 0 : if (boxinput[bIdx * cIdxMax + cIdx] > maxClassConfVal) {
657 0 : maxClassConfVal = boxinput[bIdx * cIdxMax + cIdx];
658 0 : maxClassIdx = cIdx;
659 : }
660 : }
661 :
662 0 : if (maxClassConfVal > conf_threshold) {
663 : detectedObject object;
664 : float cx, cy, w, h, theta;
665 :
666 0 : cx = boxinput[bIdx * cIdxMax + 0];
667 0 : cy = boxinput[bIdx * cIdxMax + 1];
668 0 : w = boxinput[bIdx * cIdxMax + 2];
669 0 : h = boxinput[bIdx * cIdxMax + 3];
670 0 : theta = boxinput[bIdx * cIdxMax + numTotalClass + DEFAULT_DETECTION_NUM_INFO_YOLO8_OBB - 1];
671 :
672 0 : if (!is_output_scaled) {
673 0 : cx *= (float) i_width;
674 0 : cy *= (float) i_height;
675 0 : w *= (float) i_width;
676 0 : h *= (float) i_height;
677 : }
678 :
679 0 : object.x = (int) (MAX (0.f, cx));
680 0 : object.y = (int) (MAX (0.f, cy));
681 0 : object.width = (int) w;
682 0 : object.height = (int) h;
683 0 : object.angle = theta;
684 :
685 0 : object.prob = maxClassConfVal;
686 0 : object.class_id = maxClassIdx - cStartIdx;
687 0 : object.tracking_id = 0;
688 0 : object.valid = TRUE;
689 :
690 0 : g_array_append_val (results, object);
691 : }
692 : }
693 :
694 0 : nms (results, iou_threshold, YOLOV8_ORIENTED_BOUNDING_BOX);
695 0 : return results;
696 : }
697 :
698 : /** @brief Initialize this object for tensor decoder bounding box */
699 : void
700 15 : init_properties_yolo5 ()
701 : {
702 15 : yolo5 = new YoloV5 ();
703 15 : BoundingBox::addProperties (yolo5);
704 15 : }
705 :
706 : /** @brief Destruct this object for tensor decoder bounding box */
707 : void
708 15 : fini_properties_yolo5 ()
709 : {
710 15 : delete yolo5;
711 15 : }
712 :
713 : /** @brief Initialize this object for tensor decoder bounding box */
714 : void
715 15 : init_properties_yolo8 ()
716 : {
717 15 : yolo8 = new YoloV8 ();
718 15 : BoundingBox::addProperties (yolo8);
719 15 : }
720 :
721 : /** @brief Destruct this object for tensor decoder bounding box */
722 : void
723 15 : fini_properties_yolo8 ()
724 : {
725 15 : delete yolo8;
726 15 : }
727 :
728 : /** @brief Initialize this object for tensor decoder bounding box */
729 : void
730 15 : init_properties_yolo10 ()
731 : {
732 15 : yolo10 = new YoloV10 ();
733 15 : BoundingBox::addProperties (yolo10);
734 15 : }
735 :
736 : /** @brief Destruct this object for tensor decoder bounding box */
737 : void
738 15 : fini_properties_yolo10 ()
739 : {
740 15 : delete yolo10;
741 15 : }
742 :
743 : /** @brief Initialize this object for tensor decoder bounding box */
744 : void
745 15 : init_properties_yolo8_obb ()
746 : {
747 15 : yolo8_obb = new YoloV8_OBB ();
748 15 : BoundingBox::addProperties (yolo8_obb);
749 15 : }
750 :
751 : /** @brief Destruct this object for tensor decoder bounding box */
752 : void
753 15 : fini_properties_yolo8_obb ()
754 : {
755 15 : delete yolo8_obb;
756 15 : }
|