Line data Source code
1 : /* SPDX-License-Identifier: LGPL-2.1-only */
2 : /**
3 : * GStreamer / NNStreamer tensor-decoder bounding box properties
4 : * Copyright (C) 2024 Yelin Jeong <yelini.jeong@samsung.com>
5 : */
6 : /**
7 : * @file mppalmdetection.cc
8 : * @date 13 May 2024
9 : * @brief NNStreamer tensor-decoder bounding box properties
10 : *
11 : * @see https://github.com/nnstreamer/nnstreamer
12 : * @author Yelin Jeong <yelini.jeong@samsung.com>
13 : * @bug No known bugs except for NYI items
14 : *
15 : */
16 :
17 : #include "../tensordec-boundingbox.h"
18 :
/** @brief Required size of dimension[0] of the first input tensor (see checkCompatible) */
#define INFO_SIZE (18)
/** @brief Number of input tensors this decoder mode works with */
#define MAX_TENSORS (2U)
/** @brief Upper bound accepted for the number of detection candidates */
#define MAX_DETECTION (2016)

/** @brief Default number of stride layers */
#define NUM_LAYERS_DEFAULT (4)
/** @brief Default minimum anchor scale */
#define MIN_SCALE_DEFAULT (1.0)
/** @brief Default maximum anchor scale */
#define MAX_SCALE_DEFAULT (1.0)
/** @brief Default anchor X offset */
#define OFFSET_X_DEFAULT (0.5)
/** @brief Default anchor Y offset */
#define OFFSET_Y_DEFAULT (0.5)
/** @brief Default stride of layer 0 */
#define STRIDE_0_DEFAULT (8)
/** @brief Default stride of layer 1 */
#define STRIDE_1_DEFAULT (16)
/** @brief Default stride of layer 2 */
#define STRIDE_2_DEFAULT (16)
/** @brief Default stride of layer 3 */
#define STRIDE_3_DEFAULT (16)
/** @brief Default minimum score a candidate needs to be reported */
#define MIN_SCORE_THRESHOLD_DEFAULT (0.5)

/** @brief Stride parameter size (not referenced elsewhere in this file) */
#define PARAMS_STRIDE_SIZE (8)
/** @brief Maximum number of ':'-separated option fields; also the capacity of the strides array */
#define PARAMS_MAX (13)
36 :
37 : /**
38 : * @brief Class for MpPalmDetection box properties
39 : */
40 : class MpPalmDetection : public BoxProperties
41 : {
42 : public:
43 : MpPalmDetection ();
44 : ~MpPalmDetection ();
45 : void mp_palm_detection_generate_anchors ();
46 : int setOptionInternal (const char *param);
47 : int checkCompatible (const GstTensorsConfig *config);
48 :
49 : GArray *decode (const GstTensorsConfig *config, const GstTensorMemory *input);
50 :
51 : private:
52 : gint num_layers;
53 : /** Number of stride layers */
54 : gfloat min_scale; /** Minimum scale */
55 : gfloat max_scale; /** Maximum scale */
56 : gfloat offset_x; /** anchor X offset */
57 : gfloat offset_y; /** anchor Y offset */
58 : gint strides[PARAMS_MAX]; /** Stride data for each layers */
59 : gfloat min_score_threshold; /** minimum threshold of score */
60 :
61 : GArray *anchors;
62 : };
63 :
/**
 * @brief C++-Template-like box location calculation for the MediaPipe palm detection model
 * @details Expands to one complete `case` of the switch in MpPalmDetection::decode ().
 *          Besides its arguments it uses names from the expanding scope:
 *          `results`, `info`, `max_detection`, `min_score_threshold`,
 *          `i_width`, `i_height` and `this->anchors`.
 *          Each raw score is clamped to [-100, 100] and passed through a sigmoid;
 *          candidates below min_score_threshold are dropped. The remaining boxes
 *          are decoded relative to the anchor with the same index.
 *          The number of processed candidates is limited to the number of
 *          available anchors so that the anchor array is never read out of bounds.
 * @param[in] _type The tensor element type of the input pointers
 * @param[in] _nns_type nnstreamer enum corresponding to _type (used as the case label)
 * @param[in] scoreinput Input Tensor Data (Detection scores)
 * @param[in] boxesinput Input Tensor Data (Boxes)
 * @param[in] config Tensor configs of the input tensors
 * @param[out] results The object returned. (GArray with detectedObject)
 */
#define _get_objects_mp_palm_detection(_type, _nns_type, scoreinput, boxesinput, config) \
  case _nns_type:                                                                        \
    {                                                                                    \
      guint d_;                                                                          \
      _type *scores_ = (_type *) scoreinput;                                             \
      _type *boxes_ = (_type *) boxesinput;                                              \
      guint num_ = (guint) max_detection;                                                \
      info = gst_tensors_info_get_nth_info ((GstTensorsInfo *) &config->info, 0);        \
      size_t boxbpi_ = info->dimension[0];                                               \
      /* Every candidate needs its own anchor; never index past the anchor array. */    \
      if (num_ > this->anchors->len) {                                                   \
        GST_WARNING ("Only %u anchors available for %u detections; extra detections ignored", \
            this->anchors->len, num_);                                                   \
        num_ = this->anchors->len;                                                       \
      }                                                                                  \
      results = g_array_sized_new (FALSE, TRUE, sizeof (detectedObject), num_);          \
      for (d_ = 0; d_ < num_; d_++) {                                                    \
        gfloat y_center, x_center, h, w;                                                 \
        gfloat ymin, xmin;                                                               \
        int y, x, width, height;                                                         \
        detectedObject object;                                                           \
        gfloat score = (gfloat) scores_[d_];                                             \
        _type *box = boxes_ + boxbpi_ * d_;                                              \
        anchor *a = &g_array_index (this->anchors, anchor, d_);                          \
        /* Clamp before the sigmoid so that exp () cannot overflow. */                   \
        score = MAX (score, -100.0f);                                                    \
        score = MIN (score, 100.0f);                                                     \
        score = 1.0f / (1.0f + exp (-score));                                            \
        if (score < min_score_threshold)                                                 \
          continue;                                                                      \
        /* box[0..3] = y offset, x offset, height, width in input pixels. */             \
        y_center = (box[0] * 1.f) / i_height * a->h + a->y_center;                       \
        x_center = (box[1] * 1.f) / i_width * a->w + a->x_center;                        \
        h = (box[2] * 1.f) / i_height * a->h;                                            \
        w = (box[3] * 1.f) / i_width * a->w;                                             \
        ymin = y_center - h / 2.f;                                                       \
        xmin = x_center - w / 2.f;                                                       \
        y = ymin * i_height;                                                             \
        x = xmin * i_width;                                                              \
        width = w * i_width;                                                             \
        height = h * i_height;                                                           \
        object.class_id = 0;                                                             \
        object.x = MAX (0, x);                                                           \
        object.y = MAX (0, y);                                                           \
        object.width = width;                                                            \
        object.height = height;                                                          \
        object.prob = score;                                                             \
        object.valid = TRUE;                                                             \
        g_array_append_val (results, object);                                            \
      }                                                                                  \
    }                                                                                    \
    break
117 :
/**
 * @brief Macro to simplify calling _get_objects_mp_palm_detection
 * @details Supplies the data pointers of `detections` and `boxes` and the
 *          `config` of the expanding scope, so only the element type and its
 *          nnstreamer enum have to be given.
 */
#define _get_objects_mp_palm_detection_(_elem_type, _nns_type) \
  _get_objects_mp_palm_detection (                             \
      _elem_type, _nns_type, (detections->data), (boxes->data), config)
121 :
/**
 * @brief Overwrite an option with the idx-th option field, if that field exists
 * @details Uses `noptions` and `options` of the expanding scope. When fewer
 *          than idx + 1 fields were given, the option keeps its current value.
 *          Wrapped in do { } while (0) so that the macro behaves as a single
 *          statement and cannot capture a following `else`.
 * @param[out] option The lvalue to be updated
 * @param[in] type The type the parsed number is cast to
 * @param[in] idx Index of the option field
 */
#define mp_palm_detection_option(option, type, idx)        \
  do {                                                     \
    if (noptions > (idx))                                  \
      (option) = (type) g_strtod (options[(idx)], NULL);   \
  } while (0)
125 :
/**
 * @brief Calculate anchor scale
 * @param[in] min_scale Scale of the first stride
 * @param[in] max_scale Scale approached by the last stride
 * @param[in] stride_index Index of the stride the scale is computed for
 * @param[in] num_strides Total number of strides
 * @return The mean of both scales when there is a single stride, otherwise
 *         the linear interpolation between min_scale and max_scale.
 */
static gfloat
_calculate_scale (float min_scale, float max_scale, int stride_index, int num_strides)
{
  if (num_strides != 1)
    return min_scale + (max_scale - min_scale) * 1.0 * stride_index / (num_strides - 1.0f);

  return (min_scale + max_scale) * 0.5f;
}
138 :
139 : static BoxProperties *mp_palm_detection = nullptr;
140 :
141 : #ifdef __cplusplus
142 : extern "C" {
143 : #endif /* __cplusplus */
144 : void init_properties_mp_palm_detection (void) __attribute__ ((constructor));
145 : void fini_properties_mp_palm_detection (void) __attribute__ ((destructor));
146 : #ifdef __cplusplus
147 : }
148 : #endif /* __cplusplus */
149 :
150 : /**
151 : * @brief Generate anchor information
152 : */
153 : void
154 2 : MpPalmDetection::mp_palm_detection_generate_anchors ()
155 : {
156 2 : int layer_id = 0;
157 : guint i;
158 :
159 6 : while (layer_id < num_layers) {
160 4 : GArray *aspect_ratios = g_array_new (FALSE, TRUE, sizeof (gfloat));
161 4 : GArray *scales = g_array_new (FALSE, TRUE, sizeof (gfloat));
162 4 : GArray *anchor_height = g_array_new (FALSE, TRUE, sizeof (gfloat));
163 4 : GArray *anchor_width = g_array_new (FALSE, TRUE, sizeof (gfloat));
164 :
165 4 : int last_same_stride_layer = layer_id;
166 :
167 4 : while (last_same_stride_layer < num_layers
168 12 : && strides[last_same_stride_layer] == strides[layer_id]) {
169 : gfloat scale;
170 8 : gfloat ratio = 1.0f;
171 8 : g_array_append_val (aspect_ratios, ratio);
172 8 : g_array_append_val (aspect_ratios, ratio);
173 8 : scale = _calculate_scale (min_scale, max_scale, last_same_stride_layer, num_layers);
174 8 : g_array_append_val (scales, scale);
175 8 : scale = _calculate_scale (min_scale, max_scale, last_same_stride_layer + 1, num_layers);
176 8 : g_array_append_val (scales, scale);
177 8 : last_same_stride_layer++;
178 : }
179 :
180 20 : for (i = 0; i < aspect_ratios->len; ++i) {
181 16 : const float ratio_sqrts = sqrt (g_array_index (aspect_ratios, gfloat, i));
182 16 : const gfloat sc = g_array_index (scales, gfloat, i);
183 16 : gfloat anchor_height_ = sc / ratio_sqrts;
184 16 : gfloat anchor_width_ = sc * ratio_sqrts;
185 16 : g_array_append_val (anchor_height, anchor_height_);
186 16 : g_array_append_val (anchor_width, anchor_width_);
187 : }
188 :
189 : {
190 4 : int feature_map_height = 0;
191 4 : int feature_map_width = 0;
192 : int x, y;
193 : int anchor_id;
194 :
195 4 : const int stride = strides[layer_id];
196 4 : feature_map_height = ceil (1.0f * 192 / stride);
197 4 : feature_map_width = ceil (1.0f * 192 / stride);
198 76 : for (y = 0; y < feature_map_height; ++y) {
199 1512 : for (x = 0; x < feature_map_width; ++x) {
200 5472 : for (anchor_id = 0; anchor_id < (int) aspect_ratios->len; ++anchor_id) {
201 4032 : const float x_center = (x + offset_x) * 1.0f / feature_map_width;
202 4032 : const float y_center = (y + offset_y) * 1.0f / feature_map_height;
203 :
204 : const anchor a = { .x_center = x_center,
205 : .y_center = y_center,
206 4032 : .w = g_array_index (anchor_width, gfloat, anchor_id),
207 4032 : .h = g_array_index (anchor_height, gfloat, anchor_id) };
208 4032 : g_array_append_val (anchors, a);
209 : }
210 : }
211 : }
212 4 : layer_id = last_same_stride_layer;
213 : }
214 :
215 4 : g_array_free (anchor_height, TRUE);
216 4 : g_array_free (anchor_width, TRUE);
217 4 : g_array_free (aspect_ratios, TRUE);
218 4 : g_array_free (scales, TRUE);
219 : }
220 2 : }
221 :
222 : /** @brief Constructor of MpPalmDetection */
223 15 : MpPalmDetection::MpPalmDetection ()
224 : {
225 15 : max_detection = 0;
226 15 : num_layers = NUM_LAYERS_DEFAULT;
227 15 : min_scale = MIN_SCALE_DEFAULT;
228 15 : max_scale = MAX_SCALE_DEFAULT;
229 15 : offset_x = OFFSET_X_DEFAULT;
230 15 : offset_y = OFFSET_Y_DEFAULT;
231 15 : strides[0] = STRIDE_0_DEFAULT;
232 15 : strides[1] = STRIDE_1_DEFAULT;
233 15 : strides[2] = STRIDE_2_DEFAULT;
234 15 : strides[3] = STRIDE_3_DEFAULT;
235 15 : min_score_threshold = MIN_SCORE_THRESHOLD_DEFAULT;
236 15 : anchors = g_array_new (FALSE, TRUE, sizeof (anchor));
237 15 : name = g_strdup_printf ("mp-palm-detection");
238 15 : }
239 :
240 : /** @brief Destructor of MpPalmDetection */
241 30 : MpPalmDetection::~MpPalmDetection ()
242 : {
243 15 : if (anchors)
244 15 : g_array_free (anchors, TRUE);
245 15 : anchors = NULL;
246 15 : g_free (name);
247 30 : }
248 :
249 : /** @brief Set internal option of MpPalmDetection
250 : * @param[in] param The option string.
251 : */
252 : int
253 2 : MpPalmDetection::setOptionInternal (const char *param)
254 : {
255 : /* Load palm detection info from option3 */
256 : gchar **options;
257 : int noptions, idx;
258 2 : int ret = TRUE;
259 :
260 2 : options = g_strsplit (param, ":", -1);
261 2 : noptions = g_strv_length (options);
262 :
263 2 : if (noptions > PARAMS_MAX) {
264 0 : GST_ERROR ("Invalid MP PALM DETECTION PARAM length: %d", noptions);
265 0 : ret = FALSE;
266 0 : goto exit_mp_palm_detection;
267 : }
268 :
269 2 : mp_palm_detection_option (min_score_threshold, gfloat, 0);
270 2 : mp_palm_detection_option (num_layers, gint, 1);
271 2 : mp_palm_detection_option (min_scale, gfloat, 2);
272 2 : mp_palm_detection_option (max_scale, gfloat, 3);
273 2 : mp_palm_detection_option (offset_x, gfloat, 4);
274 2 : mp_palm_detection_option (offset_y, gfloat, 5);
275 :
276 10 : for (idx = 6; idx < num_layers + 6; idx++) {
277 8 : mp_palm_detection_option (strides[idx - 6], gint, idx);
278 : }
279 2 : mp_palm_detection_generate_anchors ();
280 :
281 2 : exit_mp_palm_detection:
282 2 : g_strfreev (options);
283 2 : return ret;
284 : }
285 :
286 : /** @brief Check compatibility of given tensors config */
287 : int
288 18 : MpPalmDetection::checkCompatible (const GstTensorsConfig *config)
289 : {
290 : const uint32_t *dim1, *dim2;
291 : int i;
292 18 : GstTensorInfo *info = nullptr;
293 :
294 18 : if (!check_tensors (config, MAX_TENSORS))
295 10 : return FALSE;
296 :
297 : /* Check if the first tensor is compatible */
298 8 : info = gst_tensors_info_get_nth_info ((GstTensorsInfo *) &config->info, 0);
299 8 : dim1 = info->dimension;
300 :
301 8 : g_return_val_if_fail (dim1[0] == INFO_SIZE, FALSE);
302 8 : g_return_val_if_fail (dim1[1] > 0, FALSE);
303 8 : g_return_val_if_fail (dim1[2] == 1, FALSE);
304 112 : for (i = 3; i < NNS_TENSOR_RANK_LIMIT; i++)
305 104 : g_return_val_if_fail (dim1[i] == 0 || dim1[i] == 1, FALSE);
306 :
307 : /* Check if the second tensor is compatible */
308 8 : info = gst_tensors_info_get_nth_info ((GstTensorsInfo *) &config->info, 1);
309 8 : dim2 = info->dimension;
310 8 : g_return_val_if_fail (dim2[0] == 1, FALSE);
311 8 : g_return_val_if_fail (dim1[1] == dim2[1], FALSE);
312 120 : for (i = 2; i < NNS_TENSOR_RANK_LIMIT; i++)
313 112 : g_return_val_if_fail (dim2[i] == 0 || dim2[i] == 1, FALSE);
314 :
315 : /* Check consistency with max_detection */
316 8 : if (max_detection != 0 && max_detection != dim1[1]) {
317 0 : GST_ERROR ("Failed to check consistency with max_detection");
318 0 : return FALSE;
319 : } else {
320 8 : max_detection = dim1[1];
321 : }
322 :
323 8 : if (max_detection > MAX_DETECTION) {
324 0 : GST_ERROR ("Incoming tensor has too large detection-max : %u", max_detection);
325 0 : return FALSE;
326 : }
327 8 : return TRUE;
328 : }
329 :
330 : /**
331 : * @brief Decode input memory to out buffer
332 : * @param[in] config The structure of input tensor info.
333 : * @param[in] input The array of input tensor data. The maximum array size of input data is NNS_TENSOR_SIZE_LIMIT.
334 : */
335 : GArray *
336 4 : MpPalmDetection::decode (const GstTensorsConfig *config, const GstTensorMemory *input)
337 : {
338 4 : GArray *results = NULL;
339 4 : const GstTensorMemory *boxes = NULL;
340 4 : const GstTensorMemory *detections = NULL;
341 4 : const guint num_tensors = config->info.num_tensors;
342 4 : GstTensorInfo *info = nullptr;
343 :
344 : /* Already checked with getOutCaps. Thus, this is an internal bug */
345 4 : g_assert (num_tensors >= MAX_TENSORS);
346 :
347 : /* results will be allocated by _get_objects_mp_palm_detection_ */
348 4 : boxes = &input[0];
349 4 : detections = &input[1];
350 4 : info = gst_tensors_info_get_nth_info ((GstTensorsInfo *) &config->info, 0);
351 :
352 4 : switch (info->type) {
353 0 : _get_objects_mp_palm_detection_ (uint8_t, _NNS_UINT8);
354 0 : _get_objects_mp_palm_detection_ (int8_t, _NNS_INT8);
355 0 : _get_objects_mp_palm_detection_ (uint16_t, _NNS_UINT16);
356 0 : _get_objects_mp_palm_detection_ (int16_t, _NNS_INT16);
357 0 : _get_objects_mp_palm_detection_ (uint32_t, _NNS_UINT32);
358 0 : _get_objects_mp_palm_detection_ (int32_t, _NNS_INT32);
359 0 : _get_objects_mp_palm_detection_ (uint64_t, _NNS_UINT64);
360 0 : _get_objects_mp_palm_detection_ (int64_t, _NNS_INT64);
361 8068 : _get_objects_mp_palm_detection_ (float, _NNS_FLOAT32);
362 0 : _get_objects_mp_palm_detection_ (double, _NNS_FLOAT64);
363 :
364 0 : default:
365 0 : g_assert (0);
366 : }
367 4 : nms (results, 0.05f, MP_PALM_DETECTION_BOUNDING_BOX);
368 4 : return results;
369 : }
370 :
371 : /** @brief Initialize this object for tensor decoder bounding box */
372 : void
373 15 : init_properties_mp_palm_detection ()
374 : {
375 15 : mp_palm_detection = new MpPalmDetection ();
376 15 : BoundingBox::addProperties (mp_palm_detection);
377 15 : }
378 :
379 : /** @brief Destruct this object for tensor decoder bounding box */
380 : void
381 15 : fini_properties_mp_palm_detection ()
382 : {
383 15 : delete mp_palm_detection;
384 15 : }
|