Line data Source code
1 : /**
2 : * GStreamer / NNStreamer tensor_decoder subplugin, "Pose estimation"
3 : * Copyright (C) 2019 Samsung Electronics Co. Ltd.
4 : * Copyright (C) 2019 Jijoong Moon <jijoong.moon@samsung.com>
5 : *
6 : * This library is free software; you can redistribute it and/or
7 : * modify it under the terms of the GNU Library General Public
8 : * License as published by the Free Software Foundation;
9 : * version 2.1 of the License.
10 : *
11 : * This library is distributed in the hope that it will be useful,
12 : * but WITHOUT ANY WARRANTY; without even the implied warranty of
13 : * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
14 : * Library General Public License for more details.
15 : *
16 : */
17 : /**
18 : * @file tensordec-pose.c
19 : * @date 13 May 2019
20 : * @brief NNStreamer tensor-decoder subplugin, "pose estimation",
21 : * which converts tensors to video stream w/ pose on
22 : * transparent background.
23 : * This code is NYI/WIP and not compilable.
24 : *
25 : * @see https://github.com/nnstreamer/nnstreamer
26 : * @author Jijoong Moon <jijoong.moon@samsung.com>
27 : * @bug No known bugs except for NYI items
28 : *
29 : * option1: Video Output Dimension (WIDTH:HEIGHT)
30 : * option2: Input Dimension (WIDTH:HEIGHT)
31 : * option3: Location of label file (optional)
32 : * The file describes the keypoints and their body connections.
33 : * A line per keypoint description is expected with the following syntax:
34 : * <label name> <keypoint id> <keypoint id>
35 : *
36 : * For instance, the posenet model label description of model
37 : * https://www.tensorflow.org/lite/examples/pose_estimation/overview
38 : * would be the following:
39 : * nose 1 2 3 4
40 : * leftEye 0 2 3
41 : * rightEye 0 1 4
42 : * leftEar 0 1
43 : * rightEar 0 2
44 : * leftShoulder 6 7 11
45 : * rightShoulder 5 8 12
46 : * leftElbow 5 9
47 : * rightElbow 6 10
48 : * leftWrist 7
49 : * rightWrist 8
50 : * leftHip 5 12 13
51 : * rightHip 6 11 14
52 : * leftKnee 11 15
53 : * rightKnee 12 16
54 : * leftAnkle 13
55 : * rightAnkle 14
56 : *
57 : * option4: Mode (optional)
58 : * Available: heatmap-only (default)
59 : * heatmap-offset
60 : *
61 : * Expected input dims:
62 : * Note: Width, Height are related to heatmap resolution.
63 : * - heatmap-only:
64 : * Tensors mapping: Heatmap
65 : * Tensor[0]: #labels x width x height (float32, label probability)
66 : * (e.g., 14 x 33 x 33 )
67 : * - heatmap-offset:
68 : * Compatible with posenet_mobilenet_v1_100_257x257_multi_kpt_stripped.tflite
69 : * Tensors mapping: Heatmap, Offset
70 : * Tensor[0]: #labels : width : height (float32, label sigmoid probability)
71 : * (e.g., 17 x 9 x 9 )
72 : * Tensor[1]: #labels x 2: width : height (float32, Offset position within heatmap grid)
73 : * (e.g., 34 x 9 x 9 )
74 : *
75 : * Pipeline:
76 : * v4l2src
77 : * |
78 : * videoconvert
79 : * |
80 : * videoscale -- tee ------------------------------------------------- compositor -- videoconvert -- ximagesink
81 : * | |
82 : * videoscale |
83 : * | |
84 : * tensor_converter -- tensor_transform -- tensor_filter -- tensor_decoder
85 : *
86 : * - Used model is posenet_mobilenet_v1_100_257x257_multi_kpt_stripped.tflite
87 : * - Resize image into 257:257 at the second videoscale.
88 : * - Transform RGB value into float32 in range [0,1] at tensor_transform.
89 : *
90 : * gst-launch-1.0 v4l2src ! videoconvert ! videoscale ! \
91 : * video/x-raw,format=RGB,width=640,height=480,framerate=30/1 ! \tee name=t \
92 : * t. ! queue ! videoscale ! video/x-raw,width=257,height=257,format=RGB ! \
93 : * tensor_converter ! tensor_transform mode=arithmetic option=typecast:float32,add:-127.5,div:127.5 ! \
94 : * tensor_filter framework=tensorflow-lite model=posenet_mobilenet_v1_100_257x257_multi_kpt_stripped.tflite ! \
95 : * tensor_decoder mode=pose_estimation option1=640:480 option2=257:257 option3=pose_label.txt option4=heatmap-offset ! \
96 : * compositor name=mix sink_0::zorder=1 sink_1::zorder=0 ! videoconvert ! ximagesink \
97 : * t. ! queue ! mix.
98 : */
99 :
100 : #include <stdlib.h>
101 : #include <string.h>
102 : #include <stdint.h>
103 : #include <math.h>
104 : #include <glib.h>
105 : #include <gst/gst.h>
106 : #include <nnstreamer_plugin_api_decoder.h>
107 : #include <nnstreamer_plugin_api.h>
108 : #include <nnstreamer_log.h>
109 : #include <nnstreamer_util.h>
110 : #include "tensordecutil.h"
111 :
112 : void init_pose (void) __attribute__ ((constructor));
113 : void fini_pose (void) __attribute__ ((destructor));
114 :
115 : /* font.c */
116 : extern uint8_t rasters[][13];
117 :
118 : #define PIXEL_VALUE (0xFFFFFFFF)
119 :
120 : #define POSE_MD_MAX_LABEL_SZ 16
121 : #define POSE_MD_MAX_CONNECTIONS_SZ 8
122 :
/**
 * @brief Macro for calculating sigmoid, 1 / (1 + e^-x)
 * @note The argument is fully parenthesized so that an expression
 *       argument such as _sigmoid (a + b) expands to expf (-(a + b))
 *       rather than expf (-a + b).
 */
#define _sigmoid(x) \
    (1.f / (1.f + expf (-(x))))
128 :
/**
 * @brief There can be different schemes for pose estimation decoding scheme.
 */
typedef enum
{
  HEATMAP_ONLY = 0,             /**< Decode from the heatmap tensor alone (option4 default) */
  HEATMAP_OFFSET = 1,           /**< Decode from heatmap + offset tensors (posenet style) */
  HEATMAP_UNKNOWN,              /**< Sentinel: not a selectable mode */
} pose_modes;

/**
 * @brief List of pose estimation decoding schemes in string
 * @note Indexed by pose_modes; must stay in sync with the enum above.
 *       Matched against option4 via find_key_strv ().
 */
static const char *pose_string_modes[] = {
  [HEATMAP_ONLY] = "heatmap-only",
  [HEATMAP_OFFSET] = "heatmap-offset",
  NULL,
};
147 :
/**
 * @brief Data structure for key body point description.
 */
static struct pose_metadata_s
{
  gchar label[POSE_MD_MAX_LABEL_SZ]; /**< Key body name */
  gint connections[POSE_MD_MAX_CONNECTIONS_SZ];/**< Connections list */
  gint num_connections; /**< Total number of connections */
} pose_metadata_default[] = {
  /* Fallback skeleton used when no label file (option3) is supplied:
   * 14 keypoints; each entry lists the ids of connected keypoints and
   * the number of connections. */
  {
  "top", {
  1}, 1}, {
  "neck", {
  0, 2, 5, 8, 11}, 5}, {
  "r_shoulder", {
  1, 3}, 2}, {
  "r_elbow", {
  2, 4}, 2}, {
  "r_wrist", {
  3}, 1}, {
  "l_shoulder", {
  1, 6}, 2}, {
  "l_elbow", {
  5, 7}, 2}, {
  "l_wrist", {
  6}, 1}, {
  "r_hip", {
  1, 9}, 2}, {
  "r_knee", {
  8, 10}, 2}, {
  "r_ankle", {
  9}, 1}, {
  "l_hip", {
  1, 12}, 2}, {
  "l_knee", {
  11, 13}, 2}, {
  "l_ankle", {
  12}, 1}
};

typedef struct pose_metadata_s pose_metadata_t;

/** Number of entries in the default (fallback) metadata table */
#define POSE_SIZE_DEFAULT (sizeof(pose_metadata_default) / sizeof(pose_metadata_t))
191 :
192 : /**
193 : * @todo Fill in the value at build time or hardcode this. It's const value
194 : * @brief The bitmap of characters
195 : * [Character (ASCII)][Height][Width]
196 : */
197 : static singleLineSprite_t singleLineSprite;
198 :
/**
 * @brief Data structure for pose-estimation info.
 * @note Allocated per decoder instance in pose_init () and filled from the
 *       tensor_decoder option1..option4 strings in pose_setOption ().
 */
typedef struct
{
  /* From option1 */
  guint width; /**< Output Video Width */
  guint height; /**< Output Video Height */

  /* From option2 */
  guint i_width; /**< Input Video Width */
  guint i_height; /**< Input Video Height */

  /* From option3 */
  pose_metadata_t *metadata; /**< Pose metadata from file, if any; otherwise points at pose_metadata_default */
  guint total_labels; /**< Total number of key body points */

  /* From option4 */
  pose_modes mode; /**< The pose estimation decoding mode */
} pose_data;
219 :
220 : /**
221 : * @brief Load key body metadata from file
222 : *
223 : * The file describes the different key body point reported by the model,
224 : * with one line dedicated per key body point.
225 : *
226 : * The first word is the key body string, followed by its connections with other key body point.
227 : * Connections are represented through key body integer id
228 : * Token separator is space, .i.e. ' '
229 : *
230 : * File example of fallback configuration:
231 : *
232 : * top 1
233 : * neck 0 2 5 8 11
234 : * r_shoulder 1 3
235 : * r_elbow 2 4
236 : * r_wrist 3
237 : * l_shoulder 1 6
238 : * l_elbow 5 7
239 : * l_wrist 6 1
240 : * r_hip 1 9
241 : * r_knee 8 10
242 : * r_ankle 9
243 : * l_hip 1 12
244 : * l_knee 11 13
245 : * l_ankle 12
246 : *
247 : * @param[in] file_path The filename path to load
248 : * @param[in] pd The pose data object
249 : * @return Return TRUE on file loading success, otherwise FALSE
250 : */
251 : static gboolean
252 1 : pose_load_metadata_from_file (pose_data * pd, const gchar * file_path)
253 : {
254 1 : gsize len = 0;
255 1 : GError *err = NULL;
256 1 : gchar *contents = NULL;
257 : gchar **lines;
258 : guint i, j;
259 :
260 1 : if (!g_file_test (file_path, G_FILE_TEST_EXISTS)) {
261 0 : GST_WARNING ("Labels file %s does not exist !", file_path);
262 1 : return FALSE;
263 : }
264 :
265 1 : if (!g_file_get_contents (file_path, &contents, &len, &err) || len <= 0) {
266 0 : ml_loge ("Unable to read file %s with error %s.", file_path, err->message);
267 0 : g_clear_error (&err);
268 0 : return FALSE;
269 : }
270 :
271 1 : if (contents[len - 1] == '\n')
272 1 : contents[len - 1] = '\0';
273 :
274 1 : lines = g_strsplit (contents, "\n", -1);
275 1 : pd->total_labels = g_strv_length (lines);
276 1 : pd->metadata = g_new0 (pose_metadata_t, pd->total_labels);
277 :
278 18 : for (i = 0; i < pd->total_labels; i++) {
279 : guint n_tokens;
280 : gchar **tokens;
281 :
282 17 : g_strstrip (lines[i]);
283 17 : tokens = g_strsplit (lines[i], " ", -1);
284 17 : n_tokens = g_strv_length (tokens);
285 17 : if (n_tokens > POSE_MD_MAX_CONNECTIONS_SZ) {
286 0 : GST_WARNING ("Too many connections (%d) declared, clamping (%d)\n",
287 : n_tokens, POSE_MD_MAX_CONNECTIONS_SZ);
288 0 : n_tokens = POSE_MD_MAX_CONNECTIONS_SZ;
289 : }
290 17 : g_strlcpy (pd->metadata[i].label, tokens[0], POSE_MD_MAX_LABEL_SZ);
291 17 : pd->metadata[i].num_connections = n_tokens - 1;
292 55 : for (j = 1; j < n_tokens; j++)
293 38 : pd->metadata[i].connections[j - 1] =
294 38 : (gint) g_ascii_strtoll (tokens[j], NULL, 10);
295 :
296 17 : g_strfreev (tokens);
297 : }
298 :
299 1 : g_strfreev (lines);
300 1 : g_free (contents);
301 :
302 1 : return TRUE;
303 : }
304 :
305 : /** @brief Return pose metadata by id */
306 : static inline pose_metadata_t *
307 1332 : pose_get_metadata_by_id (pose_data * data, guint id)
308 : {
309 1332 : pose_metadata_t *md = data->metadata;
310 :
311 1332 : if (id > data->total_labels)
312 0 : return NULL;
313 :
314 1332 : return &md[id];
315 : }
316 :
317 : /** @brief tensordec-plugin's TensorDecDef callback */
318 : static int
319 4 : pose_init (void **pdata)
320 : {
321 : pose_data *data;
322 :
323 4 : data = *pdata = g_new0 (pose_data, 1);
324 4 : if (data == NULL) {
325 0 : GST_ERROR ("Failed to allocate memory for decoder subplugin.");
326 0 : return FALSE;
327 : }
328 :
329 4 : data->width = 0;
330 4 : data->height = 0;
331 4 : data->i_width = 0;
332 4 : data->i_height = 0;
333 :
334 4 : data->metadata = pose_metadata_default;
335 4 : data->total_labels = POSE_SIZE_DEFAULT;
336 :
337 4 : data->mode = HEATMAP_ONLY;
338 :
339 4 : initSingleLineSprite (singleLineSprite, rasters, PIXEL_VALUE);
340 :
341 4 : return TRUE;
342 : }
343 :
344 : /** @brief tensordec-plugin's TensorDecDef callback */
345 : static void
346 4 : pose_exit (void **pdata)
347 : {
348 4 : pose_data *data = *pdata;
349 :
350 4 : if (data->metadata != pose_metadata_default)
351 1 : g_free (data->metadata);
352 :
353 4 : g_free (*pdata);
354 4 : *pdata = NULL;
355 4 : }
356 :
357 : /** @brief tensordec-plugin's TensorDecDef callback */
358 : static int
359 11 : pose_setOption (void **pdata, int opNum, const char *param)
360 : {
361 11 : pose_data *data = *pdata;
362 :
363 11 : if (opNum == 0) {
364 : /* option1 = output video size (width:height) */
365 : tensor_dim dim;
366 4 : int rank = gst_tensor_parse_dimension (param, dim);
367 :
368 4 : data->width = 0;
369 4 : data->height = 0;
370 4 : if (param == NULL || *param == '\0')
371 4 : return TRUE;
372 :
373 4 : if (rank < 2) {
374 0 : GST_ERROR
375 : ("mode-option-1 of pose estimation is video output dimension (WIDTH:HEIGHT). The given parameter, \"%s\", is not acceptable.",
376 : param);
377 0 : return TRUE; /* Ignore this param */
378 : }
379 4 : if (rank > 2) {
380 0 : GST_WARNING
381 : ("mode-option-1 of pose estimation is video output dimension (WIDTH:HEIGHT). The third and later elements of the given parameter, \"%s\", are ignored.",
382 : param);
383 : }
384 4 : data->width = dim[0];
385 4 : data->height = dim[1];
386 4 : return TRUE;
387 7 : } else if (opNum == 1) {
388 : /* option1 = input model size (width:height) */
389 : tensor_dim dim;
390 4 : int rank = gst_tensor_parse_dimension (param, dim);
391 :
392 4 : data->i_width = 0;
393 4 : data->i_height = 0;
394 4 : if (param == NULL || *param == '\0')
395 4 : return TRUE;
396 :
397 4 : if (rank < 2) {
398 0 : GST_ERROR
399 : ("mode-option-2 of pose estimation is input video dimension (WIDTH:HEIGHT). The given parameter, \"%s\", is not acceptable.",
400 : param);
401 0 : return TRUE;
402 : }
403 4 : if (rank > 2) {
404 0 : GST_WARNING
405 : ("mode-option-2 of pose esitmiation is input video dimension (WIDTH:HEIGHT). The third and later elements of the given parameter, \"%s\", are ignored.",
406 : param);
407 : }
408 4 : data->i_width = dim[0];
409 4 : data->i_height = dim[1];
410 4 : return TRUE;
411 3 : } else if (opNum == 2) {
412 1 : return pose_load_metadata_from_file (data, (const gchar *) param);
413 2 : } else if (opNum == 3) {
414 1 : gint mode = find_key_strv (pose_string_modes, param);
415 1 : if (mode == -1) {
416 0 : GST_ERROR ("Mode %s is not supported\n", param);
417 0 : return FALSE;
418 : }
419 1 : data->mode = mode;
420 :
421 1 : return TRUE;
422 : }
423 :
424 1 : GST_INFO ("Property mode-option-%d is ignored", opNum + 1);
425 1 : return TRUE;
426 : }
427 :
428 : /**
429 : * @brief check the num_tensors is valid
430 : */
431 : static int
432 42 : _check_tensors (const GstTensorsConfig * config)
433 : {
434 : unsigned int i;
435 : GstTensorsInfo *info;
436 : GstTensorInfo *first, *current;
437 :
438 42 : g_return_val_if_fail (config != NULL, FALSE);
439 :
440 42 : info = (GstTensorsInfo *) &config->info;
441 42 : first = gst_tensors_info_get_nth_info (info, 0);
442 :
443 42 : for (i = 1; i < config->info.num_tensors; ++i) {
444 0 : current = gst_tensors_info_get_nth_info (info, i);
445 0 : g_return_val_if_fail (first->type == current->type, FALSE);
446 : }
447 42 : return TRUE;
448 : }
449 :
450 : /**
451 : * @brief tensordec-plugin's TensorDecDef callback
452 : *
453 : * [Pose Estimation]
454 : * Just one tensor with [ 14 (#Joint), WIDTH, HEIGHT, 1]
455 : * One WIDTH:HEIGHT for the each joint.
456 : * Have to find max value after Gaussian Blur
457 : *
458 : */
459 : static GstCaps *
460 42 : pose_getOutCaps (void **pdata, const GstTensorsConfig * config)
461 : {
462 42 : pose_data *data = *pdata;
463 : GstCaps *caps;
464 : int i;
465 : char *str;
466 : guint pose_size;
467 :
468 : const uint32_t *dim;
469 :
470 42 : if (!_check_tensors (config))
471 0 : return NULL;
472 :
473 42 : pose_size = data->total_labels;
474 :
475 : /* Check if the first tensor is compatible */
476 42 : dim = config->info.info[0].dimension;
477 42 : g_return_val_if_fail (dim[0] == pose_size, NULL);
478 210 : for (i = 3; i < NNS_TENSOR_RANK_LIMIT; i++)
479 195 : g_return_val_if_fail (dim[i] <= 1, NULL);
480 :
481 15 : if (data->mode == HEATMAP_OFFSET) {
482 0 : dim = config->info.info[1].dimension;
483 0 : g_return_val_if_fail (dim[0] == (2 * pose_size), NULL);
484 :
485 0 : for (i = 3; i < NNS_TENSOR_RANK_LIMIT; i++)
486 0 : g_return_val_if_fail (dim[i] <= 1, NULL);
487 : }
488 :
489 15 : str = g_strdup_printf ("video/x-raw, format = RGBA, " /* Use alpha channel to make the background transparent */
490 : "width = %u, height = %u", data->width, data->height);
491 15 : caps = gst_caps_from_string (str);
492 15 : setFramerateFromConfig (caps, config);
493 15 : g_free (str);
494 :
495 15 : return caps;
496 : }
497 :
/**
 * @brief tensordec-plugin's TensorDecDef getTransformSize callback
 * @return 0, meaning the transform size cannot be computed from the peer
 *         size; the output buffer size is decided from the caps instead.
 */
static size_t
pose_getTransformSize (void **pdata, const GstTensorsConfig * config,
    GstCaps * caps, size_t size, GstCaps * othercaps, GstPadDirection direction)
{
  UNUSED (pdata);
  UNUSED (config);
  UNUSED (caps);
  UNUSED (size);
  UNUSED (othercaps);
  UNUSED (direction);
  return 0;
}
511 :
/** @brief Represents a decoded keypoint (one pose element per body point) */
typedef struct
{
  int valid;   /**< Non-zero while the keypoint is considered detected */
  int x;       /**< X position, in output video coordinates */
  int y;       /**< Y position, in output video coordinates */
  gfloat prob; /**< Peak heatmap score for this keypoint */
} pose;
520 :
521 : /**
522 : * @brief Check if a value is within lower and upper bounds
523 : * @param value the value to check
524 : * @param lower_b the lower bound (inclusive)
525 : * @param upper_b the uppoer bound (exclusive)
526 : * @return TRUE if the value is within the bounds, otherwise FALSE
527 : */
528 : static gboolean
529 15370 : is_value_within(int value, int lower_b, int upper_b)
530 : {
531 15370 : if (value < lower_b) {
532 0 : return FALSE;
533 15370 : } else if (value >= upper_b) {
534 0 : return FALSE;
535 : } else {
536 15370 : return TRUE;
537 : }
538 : }
539 :
540 : /**
541 : * @brief Fill in pixel with PIXEL_VALUE at x,y position. Make thicker (x+1, y+1)
542 : * @param[out] out_info The output buffer (RGBA plain)
543 : * @param[in] bdata The pose-estimation internal data.
544 : * @param[in] coordinate of pixel
545 : */
546 : static void
547 7685 : setpixel (uint32_t * frame, pose_data * data, int x, int y)
548 : {
549 7685 : if (is_value_within(x, 0, data->width) && is_value_within(y, 0, data->height)) {
550 7685 : uint32_t *pos = &frame[y * data->width + x];
551 7685 : *pos = PIXEL_VALUE;
552 :
553 7685 : if (x + 1 < (int) data->width) {
554 7685 : pos = &frame[y * data->width + x + 1];
555 7685 : *pos = PIXEL_VALUE;
556 : }
557 7685 : if (y + 1 < (int) data->height) {
558 7685 : pos = &frame[(y + 1) * data->width + x];
559 7685 : *pos = PIXEL_VALUE;
560 : }
561 : }
562 7685 : }
563 :
/**
 * @brief Draw line with dot at the end of line
 * @param[out] frame The output buffer (RGBA plain)
 * @param[in] data The pose-estimation internal data.
 * @param[in] x1,y1,x2,y2 coordinates of the two end points of the line
 */
static void
draw_line_with_dot (uint32_t * frame, pose_data * data, int x1, int y1, int x2,
    int y2)
{
  int i, dx, sx, dy, sy, err;
  uint32_t *pos;
  /* Offsets (relative to a dot center) of the 40 pixels forming a filled
   * diamond of radius 4; stamped at both endpoints below. */
  int xx[40] =
      { -4, 0, 4, 0, -3, -3, -3, -2, -2, -2, -2, -2, -1, -1, -1, -1, -1, -1, -1,
    0, 0, 0, 0, 0, 0, 1, 1, 1, 1, 1, 1, 1, 2, 2, 2, 2, 2, 3, 3, 3
  };
  int yy[40] =
      { 0, -4, 0, 4, -1, 0, 1, -2, -1, 0, 1, 2, -3, -2, -1, 0, 1, 2, 3, -3, -2,
    -1, 1, 2, 3, -3, -2, -1, 0, 1, 2, 3, -2, -1, 0, 1, 2, -1, 0, 1
  };

  int xs, ys, xe, ye;

  /* Order the endpoints left-to-right so the line always steps in +x */
  if (x1 > x2) {
    xs = x2;
    ys = y2;
    xe = x1;
    ye = y1;
  } else {
    xs = x1;
    ys = y1;
    xe = x2;
    ye = y2;
  }

  /* Stamp the endpoint dots, clipping each pixel to the frame bounds */
  for (i = 0; i < 40; i++) {
    if ((ys + yy[i] >= 0) && (ys + yy[i] < (int) data->height) &&
        (xs + xx[i] >= 0) && (xs + xx[i] < (int) data->width)) {
      pos = &frame[(ys + yy[i]) * data->width + xs + xx[i]];
      *pos = PIXEL_VALUE;
    }
    if ((ye + yy[i] >= 0) && (ye + yy[i] < (int) data->height) &&
        (xe + xx[i] >= 0) && (xe + xx[i] < (int) data->width)) {
      pos = &frame[(ye + yy[i]) * data->width + xe + xx[i]];
      *pos = PIXEL_VALUE;
    }
  }

  /* Bresenham line drawing between the two (ordered) endpoints;
   * setpixel () clips and thickens each plotted pixel. */
  dx = abs (xe - xs);
  sx = xs < xe ? 1 : -1;
  dy = abs (ye - ys);
  sy = ys < ye ? 1 : -1;
  err = (dx > dy ? dx : -dy) / 2;

  while (setpixel (frame, data, xs, ys), xs != xe || ys != ye) {
    int e2 = err;
    if (e2 > -dx) {
      err -= dy;
      xs += sx;
    }
    if (e2 < dy) {
      err += dx;
      ys += sy;
    }
  }
}
632 :
/**
 * @brief Draw label with the given results (pose) to the output buffer
 * @param[out] frame The output buffer (RGBA plain)
 * @param[in] data The pose-estimation internal data.
 * @param[in] xydata The final results to be drawn (array of total_labels poses)
 */
static void
draw_label (uint32_t * frame, pose_data * data, pose * xydata)
{
  int x1, y1, x2, y2;
  uint32_t *pos1, *pos2;

  guint i;
  guint pose_size = data->total_labels;
  char *label;
  for (i = 0; i < pose_size; i++) {
    if (xydata[i].valid) {
      pose_metadata_t *md = pose_get_metadata_by_id (data, i);
      gsize j, label_len;

      x1 = xydata[i].x;
      y1 = xydata[i].y;
      if (md == NULL)
        continue;
      label = md->label;
      label_len = label ? strlen (label) : 0;
      /* Draw the text just above the keypoint (clamped to the top edge);
       * 14 = glyph height (13) + 1 pixel of spacing. */
      y1 = MAX (0, (y1 - 14));
      pos1 = &frame[y1 * data->width + x1];
      for (j = 0; j < label_len; j++) {
        unsigned int char_index = label[j];
        /* Stop when the next 8-pixel-wide glyph would overrun the frame */
        if ((x1 + 8) > (int) data->width)
          break;
        pos2 = pos1;
        /* Blit one 8x13 glyph from the pre-rendered ASCII sprite table */
        for (y2 = 0; y2 < 13; y2++) {
          for (x2 = 0; x2 < 8; x2++) {
            *(pos2 + x2) = singleLineSprite[char_index][y2][x2];
          }
          pos2 += data->width;
        }
        x1 += 9;                /* 8 pixels of glyph + 1 pixel of spacing */
        pos1 += 9;
      }
    }
  }
}
678 :
679 : /**
680 : * @brief Draw with the given results (pose) to the output buffer
681 : * @param[out] out_info The output buffer (RGBA plain)
682 : * @param[in] bdata The pose-estimation internal data.
683 : * @param[in] results The final results to be drawn.
684 : */
685 : static void
686 44 : draw (GstMapInfo * out_info, pose_data * data, GArray * results)
687 : {
688 : guint i;
689 : gint j;
690 44 : uint32_t *frame = (uint32_t *) out_info->data; /* Let's draw per pixel (4bytes) */
691 44 : guint pose_size = data->total_labels;
692 :
693 44 : pose **XYdata = g_new0 (pose *, pose_size);
694 44 : if (!XYdata) {
695 0 : ml_loge ("The memory allocation is failed.");
696 0 : return;
697 : }
698 :
699 720 : for (i = 0; i < pose_size; i++) {
700 676 : XYdata[i] = &g_array_index (results, pose, i);
701 676 : if (XYdata[i]->prob < 0.5) {
702 10 : XYdata[i]->valid = FALSE;
703 : }
704 : }
705 :
706 720 : for (i = 0; i < pose_size; i++) {
707 : pose_metadata_t *smd;
708 676 : if (XYdata[i]->valid == FALSE)
709 10 : continue;
710 666 : smd = pose_get_metadata_by_id (data, i);
711 666 : if (smd == NULL)
712 0 : continue;
713 2030 : for (j = 0; j < smd->num_connections; j++) {
714 1364 : guint k = smd->connections[j];
715 : /* Have we already drawn the connection ? */
716 1364 : if ((k > data->total_labels) || (k < i))
717 683 : continue;
718 : /* Is the body point valid ? */
719 681 : if (XYdata[k]->valid == FALSE)
720 1 : continue;
721 680 : draw_line_with_dot (frame, data,
722 680 : XYdata[i]->x, XYdata[i]->y, XYdata[k]->x, XYdata[k]->y);
723 : }
724 : }
725 :
726 44 : draw_label (frame, data, *XYdata);
727 :
728 44 : g_free (XYdata);
729 : }
730 :
731 : /** @brief tensordec-plugin's TensorDecDef callback */
732 : static GstFlowReturn
733 44 : pose_decode (void **pdata, const GstTensorsConfig * config,
734 : const GstTensorMemory * input, GstBuffer * outbuf)
735 : {
736 44 : pose_data *data = *pdata;
737 44 : const size_t size = (size_t) data->width * data->height * 4; /* RGBA */
738 : GstMapInfo out_info;
739 : GstMemory *out_mem;
740 44 : GArray *results = NULL;
741 44 : const GstTensorMemory *detections = NULL;
742 : float *arr;
743 : int i, j;
744 : int grid_xsize, grid_ysize;
745 : guint pose_size, index;
746 :
747 44 : g_assert (outbuf); /** GST Internal Bug */
748 : /* Ensure we have outbuf properly allocated */
749 44 : if (gst_buffer_get_size (outbuf) == 0) {
750 44 : out_mem = gst_allocator_alloc (NULL, size, NULL);
751 : } else {
752 0 : if (gst_buffer_get_size (outbuf) < size) {
753 0 : gst_buffer_set_size (outbuf, size);
754 : }
755 0 : out_mem = gst_buffer_get_all_memory (outbuf);
756 : }
757 44 : if (!gst_memory_map (out_mem, &out_info, GST_MAP_WRITE)) {
758 0 : gst_memory_unref (out_mem);
759 0 : ml_loge ("Cannot map output memory / tensordec-pose.\n");
760 44 : return GST_FLOW_ERROR;
761 : }
762 : /** reset the buffer with alpha 0 / black */
763 44 : memset (out_info.data, 0, size);
764 :
765 44 : pose_size = data->total_labels;
766 :
767 44 : grid_xsize = config->info.info[0].dimension[1];
768 44 : grid_ysize = config->info.info[0].dimension[2];
769 :
770 44 : results = g_array_sized_new (FALSE, TRUE, sizeof (pose), pose_size);
771 44 : detections = &input[0];
772 44 : arr = detections->data;
773 720 : for (index = 0; index < pose_size; index++) {
774 676 : int maxX = 0;
775 676 : int maxY = 0;
776 676 : float max = G_MINFLOAT;
777 : pose p;
778 1352 : for (j = 0; j < grid_ysize; j++) {
779 11160 : for (i = 0; i < grid_xsize; i++) {
780 10484 : float cen = arr[i * pose_size + j * grid_xsize * pose_size + index];
781 10484 : if (data->mode == HEATMAP_OFFSET) {
782 0 : cen = _sigmoid (cen);
783 : }
784 10484 : if (cen > max) {
785 889 : max = cen;
786 889 : maxX = i;
787 889 : maxY = j;
788 : }
789 : }
790 : }
791 :
792 676 : p.valid = TRUE;
793 676 : p.prob = max;
794 676 : if (data->mode == HEATMAP_OFFSET) {
795 0 : const gfloat *offset = ((const GstTensorMemory *) &input[1])->data;
796 : gfloat offsetX, offsetY, posX, posY;
797 : int offsetIdx;
798 0 : offsetIdx = (maxY * grid_xsize + maxX) * pose_size * 2 + index;
799 0 : offsetY = offset[offsetIdx];
800 0 : offsetX = offset[offsetIdx + pose_size];
801 0 : posX = (((gfloat) maxX) / (grid_xsize - 1)) * data->i_width + offsetX;
802 0 : posY = (((gfloat) maxY) / (grid_ysize - 1)) * data->i_height + offsetY;
803 0 : p.x = posX * data->width / data->i_width;
804 0 : p.y = posY * data->height / data->i_height;
805 :
806 : } else {
807 676 : p.x = (maxX * data->width) / data->i_width;
808 676 : p.y = (maxY * data->height) / data->i_height;
809 : }
810 : /* Some keypoints can be estimated slightly out of image range */
811 676 : p.x = MIN (data->width, (guint) (MAX (0, p.x)));
812 676 : p.y = MIN (data->height, (guint) (MAX (0, p.y)));
813 :
814 676 : g_array_append_val (results, p);
815 : }
816 :
817 44 : draw (&out_info, data, results);
818 44 : g_array_free (results, TRUE);
819 44 : gst_memory_unmap (out_mem, &out_info);
820 44 : if (gst_buffer_get_size (outbuf) == 0)
821 44 : gst_buffer_append_memory (outbuf, out_mem);
822 : else
823 0 : gst_buffer_replace_all_memory (outbuf, out_mem);
824 :
825 44 : return GST_FLOW_OK;
826 : }
827 :
/** Decoder mode name exposed to tensor_decoder (mode=pose_estimation) */
static gchar decoder_subplugin_pose_estimation[] = "pose_estimation";

/** @brief Pose Estimation tensordec-plugin TensorDecDef instance */
static GstTensorDecoderDef poseEstimation = {
  .modename = decoder_subplugin_pose_estimation,
  .init = pose_init,
  .exit = pose_exit,
  .setOption = pose_setOption,
  .getOutCaps = pose_getOutCaps,
  .getTransformSize = pose_getTransformSize,
  .decode = pose_decode
};
839 :
/** @brief Initialize this object for tensordec-plugin (library constructor) */
void
init_pose (void)
{
  /* Register the "pose_estimation" decoder mode with the subplugin registry */
  nnstreamer_decoder_probe (&poseEstimation);
}
846 :
/** @brief Destruct this object for tensordec-plugin (library destructor) */
void
fini_pose (void)
{
  /* Unregister the decoder mode registered by init_pose () */
  nnstreamer_decoder_exit (poseEstimation.modename);
}
|