LCOV - code coverage report
Current view: top level - nnstreamer-2.4.2/ext/nnstreamer/tensor_decoder - tensordec-pose.c (source / functions) Coverage Total Hit
Test: nnstreamer 2.4.2-0 nnstreamer/nnstreamer#eca68b8d050408568af95d831a8eef62aaee7784 Lines: 84.1 % 296 249
Test Date: 2025-03-13 05:38:21 Functions: 100.0 % 16 16

            Line data    Source code
       1              : /**
       2              :  * GStreamer / NNStreamer tensor_decoder subplugin, "Pose estimation"
       3              :  * Copyright (C) 2019 Samsung Electronics Co. Ltd.
       4              :  * Copyright (C) 2019 Jijoong Moon <>
       5              :  *
       6              :  * This library is free software; you can redistribute it and/or
       7              :  * modify it under the terms of the GNU Library General Public
       8              :  * License as published by the Free Software Foundation;
       9              :  * version 2.1 of the License.
      10              :  *
      11              :  * This library is distributed in the hope that it will be useful,
      12              :  * but WITHOUT ANY WARRANTY; without even the implied warranty of
      14              :  * Library General Public License for more details.
      15              :  *
      16              :  */
      17              : /**
      18              :  * @file        tensordec-pose.c
      19              :  * @date        13 May 2019
      20              :  * @brief       NNStreamer tensor-decoder subplugin, "pose estimation",
      21              :  *              which converts tensors to video stream w/ pose on
      22              :  *              transparent background.
      23              :  *              This code is NYI/WIP and not compilable.
      24              :  *
      25              :  * @see
      26              :  * @author      Jijoong Moon <>
      27              :  * @bug         No known bugs except for NYI items
      28              :  *
      29              :  * option1: Video Output Dimension (WIDTH:HEIGHT)
      30              :  * option2: Input Dimension (WIDTH:HEIGHT)
      31              :  * option3: Location of label file (optional)
      32              :  *      The file describes the keypoints and their body connections.
      33              :  *      A line per keypoint description is expected with the following syntax:
      34              :  *      <label name> <keypoint id> <keypoint id>
      35              :  *
      36              :  *      For instance, the label description of the posenet model
      37              :  *      (e.g., posenet_mobilenet_v1_100_257x257_multi_kpt_stripped.tflite)
      38              :  *      would be the following:
      39              :  *      nose 1 2 3 4
      40              :  *      leftEye 0 2 3
      41              :  *      rightEye 0 1 4
      42              :  *      leftEar 0 1
      43              :  *      rightEar 0 2
      44              :  *      leftShoulder 6 7 11
      45              :  *      rightShoulder 5 8 12
      46              :  *      leftElbow 5 9
      47              :  *      rightElbow 6 10
      48              :  *      leftWrist 7
      49              :  *      rightWrist 8
      50              :  *      leftHip 5 12 13
      51              :  *      rightHip 6 11 14
      52              :  *      leftKnee 11 15
      53              :  *      rightKnee 12 16
      54              :  *      leftAnkle 13
      55              :  *      rightAnkle 14
      56              :  *
      57              :  * option4: Mode (optional)
      58              :  *      Available: heatmap-only (default)
      59              :  *                 heatmap-offset
      60              :  *
      61              :  *      Expected input dims:
      62              :  *              Note: Width, Height are related to heatmap resolution.
      63              :  *              - heatmap-only:
      64              :  *                      Tensors mapping: Heatmap
      65              :  *                      Tensor[0]: #labels x width x height (float32, label probability)
      66              :  *                              (e.g., 14 x 33 x 33 )
      67              :  *              - heatmap-offset:
      68              :  *                      Compatible with posenet_mobilenet_v1_100_257x257_multi_kpt_stripped.tflite
      69              :  *                      Tensors mapping: Heatmap, Offset
      70              :  *                      Tensor[0]: #labels : width : height (float32, label sigmoid probability)
      71              :  *                              (e.g., 17 x 9 x 9 )
      72              :  *                      Tensor[1]: #labels x 2: width : height (float32, Offset position within heatmap grid)
      73              :  *                              (e.g., 34 x 9 x 9 )
      74              :  *
      75              :  * Pipeline:
      76              :  *      v4l2src
      77              :  *         |
      78              :  *      videoconvert
      79              :  *         |
      80              :  *      videoscale -- tee ------------------------------------------------- compositor -- videoconvert -- ximagesink
      81              :  *                      |                                                       |
      82              :  *                 videoscale                                                   |
      83              :  *                      |                                                       |
      84              :  *                 tensor_converter -- tensor_transform -- tensor_filter -- tensor_decoder
      85              :  *
      86              :  *      - Used model is posenet_mobilenet_v1_100_257x257_multi_kpt_stripped.tflite
      87              :  *      - Resize image into 257:257 at the second videoscale.
      88              :  *      - Transform RGB value into float32 in range [-1,1] at tensor_transform (add:-127.5, div:127.5).
      89              :  *
      90              :  *      gst-launch-1.0 v4l2src ! videoconvert ! videoscale ! \
      91              :  *         video/x-raw,format=RGB,width=640,height=480,framerate=30/1 ! tee name=t \
      92              :  *         t. ! queue ! videoscale ! video/x-raw,width=257,height=257,format=RGB ! \
      93              :  *         tensor_converter ! tensor_transform mode=arithmetic option=typecast:float32,add:-127.5,div:127.5 ! \
      94              :  *         tensor_filter framework=tensorflow-lite model=posenet_mobilenet_v1_100_257x257_multi_kpt_stripped.tflite ! \
      95              :  *         tensor_decoder mode=pose_estimation option1=640:480 option2=257:257 option3=pose_label.txt option4=heatmap-offset ! \
      96              :  *         compositor name=mix sink_0::zorder=1 sink_1::zorder=0 ! videoconvert ! ximagesink \
      97              :  *         t. ! queue ! mix.
      98              :  */
      99              : 
     100              : #include <stdlib.h>
     101              : #include <string.h>
     102              : #include <stdint.h>
     103              : #include <math.h>
     104              : #include <glib.h>
     105              : #include <gst/gst.h>
     106              : #include <nnstreamer_plugin_api_decoder.h>
     107              : #include <nnstreamer_plugin_api.h>
     108              : #include <nnstreamer_log.h>
     109              : #include <nnstreamer_util.h>
     110              : #include "tensordecutil.h"
     111              : 
     112              : void init_pose (void) __attribute__ ((constructor));
     113              : void fini_pose (void) __attribute__ ((destructor));
     114              : 
     115              : /* font.c */
     116              : extern uint8_t rasters[][13];
     117              : 
     118              : #define PIXEL_VALUE               (0xFFFFFFFF)
     119              : 
     120              : #define POSE_MD_MAX_LABEL_SZ 16
     121              : #define POSE_MD_MAX_CONNECTIONS_SZ 8
     122              : 
     123              : /**
     124              :  * @brief Macro for calculating sigmoid
     125              :  */
     126              : #define _sigmoid(x) \
     127              :     (1.f / (1.f + expf (-x)))
     128              : 
     129              : /**
     130              :  * @brief There can be different schemes for pose estimation decoding scheme.
     131              :  */
     132              : typedef enum
     133              : {
     134              :   HEATMAP_ONLY = 0,
     135              :   HEATMAP_OFFSET = 1,
     136              :   HEATMAP_UNKNOWN,
     137              : } pose_modes;
     138              : 
     139              : /**
     140              :  * @brief List of pose estimation decoding schemes in string
     141              :  */
     142              : static const char *pose_string_modes[] = {
     143              :   [HEATMAP_ONLY] = "heatmap-only",
     144              :   [HEATMAP_OFFSET] = "heatmap-offset",
     145              :   NULL,
     146              : };
     147              : 
     148              : /**
     149              :  * @brief Data structure for key body point description.
     150              :  */
     151              : static struct pose_metadata_s
     152              : {
     153              :   gchar label[POSE_MD_MAX_LABEL_SZ]; /**< Key body name */
     154              :   gint connections[POSE_MD_MAX_CONNECTIONS_SZ];/**< Connections list */
     155              :   gint num_connections; /** Total number of connections */
     156              : } pose_metadata_default[] = {
     157              :   {
     158              :     "top", {
     159              :   1}, 1}, {
     160              :     "neck", {
     161              :   0, 2, 5, 8, 11}, 5}, {
     162              :     "r_shoulder", {
     163              :   1, 3}, 2}, {
     164              :     "r_elbow", {
     165              :   2, 4}, 2}, {
     166              :     "r_wrist", {
     167              :   3}, 1}, {
     168              :     "l_shoulder", {
     169              :   1, 6}, 2}, {
     170              :     "l_elbow", {
     171              :   5, 7}, 2}, {
     172              :     "l_wrist", {
     173              :   6}, 1}, {
     174              :     "r_hip", {
     175              :   1, 9}, 2}, {
     176              :     "r_knee", {
     177              :   8, 10}, 2}, {
     178              :     "r_ankle", {
     179              :   9}, 1}, {
     180              :     "l_hip", {
     181              :   1, 12}, 2}, {
     182              :     "l_knee", {
     183              :   11, 13}, 2}, {
     184              :     "l_ankle", {
     185              :   12}, 1}
     186              : };
     187              : 
     188              : typedef struct pose_metadata_s pose_metadata_t;
     189              : 
     190              : #define POSE_SIZE_DEFAULT   (sizeof(pose_metadata_default) / sizeof(pose_metadata_t))
     191              : 
     192              : /**
     193              :  * @todo Fill in the value at build time or hardcode this. It's const value
     194              :  * @brief The bitmap of characters
     195              :  * [Character (ASCII)][Height][Width]
     196              :  */
     197              : static singleLineSprite_t singleLineSprite;
     198              : 
     199              : /**
     200              :  * @brief Data structure for pose-estimation info.
     201              :  */
     202              : typedef struct
     203              : {
     204              :   /* From option1 */
     205              :   guint width; /**< Output Video Width */
     206              :   guint height; /**< Output Video Height */
     207              : 
     208              :   /* From option2 */
     209              :   guint i_width; /**< Input Video Width */
     210              :   guint i_height; /**< Input Video Height */
     211              : 
     212              :   /* From option3 */
     213              :   pose_metadata_t *metadata; /**< Pose metadata from file, if any*/
     214              :   guint total_labels; /**< Total number of key body point */
     215              : 
     216              :   /* From option4 */
     217              :   pose_modes mode; /**< The pose estimation decoding mode */
     218              : } pose_data;
     219              : 
     220              : /**
     221              :  * @brief Load key body metadata from file
     222              :  *
     223              :  * The file describes the different key body point reported by the model,
     224              :  * with one line dedicated per key body point.
     225              :  *
     226              :  * The first word is the key body string, followed by its connections with other key body point.
     227              :  * Connections are represented through key body integer id
     228              :  * Token separator is space, i.e. ' '
     229              :  *
     230              :  * File example of fallback configuration:
     231              :  *
     232              :  * top 1
     233              :  * neck 0 2 5 8 11
     234              :  * r_shoulder 1 3
     235              :  * r_elbow 2 4
     236              :  * r_wrist 3
     237              :  * l_shoulder 1 6
     238              :  * l_elbow 5 7
     239              :  * l_wrist 6
     240              :  * r_hip 1 9
     241              :  * r_knee 8 10
     242              :  * r_ankle 9
     243              :  * l_hip 1 12
     244              :  * l_knee 11 13
     245              :  * l_ankle 12
     246              :  *
     247              :  * @param[in,out] pd The pose data object; its metadata and total_labels are replaced
     248              :  * @param[in] file_path The filename path to load
     249              :  * @return Return TRUE on file loading success, otherwise FALSE
     250              :  */
     251              : static gboolean
     252            1 : pose_load_metadata_from_file (pose_data * pd, const gchar * file_path)
     253              : {
     254            1 :   gsize len = 0;
     255            1 :   GError *err = NULL;
     256            1 :   gchar *contents = NULL;
     257              :   gchar **lines;
     258              :   guint i, j;
     259              : 
     260            1 :   if (!g_file_test (file_path, G_FILE_TEST_EXISTS)) {
     261            0 :     GST_WARNING ("Labels file %s does not exist !", file_path);
     262            1 :     return FALSE;
     263              :   }
     264              : 
     265            1 :   if (!g_file_get_contents (file_path, &contents, &len, &err) || len <= 0) {
     266            0 :     ml_loge ("Unable to read file %s with error %s.", file_path, err->message);
     267            0 :     g_clear_error (&err);
     268            0 :     return FALSE;
     269              :   }
     270              : 
     271            1 :   if (contents[len - 1] == '\n')
     272            1 :     contents[len - 1] = '\0';
     273              : 
     274            1 :   lines = g_strsplit (contents, "\n", -1);
     275            1 :   pd->total_labels = g_strv_length (lines);
     276            1 :   pd->metadata = g_new0 (pose_metadata_t, pd->total_labels);
     277              : 
     278           18 :   for (i = 0; i < pd->total_labels; i++) {
     279              :     guint n_tokens;
     280              :     gchar **tokens;
     281              : 
     282           17 :     g_strstrip (lines[i]);
     283           17 :     tokens = g_strsplit (lines[i], " ", -1);
     284           17 :     n_tokens = g_strv_length (tokens);
     285           17 :     if (n_tokens > POSE_MD_MAX_CONNECTIONS_SZ) {
     286            0 :       GST_WARNING ("Too many connections (%d) declared, clamping (%d)\n",
     287              :           n_tokens, POSE_MD_MAX_CONNECTIONS_SZ);
     288            0 :       n_tokens = POSE_MD_MAX_CONNECTIONS_SZ;
     289              :     }
     290           17 :     g_strlcpy (pd->metadata[i].label, tokens[0], POSE_MD_MAX_LABEL_SZ);
     291           17 :     pd->metadata[i].num_connections = n_tokens - 1;
     292           55 :     for (j = 1; j < n_tokens; j++)
     293           38 :       pd->metadata[i].connections[j - 1] =
     294           38 :           (gint) g_ascii_strtoll (tokens[j], NULL, 10);
     295              : 
     296           17 :     g_strfreev (tokens);
     297              :   }
     298              : 
     299            1 :   g_strfreev (lines);
     300            1 :   g_free (contents);
     301              : 
     302            1 :   return TRUE;
     303              : }
     304              : 
     305              : /** @brief Return pose metadata by id */
     306              : static inline pose_metadata_t *
     307         1332 : pose_get_metadata_by_id (pose_data * data, guint id)
     308              : {
     309         1332 :   pose_metadata_t *md = data->metadata;
     310              : 
     311         1332 :   if (id > data->total_labels)
     312            0 :     return NULL;
     313              : 
     314         1332 :   return &md[id];
     315              : }
     316              : 
     317              : /** @brief tensordec-plugin's TensorDecDef callback */
     318              : static int
     319            4 : pose_init (void **pdata)
     320              : {
     321              :   pose_data *data;
     322              : 
     323            4 :   data = *pdata = g_new0 (pose_data, 1);
     324            4 :   if (data == NULL) {
     325            0 :     GST_ERROR ("Failed to allocate memory for decoder subplugin.");
     326            0 :     return FALSE;
     327              :   }
     328              : 
     329            4 :   data->width = 0;
     330            4 :   data->height = 0;
     331            4 :   data->i_width = 0;
     332            4 :   data->i_height = 0;
     333              : 
     334            4 :   data->metadata = pose_metadata_default;
     335            4 :   data->total_labels = POSE_SIZE_DEFAULT;
     336              : 
     337            4 :   data->mode = HEATMAP_ONLY;
     338              : 
     339            4 :   initSingleLineSprite (singleLineSprite, rasters, PIXEL_VALUE);
     340              : 
     341            4 :   return TRUE;
     342              : }
     343              : 
     344              : /** @brief tensordec-plugin's TensorDecDef callback */
     345              : static void
     346            4 : pose_exit (void **pdata)
     347              : {
     348            4 :   pose_data *data = *pdata;
     349              : 
     350            4 :   if (data->metadata != pose_metadata_default)
     351            1 :     g_free (data->metadata);
     352              : 
     353            4 :   g_free (*pdata);
     354            4 :   *pdata = NULL;
     355            4 : }
     356              : 
     357              : /** @brief tensordec-plugin's TensorDecDef callback */
     358              : static int
     359           11 : pose_setOption (void **pdata, int opNum, const char *param)
     360              : {
     361           11 :   pose_data *data = *pdata;
     362              : 
     363           11 :   if (opNum == 0) {
     364              :     /* option1 = output video size (width:height) */
     365              :     tensor_dim dim;
     366            4 :     int rank = gst_tensor_parse_dimension (param, dim);
     367              : 
     368            4 :     data->width = 0;
     369            4 :     data->height = 0;
     370            4 :     if (param == NULL || *param == '\0')
     371            4 :       return TRUE;
     372              : 
     373            4 :     if (rank < 2) {
     374            0 :       GST_ERROR
     375              :           ("mode-option-1 of pose estimation is video output dimension (WIDTH:HEIGHT). The given parameter, \"%s\", is not acceptable.",
     376              :           param);
     377            0 :       return TRUE;              /* Ignore this param */
     378              :     }
     379            4 :     if (rank > 2) {
     380            0 :       GST_WARNING
     381              :           ("mode-option-1 of pose estimation is video output dimension (WIDTH:HEIGHT). The third and later elements of the given parameter, \"%s\", are ignored.",
     382              :           param);
     383              :     }
     384            4 :     data->width = dim[0];
     385            4 :     data->height = dim[1];
     386            4 :     return TRUE;
     387            7 :   } else if (opNum == 1) {
     388              :     /* option1 = input model size (width:height) */
     389              :     tensor_dim dim;
     390            4 :     int rank = gst_tensor_parse_dimension (param, dim);
     391              : 
     392            4 :     data->i_width = 0;
     393            4 :     data->i_height = 0;
     394            4 :     if (param == NULL || *param == '\0')
     395            4 :       return TRUE;
     396              : 
     397            4 :     if (rank < 2) {
     398            0 :       GST_ERROR
     399              :           ("mode-option-2 of pose estimation is input video dimension (WIDTH:HEIGHT). The given parameter, \"%s\", is not acceptable.",
     400              :           param);
     401            0 :       return TRUE;
     402              :     }
     403            4 :     if (rank > 2) {
     404            0 :       GST_WARNING
     405              :           ("mode-option-2 of pose esitmiation is input video dimension (WIDTH:HEIGHT). The third and later elements of the given parameter, \"%s\", are ignored.",
     406              :           param);
     407              :     }
     408            4 :     data->i_width = dim[0];
     409            4 :     data->i_height = dim[1];
     410            4 :     return TRUE;
     411            3 :   } else if (opNum == 2) {
     412            1 :     return pose_load_metadata_from_file (data, (const gchar *) param);
     413            2 :   } else if (opNum == 3) {
     414            1 :     gint mode = find_key_strv (pose_string_modes, param);
     415            1 :     if (mode == -1) {
     416            0 :       GST_ERROR ("Mode %s is not supported\n", param);
     417            0 :       return FALSE;
     418              :     }
     419            1 :     data->mode = mode;
     420              : 
     421            1 :     return TRUE;
     422              :   }
     423              : 
     424            1 :   GST_INFO ("Property mode-option-%d is ignored", opNum + 1);
     425            1 :   return TRUE;
     426              : }
     427              : 
     428              : /**
     429              :  * @brief check the num_tensors is valid
     430              : */
     431              : static int
     432           42 : _check_tensors (const GstTensorsConfig * config)
     433              : {
     434              :   unsigned int i;
     435              :   GstTensorsInfo *info;
     436              :   GstTensorInfo *first, *current;
     437              : 
     438           42 :   g_return_val_if_fail (config != NULL, FALSE);
     439              : 
     440           42 :   info = (GstTensorsInfo *) &config->info;
     441           42 :   first = gst_tensors_info_get_nth_info (info, 0);
     442              : 
     443           42 :   for (i = 1; i < config->info.num_tensors; ++i) {
     444            0 :     current = gst_tensors_info_get_nth_info (info, i);
     445            0 :     g_return_val_if_fail (first->type == current->type, FALSE);
     446              :   }
     447           42 :   return TRUE;
     448              : }
     449              : 
     450              : /**
     451              :  * @brief tensordec-plugin's TensorDecDef callback
     452              :  *
     453              :  * [Pose Estimation]
     454              :  * Just one tensor with [ 14 (#Joint), WIDTH, HEIGHT, 1]
     455              :  * One WIDTH:HEIGHT for the each joint.
     456              :  * Have to find max value after Gaussian Blur
     457              :  *
     458              :  */
     459              : static GstCaps *
     460           42 : pose_getOutCaps (void **pdata, const GstTensorsConfig * config)
     461              : {
     462           42 :   pose_data *data = *pdata;
     463              :   GstCaps *caps;
     464              :   int i;
     465              :   char *str;
     466              :   guint pose_size;
     467              : 
     468              :   const uint32_t *dim;
     469              : 
     470           42 :   if (!_check_tensors (config))
     471            0 :     return NULL;
     472              : 
     473           42 :   pose_size = data->total_labels;
     474              : 
     475              :   /* Check if the first tensor is compatible */
     476           42 :   dim = config->[0].dimension;
     477           42 :   g_return_val_if_fail (dim[0] == pose_size, NULL);
     478          210 :   for (i = 3; i < NNS_TENSOR_RANK_LIMIT; i++)
     479          195 :     g_return_val_if_fail (dim[i] <= 1, NULL);
     480              : 
     481           15 :   if (data->mode == HEATMAP_OFFSET) {
     482            0 :     dim = config->[1].dimension;
     483            0 :     g_return_val_if_fail (dim[0] == (2 * pose_size), NULL);
     484              : 
     485            0 :     for (i = 3; i < NNS_TENSOR_RANK_LIMIT; i++)
     486            0 :       g_return_val_if_fail (dim[i] <= 1, NULL);
     487              :   }
     488              : 
     489           15 :   str = g_strdup_printf ("video/x-raw, format = RGBA, " /* Use alpha channel to make the background transparent */
     490              :       "width = %u, height = %u", data->width, data->height);
     491           15 :   caps = gst_caps_from_string (str);
     492           15 :   setFramerateFromConfig (caps, config);
     493           15 :   g_free (str);
     494              : 
     495           15 :   return caps;
     496              : }
     497              : 
     498              : /** @brief tensordec-plugin's TensorDecDef callback */
     499              : static size_t
     500           44 : pose_getTransformSize (void **pdata, const GstTensorsConfig * config,
     501              :     GstCaps * caps, size_t size, GstCaps * othercaps, GstPadDirection direction)
     502              : {
     503              :   UNUSED (pdata);
     504              :   UNUSED (config);
     505              :   UNUSED (caps);
     506              :   UNUSED (size);
     507              :   UNUSED (othercaps);
     508              :   UNUSED (direction);
     509           44 :   return 0;
     510              : }
     511              : 
     512              : /** @brief Represents a pose */
     513              : typedef struct
     514              : {
     515              :   int valid;
     516              :   int x;
     517              :   int y;
     518              :   gfloat prob;
     519              : } pose;
     520              : 
     521              : /**
     522              :  * @brief Check if a value is within lower and upper bounds
     523              :  * @param value the value to check
     524              :  * @param lower_b the lower bound (inclusive)
     525              :  * @param upper_b the uppoer bound (exclusive)
     526              :  * @return TRUE if the value is within the bounds, otherwise FALSE
     527              :  */
     528              : static gboolean
     529        15370 : is_value_within(int value, int lower_b, int upper_b)
     530              : {
     531        15370 :     if (value < lower_b) {
     532            0 :         return FALSE;
     533        15370 :     } else if (value >= upper_b) {
     534            0 :         return FALSE;
     535              :     } else {
     536        15370 :         return TRUE;
     537              :     }
     538              : }
     539              : 
     540              : /**
     541              :  * @brief Fill in pixel with PIXEL_VALUE at x,y position. Make thicker (x+1, y+1)
     542              :  * @param[out] out_info The output buffer (RGBA plain)
     543              :  * @param[in] bdata The pose-estimation internal data.
     544              :  * @param[in] coordinate of pixel
     545              :  */
     546              : static void
     547         7685 : setpixel (uint32_t * frame, pose_data * data, int x, int y)
     548              : {
     549         7685 :     if (is_value_within(x, 0, data->width) && is_value_within(y, 0, data->height)) {
     550         7685 :         uint32_t *pos = &frame[y * data->width + x];
     551         7685 :         *pos = PIXEL_VALUE;
     552              : 
     553         7685 :         if (x + 1 < (int) data->width) {
     554         7685 :             pos = &frame[y * data->width + x + 1];
     555         7685 :             *pos = PIXEL_VALUE;
     556              :         }
     557         7685 :         if (y + 1 < (int) data->height) {
     558         7685 :             pos = &frame[(y + 1) * data->width + x];
     559         7685 :             *pos = PIXEL_VALUE;
     560              :         }
     561              :     }
     562         7685 : }
     563              : 
     564              : /**
     565              :  * @brief Draw line with dot at the end of line
     566              :  * @param[out] out_info The output buffer (RGBA plain)
     567              :  * @param[in] bdata The pose-estimation internal data.
     568              :  * @param[in] coordinate of two end point of line
     569              :  */
     570              : static void
     571          680 : draw_line_with_dot (uint32_t * frame, pose_data * data, int x1, int y1, int x2,
     572              :     int y2)
     573              : {
     574              :   int i, dx, sx, dy, sy, err;
     575              :   uint32_t *pos;
     576          680 :   int xx[40] =
     577              :       { -4, 0, 4, 0, -3, -3, -3, -2, -2, -2, -2, -2, -1, -1, -1, -1, -1, -1, -1,
     578              :     0, 0, 0, 0, 0, 0, 1, 1, 1, 1, 1, 1, 1, 2, 2, 2, 2, 2, 3, 3, 3
     579              :   };
     580          680 :   int yy[40] =
     581              :       { 0, -4, 0, 4, -1, 0, 1, -2, -1, 0, 1, 2, -3, -2, -1, 0, 1, 2, 3, -3, -2,
     582              :     -1, 1, 2, 3, -3, -2, -1, 0, 1, 2, 3, -2, -1, 0, 1, 2, -1, 0, 1
     583              :   };
     584              : 
     585              :   int xs, ys, xe, ye;
     586              : 
     587          680 :   if (x1 > x2) {
     588           98 :     xs = x2;
     589           98 :     ys = y2;
     590           98 :     xe = x1;
     591           98 :     ye = y1;
     592              :   } else {
     593          582 :     xs = x1;
     594          582 :     ys = y1;
     595          582 :     xe = x2;
     596          582 :     ye = y2;
     597              :   }
     598              : 
     599              : 
     600        27880 :   for (i = 0; i < 40; i++) {
     601        27200 :     if ((ys + yy[i] >= 0) && (ys + yy[i] < (int) data->height) &&
     602        16320 :         (xs + xx[i] >= 0) && (xs + xx[i] < (int) data->width)) {
     603         9920 :       pos = &frame[(ys + yy[i]) * data->width + xs + xx[i]];
     604         9920 :       *pos = PIXEL_VALUE;
     605              :     }
     606        27200 :     if ((ye + yy[i] >= 0) && (ye + yy[i] < (int) data->height) &&
     607        16320 :         (xe + xx[i] >= 0) && (xe + xx[i] < (int) data->width)) {
     608        11920 :       pos = &frame[(ye + yy[i]) * data->width + xe + xx[i]];
     609        11920 :       *pos = PIXEL_VALUE;
     610              :     }
     611              :   }
     612              : 
     613              : 
     614          680 :   dx = abs (xe - xs);
     615          680 :   sx = xs < xe ? 1 : -1;
     616          680 :   dy = abs (ye - ys);
     617          680 :   sy = ys < ye ? 1 : -1;
     618          680 :   err = (dx > dy ? dx : -dy) / 2;
     619              : 
     620         7685 :   while (setpixel (frame, data, xs, ys), xs != xe || ys != ye) {
     621         7005 :     int e2 = err;
     622         7005 :     if (e2 > -dx) {
     623         7005 :       err -= dy;
     624         7005 :       xs += sx;
     625              :     }
     626         7005 :     if (e2 < dy) {
     627            0 :       err += dx;
     628            0 :       ys += sy;
     629              :     }
     630              :   }
     631          680 : }
     632              : 
     633              : /**
     634              :  * @brief Draw label with the given results (pose) to the output buffer
     635              :  * @param[out] out_info The output buffer (RGBA plain)
     636              :  * @param[in] bdata The pose-estimation internal data.
     637              :  * @param[in] results The final results to be drawn.
     638              :  */
     639              : static void
     640           44 : draw_label (uint32_t * frame, pose_data * data, pose * xydata)
     641              : {
     642              :   int x1, y1, x2, y2;
     643              :   uint32_t *pos1, *pos2;
     644              : 
     645              :   guint i;
     646           44 :   guint pose_size = data->total_labels;
     647              :   char *label;
     648          720 :   for (i = 0; i < pose_size; i++) {
     649          676 :     if (xydata[i].valid) {
     650          666 :       pose_metadata_t *md = pose_get_metadata_by_id (data, i);
     651              :       gsize j, label_len;
     652              : 
     653          666 :       x1 = xydata[i].x;
     654          666 :       y1 = xydata[i].y;
     655          666 :       if (md == NULL)
     656            0 :         continue;
     657          666 :       label = md->label;
     658          666 :       label_len = label ? strlen (label) : 0;
     659          666 :       y1 = MAX (0, (y1 - 14));
     660          666 :       pos1 = &frame[y1 * data->width + x1];
     661         5729 :       for (j = 0; j < label_len; j++) {
     662         5070 :         unsigned int char_index = label[j];
     663         5070 :         if ((x1 + 8) > (int) data->width)
     664            7 :           break;
     665         5063 :         pos2 = pos1;
     666        70882 :         for (y2 = 0; y2 < 13; y2++) {
     667       592371 :           for (x2 = 0; x2 < 8; x2++) {
     668       526552 :             *(pos2 + x2) = singleLineSprite[char_index][y2][x2];
     669              :           }
     670        65819 :           pos2 += data->width;
     671              :         }
     672         5063 :         x1 += 9;
     673         5063 :         pos1 += 9;
     674              :       }
     675              :     }
     676              :   }
     677           44 : }
     678              : 
     679              : /**
     680              :  * @brief Draw with the given results (pose) to the output buffer
     681              :  * @param[out] out_info The output buffer (RGBA plain)
     682              :  * @param[in] bdata The pose-estimation internal data.
     683              :  * @param[in] results The final results to be drawn.
     684              :  */
     685              : static void
     686           44 : draw (GstMapInfo * out_info, pose_data * data, GArray * results)
     687              : {
     688              :   guint i;
     689              :   gint j;
     690           44 :   uint32_t *frame = (uint32_t *) out_info->data;        /* Let's draw per pixel (4bytes) */
     691           44 :   guint pose_size = data->total_labels;
     692              : 
     693           44 :   pose **XYdata = g_new0 (pose *, pose_size);
     694           44 :   if (!XYdata) {
     695            0 :     ml_loge ("The memory allocation is failed.");
     696            0 :     return;
     697              :   }
     698              : 
     699          720 :   for (i = 0; i < pose_size; i++) {
     700          676 :     XYdata[i] = &g_array_index (results, pose, i);
     701          676 :     if (XYdata[i]->prob < 0.5) {
     702           10 :       XYdata[i]->valid = FALSE;
     703              :     }
     704              :   }
     705              : 
     706          720 :   for (i = 0; i < pose_size; i++) {
     707              :     pose_metadata_t *smd;
     708          676 :     if (XYdata[i]->valid == FALSE)
     709           10 :       continue;
     710          666 :     smd = pose_get_metadata_by_id (data, i);
     711          666 :     if (smd == NULL)
     712            0 :       continue;
     713         2030 :     for (j = 0; j < smd->num_connections; j++) {
     714         1364 :       guint k = smd->connections[j];
     715              :       /* Have we already drawn the connection ? */
     716         1364 :       if ((k > data->total_labels) || (k < i))
     717          683 :         continue;
     718              :       /* Is the body point valid ? */
     719          681 :       if (XYdata[k]->valid == FALSE)
     720            1 :         continue;
     721          680 :       draw_line_with_dot (frame, data,
     722          680 :           XYdata[i]->x, XYdata[i]->y, XYdata[k]->x, XYdata[k]->y);
     723              :     }
     724              :   }
     725              : 
     726           44 :   draw_label (frame, data, *XYdata);
     727              : 
     728           44 :   g_free (XYdata);
     729              : }
     730              : 
     731              : /** @brief tensordec-plugin's TensorDecDef callback */
     732              : static GstFlowReturn
     733           44 : pose_decode (void **pdata, const GstTensorsConfig * config,
     734              :     const GstTensorMemory * input, GstBuffer * outbuf)
     735              : {
     736           44 :   pose_data *data = *pdata;
     737           44 :   const size_t size = (size_t) data->width * data->height * 4;   /* RGBA */
     738              :   GstMapInfo out_info;
     739              :   GstMemory *out_mem;
     740           44 :   GArray *results = NULL;
     741           44 :   const GstTensorMemory *detections = NULL;
     742              :   float *arr;
     743              :   int i, j;
     744              :   int grid_xsize, grid_ysize;
     745              :   guint pose_size, index;
     746              : 
     747           44 :   g_assert (outbuf); /** GST Internal Bug */
     748              :   /* Ensure we have outbuf properly allocated */
     749           44 :   if (gst_buffer_get_size (outbuf) == 0) {
     750           44 :     out_mem = gst_allocator_alloc (NULL, size, NULL);
     751              :   } else {
     752            0 :     if (gst_buffer_get_size (outbuf) < size) {
     753            0 :       gst_buffer_set_size (outbuf, size);
     754              :     }
     755            0 :     out_mem = gst_buffer_get_all_memory (outbuf);
     756              :   }
     757           44 :   if (!gst_memory_map (out_mem, &out_info, GST_MAP_WRITE)) {
     758            0 :     gst_memory_unref (out_mem);
     759            0 :     ml_loge ("Cannot map output memory / tensordec-pose.\n");
     760           44 :     return GST_FLOW_ERROR;
     761              :   }
     762              :   /** reset the buffer with alpha 0 / black */
     763           44 :   memset (, 0, size);
     764              : 
     765           44 :   pose_size = data->total_labels;
     766              : 
     767           44 :   grid_xsize = config->[0].dimension[1];
     768           44 :   grid_ysize = config->[0].dimension[2];
     769              : 
     770           44 :   results = g_array_sized_new (FALSE, TRUE, sizeof (pose), pose_size);
     771           44 :   detections = &input[0];
     772           44 :   arr = detections->data;
     773          720 :   for (index = 0; index < pose_size; index++) {
     774          676 :     int maxX = 0;
     775          676 :     int maxY = 0;
     776          676 :     float max = G_MINFLOAT;
     777              :     pose p;
     778         1352 :     for (j = 0; j < grid_ysize; j++) {
     779        11160 :       for (i = 0; i < grid_xsize; i++) {
     780        10484 :         float cen = arr[i * pose_size + j * grid_xsize * pose_size + index];
     781        10484 :         if (data->mode == HEATMAP_OFFSET) {
     782            0 :           cen = _sigmoid (cen);
     783              :         }
     784        10484 :         if (cen > max) {
     785          889 :           max = cen;
     786          889 :           maxX = i;
     787          889 :           maxY = j;
     788              :         }
     789              :       }
     790              :     }
     791              : 
     792          676 :     p.valid = TRUE;
     793          676 :     p.prob = max;
     794          676 :     if (data->mode == HEATMAP_OFFSET) {
     795            0 :       const gfloat *offset = ((const GstTensorMemory *) &input[1])->data;
     796              :       gfloat offsetX, offsetY, posX, posY;
     797              :       int offsetIdx;
     798            0 :       offsetIdx = (maxY * grid_xsize + maxX) * pose_size * 2 + index;
     799            0 :       offsetY = offset[offsetIdx];
     800            0 :       offsetX = offset[offsetIdx + pose_size];
     801            0 :       posX = (((gfloat) maxX) / (grid_xsize - 1)) * data->i_width + offsetX;
     802            0 :       posY = (((gfloat) maxY) / (grid_ysize - 1)) * data->i_height + offsetY;
     803            0 :       p.x = posX * data->width / data->i_width;
     804            0 :       p.y = posY * data->height / data->i_height;
     805              : 
     806              :     } else {
     807          676 :       p.x = (maxX * data->width) / data->i_width;
     808          676 :       p.y = (maxY * data->height) / data->i_height;
     809              :     }
     810              :     /* Some keypoints can be estimated slightly out of image range */
     811          676 :     p.x = MIN (data->width, (guint) (MAX (0, p.x)));
     812          676 :     p.y = MIN (data->height, (guint) (MAX (0, p.y)));
     813              : 
     814          676 :     g_array_append_val (results, p);
     815              :   }
     816              : 
     817           44 :   draw (&out_info, data, results);
     818           44 :   g_array_free (results, TRUE);
     819           44 :   gst_memory_unmap (out_mem, &out_info);
     820           44 :   if (gst_buffer_get_size (outbuf) == 0)
     821           44 :     gst_buffer_append_memory (outbuf, out_mem);
     822              :   else
     823            0 :     gst_buffer_replace_all_memory (outbuf, out_mem);
     824              : 
     825           44 :   return GST_FLOW_OK;
     826              : }
     827              : 
     828              : static gchar decoder_subplugin_pose_estimation[] = "pose_estimation";
     829              : /** @brief Pose Estimation tensordec-plugin TensorDecDef instance */
     830              : static GstTensorDecoderDef poseEstimation = {
     831              :   .modename = decoder_subplugin_pose_estimation,
     832              :   .init = pose_init,
     833              :   .exit = pose_exit,
     834              :   .setOption = pose_setOption,
     835              :   .getOutCaps = pose_getOutCaps,
     836              :   .getTransformSize = pose_getTransformSize,
     837              :   .decode = pose_decode
     838              : };
     839              : 
     840              : /** @brief Initialize this object for tensordec-plugin */
     841              : void
     842           35 : init_pose (void)
     843              : {
     844           35 :   nnstreamer_decoder_probe (&poseEstimation);
     845           35 : }
     846              : 
     847              : /** @brief Destruct this object for tensordec-plugin */
     848              : void
     849           35 : fini_pose (void)
     850              : {
     851           35 :   nnstreamer_decoder_exit (poseEstimation.modename);
     852           35 : }

Generated by: LCOV version 2.0-1