LCOV - code coverage report
Current view: top level - nnstreamer-2.4.2/ext/nnstreamer/tensor_decoder - tensordec-boundingbox.h (source / functions) Coverage Total Hit
Test: nnstreamer 2.4.2-0 nnstreamer/nnstreamer#eca68b8d050408568af95d831a8eef62aaee7784 Lines: 100.0 % 14 14
Test Date: 2025-03-13 05:38:21 Functions: 85.7 % 7 6

            Line data    Source code
       1              : /**
       2              :  * GStreamer / NNStreamer tensor_decoder subplugin, "bounding boxes"
       3              :  * Copyright (C) 2018 Samsung Electronics Co. Ltd.
       4              :  * Copyright (C) 2018 MyungJoo Ham <myungjoo.ham@samsung.com>
       5              :  * Copyright 2021 NXP
       6              :  *
       7              :  * This library is free software; you can redistribute it and/or
       8              :  * modify it under the terms of the GNU Library General Public
       9              :  * License as published by the Free Software Foundation;
      10              :  * version 2.1 of the License.
      11              :  *
      12              :  * This library is distributed in the hope that it will be useful,
      13              :  * but WITHOUT ANY WARRANTY; without even the implied warranty of
      14              :  * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
      15              :  * Library General Public License for more details.
      16              :  *
      17              :  */
      18              : /**
      19              :  * @file        tensordec-boundingbox.h
      20              :  * @date        15 Nov 2018
      21              :  * @brief       NNStreamer tensor-decoder subplugin, "bounding boxes",
      22              :  *              which converts tensors to video stream w/ boxes on
      23              :  *              transparent background.
      24              :  *              This code is NYI/WIP and not compilable.
      25              :  *
      26              :  * @see         https://github.com/nnstreamer/nnstreamer
      27              :  * @author      MyungJoo Ham <myungjoo.ham@samsung.com>
      28              :  * @bug         No known bugs except for NYI items
      29              :  *
      30              :  * option1: Decoder mode of bounding box.
      31              :  *          Available: yolov5 (yolov8 and mp-palm-detection are also referenced under option3 below)
      32              :  *                     mobilenet-ssd (single shot multibox detector with priors.)
      33              :  *                     mobilenet-ssd-postprocess
      34              :  *                     ov-person-detection
      35              :  *                     tf-ssd (deprecated, recommend to use mobilenet-ssd-postprocess)
      36              :  *                     tflite-ssd (deprecated, recommend to use mobilenet-ssd)
      37              :  * option2: Location of label file
      38              :  *          This is independent from option1
      39              :  * option3: Any option1-dependent values
      40              :  *          !!This depends on option1 values!!
      41              :  *          for yolov5 and yolov8 mode:
      42              :  *            The option3 requires up to 3 numbers, which tell
      43              :  *              - whether the output values are scaled or not
      44              :  *                0: not scaled (default), 1: scaled (e.g., 0.0 ~ 1.0)
      45              :  *              - the threshold of confidence (optional, default set to 0.25)
      46              :  *              - the threshold of IOU (optional, default set to 0.45)
      47              :  *            An example of option3 is "option3=0:0.65:0.6"
      48              :  *          for mobilenet-ssd mode:
      49              :  *            The option3 definition scheme is, in order, the following:
      50              :  *                - box priors location file (mandatory)
      51              :  *                - Detection threshold (optional, default set to 0.5)
      52              :  *                - Y box scale (optional, default set to 10.0)
      53              :  *                - X box scale (optional, default set to 10.0)
      54              :  *                - h box scale (optional, default set to 5.0)
      55              :  *                - w box scale (optional, default set to 5.0)
      56              :  *                - IOU box valid threshold (optional, default set to 0.5)
      57              :  *            The default parameters value could be set in the following ways:
      58              :  *            option3=box-priors.txt:0.5:10.0:10.0:5.0:5.0:0.5
      59              :  *            option3=box-priors.txt
      60              :  *            option3=box-priors.txt::::::
      61              :  *
      62              :  *            It's possible to set only a few values, using the default values for
      63              :  *            those not specified through the command line.
      64              :  *            For example, to set the detection and IOU thresholds to 0.65 and 0.6
      65              :  *            respectively, give the option3 parameter as follows:
      66              :  *            option3=box-priors.txt:0.65:::::0.6
      67              :  *          for mobilenet-ssd-postprocess mode:
      68              :  *            The option3 is required to have 5 integer numbers, which tell
      69              :  *            the tensor-dec how to interpret the given tensor inputs.
      70              :  *            The first 4 numbers separated by colon, ':', designate which
      71              :  *            are location:class:score:number of the tensors.
      72              :  *            The last number separated by comma, ',' from the first 4 numbers
      73              :  *            designate the threshold in percent.
      74              :  *            In other words, "option3=%i:%i:%i:%i,%i".
      75              :  *          for mp-palm-detection mode:
      76              :  *            The option3 takes the following colon-separated numbers, in order:
      77              :  *                - box score threshold (mandatory)
      78              :  *                - number of layers for anchor generation (optional, default set to 4)
      79              :  *                - minimum scale factor for anchor generation (optional, default set to 1.0)
      80              :  *                - maximum scale factor for anchor generation (optional, default set to 1.0)
      81              :  *                - X offset (optional, default set to 0.5)
      82              :  *                - Y offset (optional, default set to 0.5)
      83              :  *                - strides for each layer for anchor generation (optional, default set to 8:16:16:16)
      84              :  *            The default parameter value could be set in the following ways:
      85              :  *            option3=0.5
      86              :  *            option3=0.5:4:0.2:0.8
      87              :  *            option3=0.5:4:1.0:1.0:0.5:0.5:8:16:16:16
      88              :  *
      89              :  * option4: Video Output Dimension (WIDTH:HEIGHT)
      90              :  *          This is independent from option1
      91              :  * option5: Input Dimension (WIDTH:HEIGHT)
      92              :  *          This is independent from option1
      93              :  * option6: Whether to track result bounding boxes or not
      94              :  *          0 (default, do not track)
      95              :  *          1 (track result bounding boxes, with naive centroid based algorithm)
      96              :  * option7: Whether to log the result bounding boxes or not
      97              :  *          0 (default, do not log)
      98              :  *          1 (log result bounding boxes)
      99              :  * option8: Box Style (NYI)
     100              :  *
     101              :  * MAJOR TODO: Support other colorspaces natively from _decode for performance gain
     102              :  * (e.g., BGRA, ARGB, ...)
     103              :  *
     104              :  */
     105              : 
     106              : #ifndef _TENSORDECBB_H__
     107              : #define _TENSORDECBB_H__
     108              : #include <gst/gst.h>
     109              : #include <math.h> /* expf */
     110              : #include <nnstreamer_log.h>
     111              : #include <nnstreamer_plugin_api_util.h>
     112              : #include <nnstreamer_util.h>
     113              : #include "tensordecutil.h"
     114              : 
     115              : #define MAX_POLY_CORNERS 8 /* NOTE(review): presumably the maximum number of polygon corners handled; not used in this header - confirm */
     116              : #define PIXEL_VALUE (0xFF0000FF) /* RED 100% in RGBA */
     117              : 
     118              : /**
     119              :  * @brief Option of bounding box. NOTE(review): each value appears to select the "option(value + 1)" described in the file header; confirm against setOption.
     120              :  */
     121              : enum class BoundingBoxOption {
     122              :   MODE = 0, /**< presumably option1: decoder mode */
     123              :   LABEL_PATH = 1, /**< presumably option2: location of label file */
     124              :   INTERNAL = 2, /**< presumably option3: option1-dependent values */
     125              :   VIDEO_SIZE = 3, /**< presumably option4: video output dimension */
     126              :   INPUT_MODEL_SIZE = 4, /**< presumably option5: input dimension */
     127              :   TRACK = 5, /**< presumably option6: track result bounding boxes or not */
     128              :   LOG = 6, /**< presumably option7: log result bounding boxes or not */
     129              :   UNKNOWN, /**< implicitly 7 (one past LOG) */
     130              : };
     131              : 
     132              : /**
     133              :  * @brief There can be different schemes for bounding boxes. Every named mode has an explicitly fixed value.
     134              :  */
     135              : typedef enum {
     136              :   MOBILENET_SSD_BOUNDING_BOX = 0,
     137              :   MOBILENET_SSD_PP_BOUNDING_BOX = 1,
     138              :   OV_PERSON_DETECTION_BOUNDING_BOX = 2,
     139              :   OV_FACE_DETECTION_BOUNDING_BOX = 3,
     140              : 
     141              :   /* The modes starting with 'OLDNAME_' are kept for backward compatibility. */
     142              :   OLDNAME_MOBILENET_SSD_BOUNDING_BOX = 4,
     143              :   OLDNAME_MOBILENET_SSD_PP_BOUNDING_BOX = 5,
     144              : 
     145              :   YOLOV5_BOUNDING_BOX = 6,
     146              : 
     147              :   MP_PALM_DETECTION_BOUNDING_BOX = 7,
     148              : 
     149              :   YOLOV8_BOUNDING_BOX = 8,
     150              : 
     151              :   YOLOV8_ORIENTED_BOUNDING_BOX = 9,
     152              : 
     153              :   BOUNDING_BOX_UNKNOWN, /**< implicitly 10; also the default `mode` argument of nms() */
     154              : } bounding_box_modes;
     155              : 
     156              : /**
     157              :  * @brief Structure for object centroid tracking.
     158              :  */
     159              : typedef struct {
     160              :   guint id; /**< centroid id */
     161              :   guint matched_box_idx; /**< NOTE(review): presumably the index of the box matched to this centroid - confirm */
     162              :   gint cx; /**< NOTE(review): presumably the centroid x coordinate - confirm */
     163              :   gint cy; /**< NOTE(review): presumably the centroid y coordinate - confirm */
     164              :   guint consecutive_disappeared_frames; /**< NOTE(review): presumably the count of consecutive frames without a match - confirm */
     165              : } centroid;
     166              : 
     167              : /**
     168              :  * @brief Structure for distances. {distance} : {centroids} x {boxes}
     169              :  */
     170              : typedef struct {
     171              :   guint centroid_idx; /**< index on the centroid side of the pair */
     172              :   guint box_idx; /**< index on the box side of the pair */
     173              :   guint64 distance; /**< distance for this (centroid, box) pair; NOTE(review): metric/units not visible here */
     174              : } distanceArrayData;
     175              : 
     176              : /**
     177              :  * @brief anchor data (center position and size; units/normalization are not visible in this header)
     178              :  */
     179              : typedef struct {
     180              :   float x_center; /**< x of the anchor center */
     181              :   float y_center; /**< y of the anchor center */
     182              :   float w; /**< anchor width */
     183              :   float h; /**< anchor height */
     184              : } anchor;
     185              : 
     186              : /**
     187              :  * @brief obb anchor data (same fields as anchor plus an angle)
     188              :  */
     189              : typedef struct {
     190              :   float x_center; /**< x of the anchor center */
     191              :   float y_center; /**< y of the anchor center */
     192              :   float w; /**< anchor width */
     193              :   float h; /**< anchor height */
     194              :   float angle; /**< box angle; NOTE(review): radians vs. degrees not visible here - confirm */
     195              : } obb_anchor;
     196              : 
     197              : /**
     198              :  * @brief Point data structure (2D, float coordinates)
     199              :  */
     200              : typedef struct {
     201              :   float x; /**< x coordinate */
     202              :   float y; /**< y coordinate */
     203              : } Point;
     204              : 
     205              : /** @brief Represents a detected object */
     206              : typedef struct {
     207              :   int valid; /**< NOTE(review): presumably non-zero while the entry is a valid detection - confirm */
     208              :   int class_id; /**< class id of the object */
     209              :   int x; /**< box x position; NOTE(review): top-left vs. center not visible here */
     210              :   int y; /**< box y position; NOTE(review): top-left vs. center not visible here */
     211              :   int width; /**< box width */
     212              :   int height; /**< box height */
     213              :   float angle; /**< box angle; NOTE(review): presumably only meaningful for oriented boxes - confirm */
     214              :   float prob; /**< NOTE(review): presumably the detection score/probability - confirm */
     215              : 
     216              :   int tracking_id; /**< NOTE(review): presumably the id given by centroid tracking (option6) - confirm */
     217              : } detectedObject;
     218              : 
     219              : /**
     220              :  * @brief Check whether the num_tensors of the given config is valid.
     221              :  * @param[in] config The structure of tensors info to check.
     222              :  * @param[in] limit The limit of tensors number.
     223              :  * @return TRUE if tensors info is valid. NOTE(review): the declared return type is int; the value returned for invalid info is not visible here.
     224              :  */
     225              : int check_tensors (const GstTensorsConfig *config, const unsigned int limit);
     226              : 
     227              : /**
     228              :  * @brief Apply NMS to the given results (objects[DETECTION_MAX])
     229              :  * @param[in,out] results The results to be filtered with nms
     230              :  * @note NOTE(review): threshold and mode (default BOUNDING_BOX_UNKNOWN) are undocumented here; presumably the overlap threshold and the decoder mode - confirm. */
     231              : void nms (GArray *results, gfloat threshold, bounding_box_modes mode = BOUNDING_BOX_UNKNOWN);
     232              : 
     233              : /**
     234              :  * @brief       Interface for Bounding box's properties (abstract: three pure virtual methods must be implemented)
     235              :  */
     236              : class BoxProperties
     237              : {
     238              :   public:
     239          120 :   virtual ~BoxProperties () = default; /* virtual, so objects can be destroyed through a BoxProperties pointer */
     240              : 
     241              :   /* mandatory methods (pure virtual) */
     242              :   virtual int setOptionInternal (const char *param) = 0; /* NOTE(review): presumably parses the option3 string - confirm */
     243              :   virtual int checkCompatible (const GstTensorsConfig *config) = 0; /* NOTE(review): int return convention not visible here */
     244              :   virtual GArray *decode (const GstTensorsConfig *config, const GstTensorMemory *input) = 0; /* NOTE(review): ownership of the returned GArray not visible here */
     245              : 
     246           30 :   void setInputWidth (guint width)
     247              :   {
     248           30 :     i_width = width;
     249           30 :   }
     250           30 :   void setInputHeight (guint height)
     251              :   {
     252           30 :     i_height = height;
     253           30 :   }
     254           13 :   void setTotalLabels (guint labels)
     255              :   {
     256           13 :     total_labels = labels;
     257           13 :   }
     258              : 
     259           32 :   guint getInputWidth () /* NOTE(review): read-only accessor but not const-qualified */
     260              :   {
     261           32 :     return i_width;
     262              :   }
     263           32 :   guint getInputHeight () /* NOTE(review): read-only accessor but not const-qualified */
     264              :   {
     265           32 :     return i_height;
     266              :   }
     267              :   gchar *name; /* public raw pointer; NOTE(review): ownership and who sets it are not visible here */
     268              : 
     269              :   protected:
     270              :   guint i_width; /**< Input Video Width (no default initializer in this class) */
     271              :   guint i_height; /**< Input Video Height (no default initializer in this class) */
     272              : 
     273              :   guint max_detection; /**< no setter/getter in this class; NOTE(review): presumably set by derived classes */
     274              :   guint total_labels; /**< set through setTotalLabels() */
     275              : };
     276              : 
     277              : /**
     278              :  * @brief       Class for Bounding box tensor decoder
     279              :  */
     280              : class BoundingBox
     281              : {
     282              :   public:
     283              :   BoundingBox ();
     284              :   ~BoundingBox ();
     285              : 
     286              :   gboolean checkLabelProps ();
     287              :   int setBoxDecodingMode (const char *param); /* NOTE(review): presumably handles option1 - confirm */
     288              :   int setLabelPath (const char *param); /* NOTE(review): presumably handles option2 - confirm */
     289              :   int setVideoSize (const char *param); /* NOTE(review): presumably handles option4 - confirm */
     290              :   int setInputModelSize (const char *param); /* NOTE(review): presumably handles option5 - confirm */
     291              :   void draw (GstMapInfo *out_info, GArray *results);
     292              :   void logBoxes (GArray *results);
     293              :   void updateCentroids (GArray *boxes);
     294              : 
     295              :   int setOption (BoundingBoxOption opNum, const char *param);
     296              :   GstCaps *getOutCaps (const GstTensorsConfig *config);
     297              :   GstFlowReturn decode (const GstTensorsConfig *config,
     298              :       const GstTensorMemory *input, GstBuffer *outbuf);
     299              : 
     300              :   static BoxProperties *getProperties (const gchar *properties_name); /* NOTE(review): presumably a lookup in properties_table - confirm */
     301              :   static gboolean addProperties (BoxProperties *boxProperties); /* NOTE(review): presumably registers into properties_table - confirm */
     302              : 
     303              :   private:
     304              :   bounding_box_modes mode;
     305              :   BoxProperties *bdata; /* NOTE(review): ownership not visible in this header */
     306              : 
     307              :   /* From option2 */
     308              :   imglabel_t labeldata;
     309              :   char *label_path;
     310              : 
     311              :   /* From option4 */
     312              :   guint width; /**< Output Video Width */
     313              :   guint height; /**< Output Video Height */
     314              : 
     315              :   /* From option6 (track or not) */
     316              :   gint is_track;
     317              :   guint centroids_last_id; /**< The last id of centroids. Valid ids are 1, 2, ... (not 0). */
     318              :   guint max_centroids_num; /**< The maximum number of centroids */
     319              :   guint consecutive_disappear_threshold; /**< The threshold of consecutive disappeared frames */
     320              : 
     321              :   GArray *centroids; /**< Array for centroids */
     322              :   GArray *distanceArray; /**< Array for distances */
     323              : 
     324              :   /* From option7 (log or not) */
     325              :   gint do_log;
     326              : 
     327              :   gboolean flag_use_label;
     328              : 
     329              :   /* Table for box properties data (C++17 inline static member: defined here, zero-initialized to a null pointer) */
     330              :   inline static GHashTable *properties_table;
     331              : };
     332              : #endif /* _TENSORDECBB_H__ */
        

Generated by: LCOV version 2.0-1