Line data Source code
1 : /**
2 : * GStreamer / NNStreamer tensor_decoder subplugin, "bounding boxes"
3 : * Copyright (C) 2018 Samsung Electronics Co. Ltd.
4 : * Copyright (C) 2018 MyungJoo Ham <myungjoo.ham@samsung.com>
5 : * Copyright 2021 NXP
6 : *
7 : * This library is free software; you can redistribute it and/or
8 : * modify it under the terms of the GNU Library General Public
9 : * License as published by the Free Software Foundation;
10 : * version 2.1 of the License.
11 : *
12 : * This library is distributed in the hope that it will be useful,
13 : * but WITHOUT ANY WARRANTY; without even the implied warranty of
14 : * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
15 : * Library General Public License for more details.
16 : *
17 : */
18 : /**
19 : * @file tensordec-boundingbox.h
20 : * @date 15 Nov 2018
21 : * @brief NNStreamer tensor-decoder subplugin, "bounding boxes",
22 : * which converts tensors to video stream w/ boxes on
23 : * transparent background.
24 : * This code is NYI/WIP and not compilable.
25 : *
26 : * @see https://github.com/nnstreamer/nnstreamer
27 : * @author MyungJoo Ham <myungjoo.ham@samsung.com>
28 : * @bug No known bugs except for NYI items
29 : *
30 : * option1: Decoder mode of bounding box.
31 : * Available: yolov5, yolov8, mp-palm-detection
32 : * mobilenet-ssd (single shot multibox detector with priors.)
33 : * mobilenet-ssd-postprocess
34 : * ov-person-detection
35 : * tf-ssd (deprecated, recommend to use mobilenet-ssd-postprocess)
36 : * tflite-ssd (deprecated, recommend to use mobilenet-ssd)
37 : * option2: Location of label file
38 : * This is independent from option1
39 : * option3: Any option1-dependent values
40 : * !!This depends on option1 values!!
41 : * for yolov5 and yolov8 mode:
42 : * The option3 requires up to 3 numbers, which tell
43 : * - whether the output values are scaled or not
44 : * 0: not scaled (default), 1: scaled (e.g., 0.0 ~ 1.0)
45 : * - the threshold of confidence (optional, default set to 0.25)
46 : * - the threshold of IOU (optional, default set to 0.45)
47 : * An example of option3 is "option3=0:0.65:0.6"
48 : * for mobilenet-ssd mode:
49 : * The option3 definition scheme is, in order, the following:
50 : * - box priors location file (mandatory)
51 : * - Detection threshold (optional, default set to 0.5)
52 : * - Y box scale (optional, default set to 10.0)
53 : * - X box scale (optional, default set to 10.0)
54 : * - h box scale (optional, default set to 5.0)
55 : * - w box scale (optional, default set to 5.0)
56 : * - IOU box valid threshold (optional, default set to 0.5)
57 : * The default parameters value could be set in the following ways:
58 : * option3=box-priors.txt:0.5:10.0:10.0:5.0:5.0:0.5
59 : * option3=box-priors.txt
60 : * option3=box-priors.txt::::::
61 : *
62 : * It's possible to set only a few values, using the default values for
63 : * those not specified through the command line.
64 : * You could specify respectively the detection and IOU thresholds to 0.65
65 : * and 0.6 with the option3 parameter as follows:
66 : * option3=box-priors.txt:0.65:::::0.6
67 : * for mobilenet-ssd-postprocess mode:
68 : * The option3 is required to have 5 integer numbers, which tell
69 : * the tensor-dec how to interpret the given tensor inputs.
70 : * The first 4 numbers separated by colon, ':', designate which
71 : * are location:class:score:number of the tensors.
72 : * The last number separated by comma, ',' from the first 4 numbers
73 : * designate the threshold in percent.
74 : * In other words, "option3=%i:%i:%i:%i,%i".
75 : * for mp-palm-detection mode:
76 : * The option3 takes the following colon-separated numbers, in order (only the first is mandatory):
77 : * - box score threshold (mandatory)
78 : * - number of layers for anchor generation (optional, default set to 4)
79 : * - minimum scale factor for anchor generation (optional, default set to 1.0)
80 : * - maximum scale factor for anchor generation (optional, default set to 1.0)
81 : * - X offset (optional, default set to 0.5)
82 : * - Y offset (optional, default set to 0.5)
83 : * - strides for each layer for anchor generation (optional, default set to 8:16:16:16)
84 : * The default parameter value could be set in the following ways:
85 : * option3=0.5
86 : * option3=0.5:4:0.2:0.8
87 : * option3=0.5:4:1.0:1.0:0.5:0.5:8:16:16:16
88 : *
89 : * option4: Video Output Dimension (WIDTH:HEIGHT)
90 : * This is independent from option1
91 : * option5: Input Dimension (WIDTH:HEIGHT)
92 : * This is independent from option1
93 : * option6: Whether to track result bounding boxes or not
94 : * 0 (default, do not track)
95 : * 1 (track result bounding boxes, with naive centroid based algorithm)
96 : * option7: Whether to log the result bounding boxes or not
97 : * 0 (default, do not log)
98 : * 1 (log result bounding boxes)
99 : * option8: Box Style (NYI)
100 : *
101 : * MAJOR TODO: Support other colorspaces natively from _decode for performance gain
102 : * (e.g., BGRA, ARGB, ...)
103 : *
104 : */
105 :
106 : #ifndef _TENSORDECBB_H__
107 : #define _TENSORDECBB_H__
108 : #include <gst/gst.h>
109 : #include <math.h> /* expf */
110 : #include <nnstreamer_log.h>
111 : #include <nnstreamer_plugin_api_util.h>
112 : #include <nnstreamer_util.h>
113 : #include "tensordecutil.h"
114 :
115 : #define MAX_POLY_CORNERS 8 /* presumably the upper bound on corners of a drawn polygon -- TODO confirm against its users */
116 : #define PIXEL_VALUE (0xFF0000FF) /* RED 100% in RGBA; the bytes FF 00 00 FF read the same in either byte order */
117 :
118 : /**
119 : * @brief Selector passed to BoundingBox::setOption; values are 0-based (presumably optionN in the file header is value + 1 -- TODO confirm).
120 : */
121 : enum class BoundingBoxOption {
122 : MODE = 0, /**< presumably option1: decoder mode */
123 : LABEL_PATH = 1, /**< presumably option2: location of label file */
124 : INTERNAL = 2, /**< presumably option3: mode-dependent values */
125 : VIDEO_SIZE = 3, /**< presumably option4: video output dimension */
126 : INPUT_MODEL_SIZE = 4, /**< presumably option5: input dimension */
127 : TRACK = 5, /**< presumably option6: track boxes or not */
128 : LOG = 6, /**< presumably option7: log boxes or not */
129 : UNKNOWN, /**< implicitly 7; NOTE(review): option8 (Box Style, NYI) has no enumerator of its own */
130 : };
131 :
132 : /**
133 : * @brief Decoding schemes for bounding boxes; BOUNDING_BOX_UNKNOWN is also the default `mode` argument of nms().
134 : */
135 : typedef enum {
136 : MOBILENET_SSD_BOUNDING_BOX = 0, /**< presumably "mobilenet-ssd" */
137 : MOBILENET_SSD_PP_BOUNDING_BOX = 1, /**< presumably "mobilenet-ssd-postprocess" */
138 : OV_PERSON_DETECTION_BOUNDING_BOX = 2, /**< presumably "ov-person-detection" */
139 : OV_FACE_DETECTION_BOUNDING_BOX = 3, /**< NOTE(review): no matching mode is listed for option1 in the file header */
140 :
141 : /* The modes prefixed with 'OLDNAME_' are kept for backward compatibility. */
142 : OLDNAME_MOBILENET_SSD_BOUNDING_BOX = 4, /**< presumably the deprecated "tflite-ssd" -- TODO confirm */
143 : OLDNAME_MOBILENET_SSD_PP_BOUNDING_BOX = 5, /**< presumably the deprecated "tf-ssd" -- TODO confirm */
144 :
145 : YOLOV5_BOUNDING_BOX = 6, /**< presumably "yolov5" */
146 :
147 : MP_PALM_DETECTION_BOUNDING_BOX = 7, /**< presumably "mp-palm-detection" */
148 :
149 : YOLOV8_BOUNDING_BOX = 8, /**< presumably "yolov8" */
150 :
151 : YOLOV8_ORIENTED_BOUNDING_BOX = 9, /**< NOTE(review): no matching mode is listed for option1 in the file header */
152 :
153 : BOUNDING_BOX_UNKNOWN, /**< implicitly 10; presumably the sentinel for an unrecognized mode */
154 : } bounding_box_modes;
155 :
156 : /**
157 : * @brief One tracked object for centroid tracking (see BoundingBox::updateCentroids and BoundingBox::centroids).
158 : */
159 : typedef struct {
160 : guint id; /**< tracking id; per the centroids_last_id comment in BoundingBox, valid ids are 1, 2, ... (not 0) */
161 : guint matched_box_idx; /**< presumably the index of the box matched to this centroid -- TODO confirm */
162 : gint cx; /**< presumably the centroid X coordinate; units not visible here */
163 : gint cy; /**< presumably the centroid Y coordinate; units not visible here */
164 : guint consecutive_disappeared_frames; /**< presumably compared with BoundingBox::consecutive_disappear_threshold -- TODO confirm */
165 : } centroid;
166 :
167 : /**
168 : * @brief One entry of the distance table, {distance} : {centroids} x {boxes}, i.e. one (centroid, box) pair.
169 : */
170 : typedef struct {
171 : guint centroid_idx; /**< presumably an index into the centroid array -- TODO confirm */
172 : guint box_idx; /**< presumably an index into the box (result) array -- TODO confirm */
173 : guint64 distance; /**< distance for this pair; the metric (e.g. squared or not) is not visible here */
174 : } distanceArrayData;
175 :
176 : /**
177 : * @brief Anchor data: a box given by its center and size (coordinate units are not visible here).
178 : */
179 : typedef struct {
180 : float x_center; /**< center X */
181 : float y_center; /**< center Y */
182 : float w; /**< presumably the width */
183 : float h; /**< presumably the height */
184 : } anchor;
185 :
186 : /**
187 : * @brief Oriented bounding box (obb) anchor data: same fields as anchor plus an angle.
188 : */
189 : typedef struct {
190 : float x_center; /**< center X */
191 : float y_center; /**< center Y */
192 : float w; /**< presumably the width */
193 : float h; /**< presumably the height */
194 : float angle; /**< rotation of the box; unit (radians or degrees) is not visible here -- TODO confirm */
195 : } obb_anchor;
196 :
197 : /**
198 : * @brief 2D point with float coordinates
199 : */
200 : typedef struct {
201 : float x; /**< X coordinate */
202 : float y; /**< Y coordinate */
203 : } Point;
204 :
205 : /** @brief Represents a detected object */
206 : typedef struct {
207 : int valid; /**< presumably non-zero while the entry is kept (e.g. not suppressed by nms) -- TODO confirm */
208 : int class_id; /**< presumably the label index of the detected class */
209 : int x; /**< box X position; whether it is the left edge or the center is not visible here */
210 : int y; /**< box Y position; whether it is the top edge or the center is not visible here */
211 : int width; /**< box width */
212 : int height; /**< box height */
213 : float angle; /**< presumably used only for oriented boxes; unit not visible here */
214 : float prob; /**< presumably the detection score/probability */
215 :
216 : int tracking_id; /**< presumably the centroid id assigned when tracking is enabled -- TODO confirm */
217 : } detectedObject;
218 :
219 : /**
220 : * @brief Check that the number of tensors (num_tensors) in the given config is valid.
221 : * @param[in] config The structure of tensors info to check.
222 : * @param[in] limit The limit of tensors number.
223 : * @return TRUE if tensors info is valid (the declared return type is int, not gboolean).
224 : */
225 : int check_tensors (const GstTensorsConfig *config, const unsigned int limit);
226 :
227 : /**
228 : * @brief Apply NMS (presumably non-maximum suppression) to the given results (objects[DETECTION_MAX])
229 : * @param[in,out] results The results to be filtered with nms; element type is not visible here (presumably detectedObject)
230 : */
231 : void nms (GArray *results, gfloat threshold, bounding_box_modes mode = BOUNDING_BOX_UNKNOWN); /* threshold: presumably the IOU cutoff; mode defaults to BOUNDING_BOX_UNKNOWN -- TODO confirm both */
232 :
233 : /**
234 : * @brief Abstract interface (three pure virtual methods) for a bounding box decoder's properties; also stores input size and label count.
235 : */
236 : class BoxProperties
237 : {
238 : public:
239 120 : virtual ~BoxProperties () = default; /* virtual, so deleting a subclass through BoxProperties* is well-defined */
240 :
241 : /* mandatory methods: pure virtual, every concrete subclass has to define them */
242 : virtual int setOptionInternal (const char *param) = 0; /**< presumably parses the mode-dependent option3 string -- TODO confirm */
243 : virtual int checkCompatible (const GstTensorsConfig *config) = 0; /**< return convention is not visible here -- TODO confirm */
244 : virtual GArray *decode (const GstTensorsConfig *config, const GstTensorMemory *input) = 0; /**< element type and ownership of the returned GArray are not visible here */
245 :
246 30 : void setInputWidth (guint width) /* stores width into i_width */
247 : {
248 30 : i_width = width;
249 30 : }
250 30 : void setInputHeight (guint height) /* stores height into i_height */
251 : {
252 30 : i_height = height;
253 30 : }
254 13 : void setTotalLabels (guint labels) /* stores labels into total_labels */
255 : {
256 13 : total_labels = labels;
257 13 : }
258 :
259 32 : guint getInputWidth () /* NOTE(review): read-only accessor, could be declared const */
260 : {
261 32 : return i_width;
262 : }
263 32 : guint getInputHeight () /* NOTE(review): read-only accessor, could be declared const */
264 : {
265 32 : return i_height;
266 : }
267 : gchar *name; /* NOTE(review): public, no initializer and no constructor visible here; who owns/frees the string is not visible */
268 :
269 : protected:
270 : guint i_width; /**< Input Video Width; NOTE(review): no initializer here, indeterminate until setInputWidth or a subclass sets it */
271 : guint i_height; /**< Input Video Height; NOTE(review): no initializer here, indeterminate until setInputHeight or a subclass sets it */
272 :
273 : guint max_detection; /**< presumably the maximum number of detections; no setter in this class, so subclasses must set it */
274 : guint total_labels; /**< number of labels, written by setTotalLabels */
275 : };
276 :
277 : /**
278 : * @brief Class for Bounding box tensor decoder: holds the option state and delegates to a BoxProperties (bdata).
279 : */
280 : class BoundingBox
281 : {
282 : public:
283 : BoundingBox ();
284 : ~BoundingBox (); /* NOTE(review): destructor declared with raw pointer members but no copy operations -- confirm instances are never copied */
285 :
286 : gboolean checkLabelProps ();
287 : int setBoxDecodingMode (const char *param); /* presumably handles option1 -- TODO confirm */
288 : int setLabelPath (const char *param); /* presumably handles option2 -- TODO confirm */
289 : int setVideoSize (const char *param); /* presumably handles option4 (WIDTH:HEIGHT) -- TODO confirm */
290 : int setInputModelSize (const char *param); /* presumably handles option5 (WIDTH:HEIGHT) -- TODO confirm */
291 : void draw (GstMapInfo *out_info, GArray *results);
292 : void logBoxes (GArray *results);
293 : void updateCentroids (GArray *boxes);
294 :
295 : int setOption (BoundingBoxOption opNum, const char *param); /* opNum selects which option param belongs to */
296 : GstCaps *getOutCaps (const GstTensorsConfig *config);
297 : GstFlowReturn decode (const GstTensorsConfig *config,
298 : const GstTensorMemory *input, GstBuffer *outbuf);
299 :
300 : static BoxProperties *getProperties (const gchar *properties_name); /* presumably a lookup in properties_table by name -- TODO confirm */
301 : static gboolean addProperties (BoxProperties *boxProperties); /* presumably registers into properties_table -- TODO confirm */
302 :
303 : private:
304 : bounding_box_modes mode;
305 : BoxProperties *bdata; /* ownership is not visible here -- TODO confirm who deletes it */
306 :
307 : /* From option2 */
308 : imglabel_t labeldata;
309 : char *label_path;
310 :
311 : /* From option4 */
312 : guint width; /**< Output Video Width */
313 : guint height; /**< Output Video Height */
314 :
315 : /* From option6 (track or not) */
316 : gint is_track;
317 : guint centroids_last_id; /**< The last id given to a centroid; valid ids are 1, 2, ... (not 0). */
318 : guint max_centroids_num; /**< The maximum number of centroids */
319 : guint consecutive_disappear_threshold; /**< The threshold of consecutive disappeared frames */
320 :
321 : GArray *centroids; /**< Array for centroids */
322 : GArray *distanceArray; /**< Array for distances */
323 :
324 : /* From option7 (log or not) */
325 : gint do_log;
326 :
327 : gboolean flag_use_label;
328 :
329 : /* Table for box properties data; inline static member (C++17), shared by all instances and zero-initialized */
330 : inline static GHashTable *properties_table;
331 : };
332 : #endif /* _TENSORDECBB_H__ */
|