# camera_kl520/utils/ExamplePostProcess.py
# ******************************************************************************
# Copyright (c) 2022. Kneron Inc. All rights reserved.                         *
# ******************************************************************************
from typing import List
from utils.ExampleValue import ExampleBoundingBox, ExampleYoloResult
import os
import sys
import numpy as np
PWD = os.path.dirname(os.path.abspath(__file__))
sys.path.insert(1, os.path.join(PWD, '../..'))
import kp

YOLO_V3_CELL_BOX_NUM = 3
YOLO_V3_BOX_FIX_CH = 5
NMS_THRESH_YOLOV3 = 0.45
NMS_THRESH_YOLOV5 = 0.5
MAX_POSSIBLE_BOXES = 2000
MODEL_SHRINK_RATIO_TYV3 = [32, 16]
MODEL_SHRINK_RATIO_V5 = [8, 16, 32]
YOLO_MAX_DETECTION_PER_CLASS = 100
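
# Anchor boxes as [width, height] pairs in model-input pixels: one row of
# YOLO_V3_CELL_BOX_NUM anchors per output scale, ordered to match the output
# node list.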
TINY_YOLO_V3_ANCHORS = np.array([
    [[81, 82], [135, 169], [344, 319]],
    [[23, 27], [37, 58], [81, 82]]
])
YOLO_V5_ANCHORS = np.array([
    [[10, 13], [16, 30], [33, 23]],
    [[30, 61], [62, 45], [59, 119]],
    [[116, 90], [156, 198], [373, 326]]
])


def _sigmoid(x):
    return 1. / (1. + np.exp(-x))


def _iou(box_src, boxes_dst):
    max_x1 = np.maximum(box_src[0], boxes_dst[:, 0])
    max_y1 = np.maximum(box_src[1], boxes_dst[:, 1])
    min_x2 = np.minimum(box_src[2], boxes_dst[:, 2])
    min_y2 = np.minimum(box_src[3], boxes_dst[:, 3])

    area_intersection = np.maximum(0, (min_x2 - max_x1)) * np.maximum(0, (min_y2 - max_y1))
    area_src = (box_src[2] - box_src[0]) * (box_src[3] - box_src[1])
    area_dst = (boxes_dst[:, 2] - boxes_dst[:, 0]) * (boxes_dst[:, 3] - boxes_dst[:, 1])
    area_union = area_src + area_dst - area_intersection

    iou = area_intersection / area_union
    return iou
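
# A quick illustrative check of _iou (boxes are in [x1, y1, x2, y2, ...] format):
#   _iou(np.array([0, 0, 10, 10]), np.array([[5, 5, 15, 15]]))
#   -> array([0.14285714])   # 25 / (100 + 100 - 25)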


def _boxes_scale(boxes, hardware_preproc_info: kp.HwPreProcInfo):
    """
    Kneron hardware image pre-processing performs cropping, resizing and
    padding, in the following order:

    1. cropping
    2. resizing
    3. padding

    This function reverses the padding and resizing so that box coordinates
    map back onto the original image.
    """
    ratio_w = hardware_preproc_info.img_width / hardware_preproc_info.resized_img_width
    ratio_h = hardware_preproc_info.img_height / hardware_preproc_info.resized_img_height

    # roll back padding
    boxes[..., :4] = boxes[..., :4] - np.array([hardware_preproc_info.pad_left, hardware_preproc_info.pad_top,
                                                hardware_preproc_info.pad_left, hardware_preproc_info.pad_top])

    # scale coordinates back to the original image
    boxes[..., :4] = boxes[..., :4] * np.array([ratio_w, ratio_h, ratio_w, ratio_h])

    return boxes
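
# Worked example (illustrative numbers): a 1280x720 source letterboxed into a
# 640x640 model input is resized to 640x360 with pad_top = 140, pad_left = 0,
# so ratio_w = 1280/640 = 2 and ratio_h = 720/360 = 2. A model-space box
# (100, 240, 200, 340) first has the padding removed, giving (100, 100, 200, 200),
# then is scaled by (2, 2) to (200, 200, 400, 400) on the original image.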


def post_process_tiny_yolo_v3(inference_float_node_output_list: List[kp.InferenceFloatNodeOutput],
                              hardware_preproc_info: kp.HwPreProcInfo,
                              thresh_value: float,
                              with_sigmoid: bool = True) -> ExampleYoloResult:
    """
    Tiny YOLO V3 post-processing function.

    Parameters
    ----------
    inference_float_node_output_list : List[kp.InferenceFloatNodeOutput]
        A list of floating-point output nodes; it should come from
        'kp.inference.generic_inference_retrieve_float_node()'.
    hardware_preproc_info : kp.HwPreProcInfo
        Information about the hardware pre-processing.
    thresh_value : float
        The score threshold for YOLO post-processing, in the range 0.0 ~ 1.0.
    with_sigmoid : bool, default=True
        Apply a sigmoid to the raw outputs before post-processing.

    Returns
    -------
    yolo_result : utils.ExampleValue.ExampleYoloResult
        An ExampleYoloResult object containing the post-processed result.

    See Also
    --------
    kp.core.connect_devices : To connect multiple (including one) Kneron devices.
    kp.inference.generic_inference_retrieve_float_node : Retrieve single node output data from raw output buffer.
    kp.InferenceFloatNodeOutput
    kp.HwPreProcInfo
    utils.ExampleValue.ExampleYoloResult
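
    Examples
    --------
    A minimal usage sketch; 'generic_raw_result' and the retrieval flow are
    assumptions based on the generic-inference examples, not part of this
    module:

    >>> inf_node_output_list = [
    ...     kp.inference.generic_inference_retrieve_float_node(
    ...         node_idx=idx,
    ...         generic_raw_result=generic_raw_result,
    ...         channels_ordering=kp.ChannelOrdering.KP_CHANNEL_ORDERING_CHW)
    ...     for idx in range(generic_raw_result.header.num_output_node)]
    >>> yolo_result = post_process_tiny_yolo_v3(
    ...     inference_float_node_output_list=inf_node_output_list,
    ...     hardware_preproc_info=generic_raw_result.header.hw_pre_proc_info_list[0],
    ...     thresh_value=0.2)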
"""
    feature_map_list = []
    candidate_boxes_list = []

    for i in range(len(inference_float_node_output_list)):
        anchor_offset = int(inference_float_node_output_list[i].shape[1] / YOLO_V3_CELL_BOX_NUM)
        feature_map = inference_float_node_output_list[i].ndarray.transpose((0, 2, 3, 1))
        feature_map = _sigmoid(feature_map) if with_sigmoid else feature_map
        feature_map = feature_map.reshape((feature_map.shape[0],
                                           feature_map.shape[1],
                                           feature_map.shape[2],
                                           YOLO_V3_CELL_BOX_NUM,
                                           anchor_offset))

        ratio_w = hardware_preproc_info.model_input_width / inference_float_node_output_list[i].shape[3]
        ratio_h = hardware_preproc_info.model_input_height / inference_float_node_output_list[i].shape[2]
        nrows = inference_float_node_output_list[i].shape[2]
        ncols = inference_float_node_output_list[i].shape[3]

        # grids[..., 0] holds the column (x) index and grids[..., 1] the row (y)
        # index, so the (x, y) centers are scaled by (ratio_w, ratio_h)
        grids = np.expand_dims(np.stack(np.meshgrid(np.arange(ncols), np.arange(nrows)), 2), axis=0)

        for anchor_idx in range(YOLO_V3_CELL_BOX_NUM):
            # decode box centers, then width/height, then convert to corner coordinates
            feature_map[..., anchor_idx, 0:2] = (feature_map[..., anchor_idx, 0:2] + grids) * np.array(
                [ratio_w, ratio_h])
            feature_map[..., anchor_idx, 2:4] = (feature_map[..., anchor_idx, 2:4] * 2) ** 2 * TINY_YOLO_V3_ANCHORS[i][
                anchor_idx]
            feature_map[..., anchor_idx, 0:2] = feature_map[..., anchor_idx, 0:2] - (
                    feature_map[..., anchor_idx, 2:4] / 2.)
            feature_map[..., anchor_idx, 2:4] = feature_map[..., anchor_idx, 0:2] + feature_map[..., anchor_idx, 2:4]

        feature_map = _boxes_scale(boxes=feature_map,
                                   hardware_preproc_info=hardware_preproc_info)
        feature_map_list.append(feature_map)

    predict_bboxes = np.concatenate(
        [np.reshape(feature_map, (-1, feature_map.shape[-1])) for feature_map in feature_map_list], axis=0)

    # final per-class score = objectness * class probability
    predict_bboxes[..., 5:] = np.repeat(predict_bboxes[..., 4][..., np.newaxis],
                                        predict_bboxes[..., 5:].shape[1],
                                        axis=1) * predict_bboxes[..., 5:]

    # keep only boxes whose score exceeds the threshold for at least one class
    predict_bboxes_mask = (predict_bboxes[..., 5:] > thresh_value).sum(axis=1)
    predict_bboxes = predict_bboxes[predict_bboxes_mask >= 1]
    # per-class greedy NMS: zero out the class score of any box whose IoU with a
    # higher-scoring box of the same class exceeds NMS_THRESH_YOLOV3
    for class_idx in range(5, predict_bboxes.shape[1]):
        candidate_boxes_mask = predict_bboxes[..., class_idx] > thresh_value
        class_good_box_count = candidate_boxes_mask.sum()

        if class_good_box_count == 1:
            candidate_boxes_list.append(
                ExampleBoundingBox(
                    x1=round(float(predict_bboxes[candidate_boxes_mask, 0][0]), 4),
                    y1=round(float(predict_bboxes[candidate_boxes_mask, 1][0]), 4),
                    x2=round(float(predict_bboxes[candidate_boxes_mask, 2][0]), 4),
                    y2=round(float(predict_bboxes[candidate_boxes_mask, 3][0]), 4),
                    score=round(float(predict_bboxes[candidate_boxes_mask, class_idx][0]), 4),
                    class_num=class_idx - 5
                )
            )
        elif class_good_box_count > 1:
            candidate_boxes = predict_bboxes[candidate_boxes_mask].copy()
            candidate_boxes = candidate_boxes[candidate_boxes[:, class_idx].argsort()][::-1]

            for candidate_box_idx in range(candidate_boxes.shape[0] - 1):
                # original Python-version post-processing: suppressed boxes get a
                # zeroed class score instead of being removed immediately
                if 0 != candidate_boxes[candidate_box_idx][class_idx]:
                    remove_mask = _iou(box_src=candidate_boxes[candidate_box_idx],
                                       boxes_dst=candidate_boxes[candidate_box_idx + 1:]) > NMS_THRESH_YOLOV3
                    candidate_boxes[candidate_box_idx + 1:][remove_mask, class_idx] = 0

            good_count = 0
            for candidate_box_idx in range(candidate_boxes.shape[0]):
                if candidate_boxes[candidate_box_idx, class_idx] > 0:
                    candidate_boxes_list.append(
                        ExampleBoundingBox(
                            x1=round(float(candidate_boxes[candidate_box_idx, 0]), 4),
                            y1=round(float(candidate_boxes[candidate_box_idx, 1]), 4),
                            x2=round(float(candidate_boxes[candidate_box_idx, 2]), 4),
                            y2=round(float(candidate_boxes[candidate_box_idx, 3]), 4),
                            score=round(float(candidate_boxes[candidate_box_idx, class_idx]), 4),
                            class_num=class_idx - 5
                        )
                    )
                    good_count += 1

                    if YOLO_MAX_DETECTION_PER_CLASS == good_count:
                        break

    # round coordinates to integers and clip them to the original image bounds
    for box in candidate_boxes_list:
        box.x1 = 0 if (box.x1 + 0.5 < 0) else int(box.x1 + 0.5)
        box.y1 = 0 if (box.y1 + 0.5 < 0) else int(box.y1 + 0.5)
        box.x2 = int(hardware_preproc_info.img_width - 1) if (
                box.x2 + 0.5 > hardware_preproc_info.img_width - 1) else int(box.x2 + 0.5)
        box.y2 = int(hardware_preproc_info.img_height - 1) if (
                box.y2 + 0.5 > hardware_preproc_info.img_height - 1) else int(box.y2 + 0.5)

    return ExampleYoloResult(
        class_count=predict_bboxes.shape[1] - 5,
        box_count=len(candidate_boxes_list),
        box_list=candidate_boxes_list
    )


def post_process_yolo_v5(inference_float_node_output_list: List[kp.InferenceFloatNodeOutput],
                         hardware_preproc_info: kp.HwPreProcInfo,
                         thresh_value: float,
                         with_sigmoid: bool = True) -> ExampleYoloResult:
    """
    YOLO V5 post-processing function.

    Parameters
    ----------
    inference_float_node_output_list : List[kp.InferenceFloatNodeOutput]
        A list of floating-point output nodes; it should come from
        'kp.inference.generic_inference_retrieve_float_node()'.
    hardware_preproc_info : kp.HwPreProcInfo
        Information about the hardware pre-processing.
    thresh_value : float
        The score threshold for YOLO post-processing, in the range 0.0 ~ 1.0.
    with_sigmoid : bool, default=True
        Apply a sigmoid to the raw outputs before post-processing.

    Returns
    -------
    yolo_result : utils.ExampleValue.ExampleYoloResult
        An ExampleYoloResult object containing the post-processed result.

    See Also
    --------
    kp.core.connect_devices : To connect multiple (including one) Kneron devices.
    kp.inference.generic_inference_retrieve_float_node : Retrieve single node output data from raw output buffer.
    kp.InferenceFloatNodeOutput
    kp.HwPreProcInfo
    utils.ExampleValue.ExampleYoloResult
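
    Examples
    --------
    Usage mirrors post_process_tiny_yolo_v3; a minimal sketch where
    'generic_raw_result' and 'inf_node_output_list' are assumed to come from
    the generic-inference flow:

    >>> yolo_result = post_process_yolo_v5(
    ...     inference_float_node_output_list=inf_node_output_list,
    ...     hardware_preproc_info=generic_raw_result.header.hw_pre_proc_info_list[0],
    ...     thresh_value=0.15)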
"""
    feature_map_list = []
    candidate_boxes_list = []

    for i in range(len(inference_float_node_output_list)):
        anchor_offset = int(inference_float_node_output_list[i].shape[1] / YOLO_V3_CELL_BOX_NUM)
        feature_map = inference_float_node_output_list[i].ndarray.transpose((0, 2, 3, 1))
        feature_map = _sigmoid(feature_map) if with_sigmoid else feature_map
        feature_map = feature_map.reshape((feature_map.shape[0],
                                           feature_map.shape[1],
                                           feature_map.shape[2],
                                           YOLO_V3_CELL_BOX_NUM,
                                           anchor_offset))

        ratio_w = hardware_preproc_info.model_input_width / inference_float_node_output_list[i].shape[3]
        ratio_h = hardware_preproc_info.model_input_height / inference_float_node_output_list[i].shape[2]
        nrows = inference_float_node_output_list[i].shape[2]
        ncols = inference_float_node_output_list[i].shape[3]

        # grids[..., 0] holds the column (x) index and grids[..., 1] the row (y)
        # index, so the (x, y) centers are scaled by (ratio_w, ratio_h)
        grids = np.expand_dims(np.stack(np.meshgrid(np.arange(ncols), np.arange(nrows)), 2), axis=0)

        for anchor_idx in range(YOLO_V3_CELL_BOX_NUM):
            # YOLOv5 decode: xy = (2 * sigmoid(t) - 0.5 + grid) * stride,
            # wh = (2 * sigmoid(t)) ** 2 * anchor, then convert to corners
            feature_map[..., anchor_idx, 0:2] = (feature_map[..., anchor_idx, 0:2] * 2. - 0.5 + grids) * np.array(
                [ratio_w, ratio_h])
            feature_map[..., anchor_idx, 2:4] = (feature_map[..., anchor_idx, 2:4] * 2) ** 2 * YOLO_V5_ANCHORS[i][
                anchor_idx]
            feature_map[..., anchor_idx, 0:2] = feature_map[..., anchor_idx, 0:2] - (
                    feature_map[..., anchor_idx, 2:4] / 2.)
            feature_map[..., anchor_idx, 2:4] = feature_map[..., anchor_idx, 0:2] + feature_map[..., anchor_idx, 2:4]

        feature_map = _boxes_scale(boxes=feature_map,
                                   hardware_preproc_info=hardware_preproc_info)
        feature_map_list.append(feature_map)

    predict_bboxes = np.concatenate(
        [np.reshape(feature_map, (-1, feature_map.shape[-1])) for feature_map in feature_map_list], axis=0)

    # final per-class score = objectness * class probability
    predict_bboxes[..., 5:] = np.repeat(predict_bboxes[..., 4][..., np.newaxis],
                                        predict_bboxes[..., 5:].shape[1],
                                        axis=1) * predict_bboxes[..., 5:]

    # keep only boxes whose score exceeds the threshold for at least one class
    predict_bboxes_mask = (predict_bboxes[..., 5:] > thresh_value).sum(axis=1)
    predict_bboxes = predict_bboxes[predict_bboxes_mask >= 1]
    # per-class greedy NMS: zero out the class score of any box whose IoU with a
    # higher-scoring box of the same class exceeds NMS_THRESH_YOLOV5
    for class_idx in range(5, predict_bboxes.shape[1]):
        candidate_boxes_mask = predict_bboxes[..., class_idx] > thresh_value
        class_good_box_count = candidate_boxes_mask.sum()

        if class_good_box_count == 1:
            candidate_boxes_list.append(
                ExampleBoundingBox(
                    x1=round(float(predict_bboxes[candidate_boxes_mask, 0][0]), 4),
                    y1=round(float(predict_bboxes[candidate_boxes_mask, 1][0]), 4),
                    x2=round(float(predict_bboxes[candidate_boxes_mask, 2][0]), 4),
                    y2=round(float(predict_bboxes[candidate_boxes_mask, 3][0]), 4),
                    score=round(float(predict_bboxes[candidate_boxes_mask, class_idx][0]), 4),
                    class_num=class_idx - 5
                )
            )
        elif class_good_box_count > 1:
            candidate_boxes = predict_bboxes[candidate_boxes_mask].copy()
            candidate_boxes = candidate_boxes[candidate_boxes[:, class_idx].argsort()][::-1]

            for candidate_box_idx in range(candidate_boxes.shape[0] - 1):
                if 0 != candidate_boxes[candidate_box_idx][class_idx]:
                    remove_mask = _iou(box_src=candidate_boxes[candidate_box_idx],
                                       boxes_dst=candidate_boxes[candidate_box_idx + 1:]) > NMS_THRESH_YOLOV5
                    candidate_boxes[candidate_box_idx + 1:][remove_mask, class_idx] = 0

            good_count = 0
            for candidate_box_idx in range(candidate_boxes.shape[0]):
                if candidate_boxes[candidate_box_idx, class_idx] > 0:
                    candidate_boxes_list.append(
                        ExampleBoundingBox(
                            x1=round(float(candidate_boxes[candidate_box_idx, 0]), 4),
                            y1=round(float(candidate_boxes[candidate_box_idx, 1]), 4),
                            x2=round(float(candidate_boxes[candidate_box_idx, 2]), 4),
                            y2=round(float(candidate_boxes[candidate_box_idx, 3]), 4),
                            score=round(float(candidate_boxes[candidate_box_idx, class_idx]), 4),
                            class_num=class_idx - 5
                        )
                    )
                    good_count += 1

                    if YOLO_MAX_DETECTION_PER_CLASS == good_count:
                        break

    # round coordinates to integers and clip them to the original image bounds
    for box in candidate_boxes_list:
        box.x1 = 0 if (box.x1 + 0.5 < 0) else int(box.x1 + 0.5)
        box.y1 = 0 if (box.y1 + 0.5 < 0) else int(box.y1 + 0.5)
        box.x2 = int(hardware_preproc_info.img_width - 1) if (
                box.x2 + 0.5 > hardware_preproc_info.img_width - 1) else int(box.x2 + 0.5)
        box.y2 = int(hardware_preproc_info.img_height - 1) if (
                box.y2 + 0.5 > hardware_preproc_info.img_height - 1) else int(box.y2 + 0.5)

    return ExampleYoloResult(
        class_count=predict_bboxes.shape[1] - 5,
        box_count=len(candidate_boxes_list),
        box_list=candidate_boxes_list
    )