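"""Convert a YOLOv5s ONNX model to RKNN and run a quantized detection demo.

Pipeline: load yolov5s.onnx with RKNN-Toolkit, quantize/build it against the
calibration images listed in dataset.txt, export the .rknn file, run inference
on a letterboxed test image, decode the three YOLOv5 heads, apply NMS, and
write the drawn detections to result.jpg.

Note: the post-processing below assumes the ONNX export produces the three raw
(pre-sigmoid) head tensors of shape (1, 3, H, W, NUM_CLS + 5), not the fused
"Detect" output.
"""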
import time

import cv2
import numpy as np
import torch
from PIL import Image

from rknn.api import RKNN

NUM_CLS = 80                 # number of COCO classes
LISTSIZE = NUM_CLS + 5       # per-anchor vector: x, y, w, h, obj_conf, class scores
SPAN = 3                     # anchors per detection head
OBJ_THRESH = 0.2
NMS_THRESH = 0.5

CLASSES = ("person", "bicycle", "car", "motorbike", "aeroplane", "bus", "train", "truck", "boat", "traffic light",
           "fire hydrant", "stop sign", "parking meter", "bench", "bird", "cat", "dog", "horse", "sheep", "cow", "elephant",
           "bear", "zebra", "giraffe", "backpack", "umbrella", "handbag", "tie", "suitcase", "frisbee", "skis", "snowboard", "sports ball", "kite",
           "baseball bat", "baseball glove", "skateboard", "surfboard", "tennis racket", "bottle", "wine glass", "cup", "fork", "knife",
           "spoon", "bowl", "banana", "apple", "sandwich", "orange", "broccoli", "carrot", "hot dog", "pizza", "donut", "cake", "chair", "sofa",
           "pottedplant", "bed", "diningtable", "toilet", "tvmonitor", "laptop", "mouse", "remote", "keyboard", "cell phone", "microwave",
           "oven", "toaster", "sink", "refrigerator", "book", "clock", "vase", "scissors", "teddy bear", "hair drier", "toothbrush")

masks = [[0, 1, 2], [3, 4, 5], [6, 7, 8]]  # anchor-index groups per head (yolov5s)
anchors = [[10, 13], [16, 30], [33, 23], [30, 61], [62, 45], [59, 119], [116, 90], [156, 198], [373, 326]]


def letterbox_image(image, size):
    """Resize a PIL image to `size`, preserving aspect ratio and padding with gray."""
    iw, ih = image.size
    w, h = size
    scale = min(w / iw, h / ih)
    nw = int(iw * scale)
    nh = int(ih * scale)

    image = np.array(image)
    image = cv2.resize(image, (nw, nh), interpolation=cv2.INTER_LINEAR)
    image = Image.fromarray(image)
    new_image = Image.new('RGB', size, (128, 128, 128))
    new_image.paste(image, ((w - nw) // 2, (h - nh) // 2))
    return new_image

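
# Example (illustrative): letterboxing a 1280x720 frame to (640, 640) scales it by
# min(640/1280, 640/720) = 0.5 to 640x360, then centers it on the gray canvas with
# 140-pixel bands at the top and bottom. scale_coords() below inverts this mapping.
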

def w_bbox_iou(box1, box2, x1y1x2y2=True):
    """Compute IoU between box1 and each box in box2."""
    if not x1y1x2y2:
        # Convert (cx, cy, w, h) to corner coordinates
        b1_x1, b1_x2 = box1[:, 0] - box1[:, 2] / 2, box1[:, 0] + box1[:, 2] / 2
        b1_y1, b1_y2 = box1[:, 1] - box1[:, 3] / 2, box1[:, 1] + box1[:, 3] / 2
        b2_x1, b2_x2 = box2[:, 0] - box2[:, 2] / 2, box2[:, 0] + box2[:, 2] / 2
        b2_y1, b2_y2 = box2[:, 1] - box2[:, 3] / 2, box2[:, 1] + box2[:, 3] / 2
    else:
        b1_x1, b1_y1, b1_x2, b1_y2 = box1[:, 0], box1[:, 1], box1[:, 2], box1[:, 3]
        b2_x1, b2_y1, b2_x2, b2_y2 = box2[:, 0], box2[:, 1], box2[:, 2], box2[:, 3]

    # Intersection rectangle
    inter_rect_x1 = torch.max(b1_x1, b2_x1)
    inter_rect_y1 = torch.max(b1_y1, b2_y1)
    inter_rect_x2 = torch.min(b1_x2, b2_x2)
    inter_rect_y2 = torch.min(b1_y2, b2_y2)

    # The +1 terms follow this implementation's integer-pixel area convention
    inter_area = torch.clamp(inter_rect_x2 - inter_rect_x1 + 1, min=0) * \
                 torch.clamp(inter_rect_y2 - inter_rect_y1 + 1, min=0)

    b1_area = (b1_x2 - b1_x1 + 1) * (b1_y2 - b1_y1 + 1)
    b2_area = (b2_x2 - b2_x1 + 1) * (b2_y2 - b2_y1 + 1)

    iou = inter_area / (b1_area + b2_area - inter_area + 1e-16)

    return iou

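
# Sanity check (illustrative): identical corner-format boxes give IoU of 1.0, e.g.
#   w_bbox_iou(torch.tensor([[0., 0., 9., 9.]]), torch.tensor([[0., 0., 9., 9.]]))  # -> tensor([1.])
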

def w_non_max_suppression(prediction, num_classes, conf_thres=0.1, nms_thres=0.4):
    # Convert (cx, cy, w, h) to corner coordinates (x1, y1, x2, y2)
    box_corner = torch.FloatTensor(prediction.shape)
    box_corner[:, :, 0] = prediction[:, :, 0] - prediction[:, :, 2] / 2
    box_corner[:, :, 1] = prediction[:, :, 1] - prediction[:, :, 3] / 2
    box_corner[:, :, 2] = prediction[:, :, 0] + prediction[:, :, 2] / 2
    box_corner[:, :, 3] = prediction[:, :, 1] + prediction[:, :, 3] / 2
    prediction[:, :, :4] = box_corner[:, :, :4]

    output = [None for _ in range(len(prediction))]
    for image_i, image_pred in enumerate(prediction):
        # First pass: drop boxes below the object-confidence threshold
        conf_mask = (image_pred[:, 4] >= conf_thres).squeeze()
        image_pred = image_pred[conf_mask]

        if not image_pred.size(0):
            continue

        # Best class score and index for each remaining box
        class_conf, class_pred = torch.max(image_pred[:, 5:5 + num_classes], 1, keepdim=True)

        # detections: (x1, y1, x2, y2, obj_conf, class_conf, class_pred)
        detections = torch.cat((image_pred[:, :5], class_conf.float(), class_pred.float()), 1)

        # Classes present in this image
        unique_labels = detections[:, -1].cpu().unique()

        if prediction.is_cuda:
            unique_labels = unique_labels.cuda()

        for c in unique_labels:
            # All surviving predictions of this class
            detections_class = detections[detections[:, -1] == c]
            # Sort by object confidence, highest first
            _, conf_sort_index = torch.sort(detections_class[:, 4], descending=True)
            detections_class = detections_class[conf_sort_index]
            # Non-maximum suppression
            max_detections = []
            while detections_class.size(0):
                # Keep the highest-confidence box, then drop every remaining box
                # whose IoU with it exceeds nms_thres
                max_detections.append(detections_class[0].unsqueeze(0))
                if len(detections_class) == 1:
                    break
                ious = w_bbox_iou(max_detections[-1], detections_class[1:])
                detections_class = detections_class[1:][ious < nms_thres]
            # Stack the kept detections
            max_detections = torch.cat(max_detections).data
            # Append to this image's output
            output[image_i] = max_detections if output[image_i] is None else torch.cat(
                (output[image_i], max_detections))

    return output

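
# YOLOv5 head decoding, applied per feature map in onnx_postprocess():
#   cx = (2*sigmoid(tx) - 0.5 + grid_x) * stride_w
#   cy = (2*sigmoid(ty) - 0.5 + grid_y) * stride_h
#   w  = (2*sigmoid(tw))**2 * anchor_w
#   h  = (2*sigmoid(th))**2 * anchor_h
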

def onnx_postprocess(outputs, img_size_w, img_size_h):
    boxs = []
    a = torch.tensor(anchors).float().view(3, -1, 2)
    anchor_grid = a.clone().view(3, 1, -1, 1, 1, 2)
    for index, out in enumerate(outputs):
        # out shape: (batch, num_anchors, feature_h, feature_w, LISTSIZE)
        out = torch.from_numpy(out)
        feature_h = out.shape[2]
        feature_w = out.shape[3]

        # Stride: how much the feature map is downscaled from the input image
        stride_w = int(img_size_w / feature_w)
        stride_h = int(img_size_h / feature_h)

        grid_x, grid_y = np.meshgrid(np.arange(feature_w), np.arange(feature_h))
        grid_x, grid_y = torch.from_numpy(np.array(grid_x)).float(), torch.from_numpy(np.array(grid_y)).float()

        # Decode cx, cy, w, h
        pred_boxes = torch.FloatTensor(out[..., :4].shape)
        pred_boxes[..., 0] = (torch.sigmoid(out[..., 0]) * 2.0 - 0.5 + grid_x) * stride_w  # cx
        pred_boxes[..., 1] = (torch.sigmoid(out[..., 1]) * 2.0 - 0.5 + grid_y) * stride_h  # cy
        pred_boxes[..., 2:4] = (torch.sigmoid(out[..., 2:4]) * 2) ** 2 * anchor_grid[index]  # wh

        conf = torch.sigmoid(out[..., 4])
        pred_cls = torch.sigmoid(out[..., 5:])

        output = torch.cat((pred_boxes.view(1, -1, 4),
                            conf.view(1, -1, 1),
                            pred_cls.view(1, -1, NUM_CLS)),
                           -1)
        boxs.append(output)

    outputx = torch.cat(boxs, 1)
    # NMS across all three heads
    batch_detections = w_non_max_suppression(outputx, NUM_CLS, conf_thres=OBJ_THRESH, nms_thres=NMS_THRESH)

    return batch_detections


def clip_coords(boxes, img_shape):
    # Clip xyxy bounding boxes to image shape (height, width)
    boxes[:, 0].clamp_(0, img_shape[1])  # x1
    boxes[:, 1].clamp_(0, img_shape[0])  # y1
    boxes[:, 2].clamp_(0, img_shape[1])  # x2
    boxes[:, 3].clamp_(0, img_shape[0])  # y2


def scale_coords(img1_shape, coords, img0_shape, ratio_pad=None):
    # Rescale coords (xyxy) from img1_shape to img0_shape
    if ratio_pad is None:  # calculate from img0_shape
        gain = min(img1_shape[0] / img0_shape[0], img1_shape[1] / img0_shape[1])  # gain = old / new
        pad = (img1_shape[1] - img0_shape[1] * gain) / 2, (img1_shape[0] - img0_shape[0] * gain) / 2  # wh padding
    else:
        gain = ratio_pad[0][0]
        pad = ratio_pad[1]

    coords[:, [0, 2]] -= pad[0]  # x padding
    coords[:, [1, 3]] -= pad[1]  # y padding
    coords[:, :4] /= gain
    clip_coords(coords, img0_shape)
    return coords

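
# Worked example (illustrative): for a 1280x720 original letterboxed to 640x640,
# gain = 0.5 and pad = (0, 140), so a point at (100, 200) in the 640x640 image
# maps back to ((100 - 0) / 0.5, (200 - 140) / 0.5) = (200, 120) in the original.
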

def display(detections=None, image_src=None, input_size=(640, 640), line_thickness=None, text_bg_alpha=0.0):
    labels = detections[..., -1]
    boxs = detections[..., :4]
    confs = detections[..., 4]

    h, w, c = image_src.shape

    # Map boxes from the letterboxed input back to the original image
    boxs[:, :] = scale_coords(input_size, boxs[:, :], (h, w)).round()

    tl = line_thickness or round(0.002 * (w + h) / 2) + 1
    for i, box in enumerate(boxs):
        x1, y1, x2, y2 = box
        x1, y1, x2, y2 = int(x1.numpy()), int(y1.numpy()), int(x2.numpy()), int(y2.numpy())
        # Deterministic per-class color
        np.random.seed(int(labels[i].numpy()) + 2020)
        color = (np.random.randint(0, 255), 0, np.random.randint(0, 255))
        cv2.rectangle(image_src, (x1, y1), (x2, y2), color, max(int((w + h) / 600), 1), cv2.LINE_AA)
        label = '{0:.3f}'.format(confs[i])
        t_size = cv2.getTextSize(label, 0, fontScale=tl / 3, thickness=1)[0]
        c2 = x1 + t_size[0] + 3, y1 - t_size[1] - 5
        if text_bg_alpha == 0.0:
            cv2.rectangle(image_src, (x1 - 1, y1), c2, color, cv2.FILLED, cv2.LINE_AA)
        else:
            # Semi-transparent text background
            alphaReserve = text_bg_alpha  # 0: opaque, 1: fully transparent
            BChannel, GChannel, RChannel = color
            xMin, yMin = int(x1 - 1), int(y1 - t_size[1] - 3)
            xMax, yMax = int(x1 + t_size[0]), int(y1)
            image_src[yMin:yMax, xMin:xMax, 0] = image_src[yMin:yMax, xMin:xMax, 0] * alphaReserve + BChannel * (1 - alphaReserve)
            image_src[yMin:yMax, xMin:xMax, 1] = image_src[yMin:yMax, xMin:xMax, 1] * alphaReserve + GChannel * (1 - alphaReserve)
            image_src[yMin:yMax, xMin:xMax, 2] = image_src[yMin:yMax, xMin:xMax, 2] * alphaReserve + RChannel * (1 - alphaReserve)
        cv2.putText(image_src, label, (x1 + 3, y1 - 4), 0, tl / 3, [255, 255, 255],
                    thickness=1, lineType=cv2.LINE_AA)


if __name__ == '__main__':

    exp = 'yolov5s'
    Width = 640
    Height = 640
    MODEL_PATH = './yolov5s.onnx'
    im_file = './dog_bike_car_640x640.jpg'
    RKNN_MODEL_PATH = './{}.rknn'.format(exp + '-' + str(Width) + '-' + str(Height))
    DATASET = './dataset.txt'
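
    # NOTE: dataset.txt is the quantization calibration list consumed by
    # rknn.build(); by RKNN-Toolkit convention it holds one image path per line.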

    # Create RKNN object
    rknn = RKNN(verbose=True)

    # Normalize inputs to [0, 1]: (pixel - mean) / std
    rknn.config(mean_values=[[0, 0, 0]], std_values=[[255, 255, 255]])

    # Load model
    print('--> Loading model')
    ret = rknn.load_onnx(MODEL_PATH)
    if ret != 0:
        print('load model failed!')
        exit(ret)
    print('done')

    # Build model (quantized, calibrated on the DATASET images)
    print('--> Building model')
    ret = rknn.build(do_quantization=True, dataset=DATASET)
    if ret != 0:
        print('build model failed.')
        exit(ret)
    print('done')

    # Export RKNN model
    print('--> Export RKNN model')
    ret = rknn.export_rknn(RKNN_MODEL_PATH)
    if ret != 0:
        print('Export rknn model failed.')
        exit(ret)
    print('done')

    # Set inputs
    image_src = Image.open(im_file)
    img = letterbox_image(image_src, (Width, Height))
    img = np.array(img)

    # Init runtime environment (runs on the simulator when no target is given)
    print('--> Init runtime environment')
    ret = rknn.init_runtime()
    if ret != 0:
        print('Init runtime environment failed')
        exit(ret)
    print('done')

    # Inference
    print('--> inference')
    start = time.time()
    outputs = rknn.inference(inputs=[img])
    end = time.time()
    print('inference time: ', end - start)
    print('done')

    # Save the three raw head outputs for offline comparison
    np.save('./onnx_yolov5_0.npy', outputs[0])
    np.save('./onnx_yolov5_1.npy', outputs[1])
    np.save('./onnx_yolov5_2.npy', outputs[2])

    # Post-process and draw detections
    image_src = np.array(image_src)
    detections = onnx_postprocess(outputs, Width, Height)
    if detections[0] is not None:
        display(detections[0], image_src)

    # PIL gives RGB; convert to BGR for OpenCV before writing
    image_src = cv2.cvtColor(image_src, cv2.COLOR_RGB2BGR)

    cv2.imwrite("result.jpg", image_src)

    rknn.release()