How to parse the model inference result data exported by trtexec
Again taking ONNX as the example, run the command below to parse the ONNX model, build an engine, run inference, and export the results to a JSON file:
./trtexec --onnx=efficientdet-d0-s.onnx --loadInputs='data':o4_clip-1_raw_data.bin --saveEngine=efficientdet-d0-s.engine --exportOutput=trtexec-result.json
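Note that the file passed to --loadInputs must contain the raw bytes of the already-preprocessed input tensor (the second script further below produces raw_data.bin exactly this way with nx.tofile). As a minimal sketch, assuming a 1x3x512x512 float32 NCHW input and simple resize/scale preprocessing (your real preprocessing, e.g. BGR-to-RGB conversion or mean/std normalization, may differ), such a file can be generated like this:

# Sketch: dump a preprocessed image as raw float32 bytes for --loadInputs.
# Assumes the model expects NCHW float32 input of shape 1x3x512x512;
# adjust the size and normalization to match your own preprocessing.
import numpy as np
import cv2

img = cv2.imread("o4_clip-1.jpg")                  # cv2 loads BGR; convert if your model expects RGB
img = cv2.resize(img, (512, 512)).astype(np.float32) / 255.0
img = img.transpose(2, 0, 1)[np.newaxis, ...]      # HWC -> 1x3xHxW
img = np.ascontiguousarray(img)
img.tofile("o4_clip-1_raw_data.bin")               # raw bytes, no header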
The JSON file specified by --exportOutput holds the data of the network's output nodes. My modified EfficientDet network has three output nodes, regression, anchors, and classification, so the JSON file looks roughly like this:
[ { "name" : "regression" , "dimensions" : "1x49104x4" , "values" : [ 5.78506, 6.88422, -1.56519, -17.5148, -0.113892, 0.545003, -1.52597, -0.767865, 1.00629, 0.163376, 0.0242282, -1.89316, 0.187985, 0.499627, -0.414611, -2.14051, 0.371698, 0.478594, -0.0126426, 2.01423, 0.997112, 0.517545, 1.88847, -0.707338, 0.157562, 0.0627687, 0.10975, -0.430063, 0.537361, 0.670655, 0.428142, ...]}, { "name" : "anchors" , "dimensions" : "1x49104x4" , "values" : [ -12, -12, 20, 20, -7.2, -18.4, 15.2, 26.4, -18.4, -7.2, 26.4, 15.2, -16.1587, -16.1587, 24.1587, 24.1587, -10.1111, -24.2222, 18.1111, 32.2222, -24.2222, -10.1111, 32.2222, 18.1111, -21.3984, -21.3984, 29.3984, 29.3984, -13.7789, -31.5578, 21.7789, 39.5578, -31.5578, -13.7789, 39.5578, 21.7789, -12, ...]}, { "name" : "classification" , "dimensions" : "1x49104x1" , "values" : [ 0.000323705, 3.062e-07, 0.00457684, 0.000632869, 0.0004986, 0.000207652, 0.000125256, 6.19738e-07, 0.0203817, 8.39792e-06, 8.40121e-09, 9.50497e-05, 9.16859e-06, 2.48826e-05, 1.39859e-06, 3.46441e-06, 2.93581e-08, 0.000207522, 6.74278e-06, 1.8361e-08, 3.44744e-05, 3.35026e-06, 2.89377e-05, 3.85066e-07, 1.02452e-05,...]}]
So how do we parse this result, filter it down to final detections, and draw the detected bboxes on the image? This is just like ordinary model post-processing: the output data has to be post-processed according to the network's own design to produce valid bboxes with their classes and scores. The code below parses the JSON produced by running trtexec on the ONNX exported from my modified EfficientDet:
#encoding: utf-8
import numpy as np
import os
import cv2
import json

classes = ['baggage']

def nms(bboxes, classification, thresh):
    """Greedy NMS: keep the highest-scoring box, drop boxes whose IoU with it exceeds thresh."""
    x1 = bboxes[:, 0]
    y1 = bboxes[:, 1]
    x2 = bboxes[:, 2]
    y2 = bboxes[:, 3]
    scores = classification[:, 1]
    areas = (x2 - x1 + 1) * (y2 - y1 + 1)
    order = scores.argsort()[::-1]
    keep = []
    while order.size > 0:
        i = order[0]
        keep.append(i)
        xx1 = np.maximum(x1[i], x1[order[1:]])
        yy1 = np.maximum(y1[i], y1[order[1:]])
        xx2 = np.minimum(x2[i], x2[order[1:]])
        yy2 = np.minimum(y2[i], y2[order[1:]])
        w = np.maximum(0.0, xx2 - xx1 + 1)
        h = np.maximum(0.0, yy2 - yy1 + 1)
        inter = w * h
        ovr = inter / (areas[i] + areas[order[1:]] - inter)
        inds = np.where(ovr <= thresh)[0]
        order = order[inds + 1]
    return keep

def parse_result(num_classes, json_file, input_img, output_img):
    #with open(json_file, 'r', encoding='utf-8') as f:
    with open(json_file, 'r') as f:
        data = json.load(f)
    regression = np.array(data[0]['values'])      # 49104 x 4
    anchors = np.array(data[1]['values'])         # 49104 x 4
    classification = np.array(data[2]['values'])  # 49104 x num_classes
    size_r = np.size(regression)
    size_c = np.size(classification)
    print("size_r:", size_r, "size_c:", size_c)

    # scan past the leading run of zero (originally NaN) padding entries
    max_nnan_ir = -1
    max_nnan_ic = -1
    i = 0
    while i < size_r:
        #if np.isnan(regression[i]):  # n/a
        #if 'nan' == regression[i]:   # string 'n/a'
        if 0 == regression[i]:
            i += 4
        else:
            max_nnan_ir = i
            break
    i = 0
    while i < size_c:
        #if np.isnan(classification[i]):  # n/a
        #if 'nan' == classification[i]:   # string 'n/a'
        if 0 == classification[i]:
            i += num_classes
        else:
            max_nnan_ic = i
            break
    max_nnan_ir //= 4
    max_nnan_ic //= num_classes
    n = max(max_nnan_ir, max_nnan_ic)
    #print("max_nnan_ir/4:", max_nnan_ir, "max_nnan_ic/1:", max_nnan_ic, "num_classes:", num_classes)
    #print("max nan idx:", n - 1)

    # filter out invalid regression/anchor data
    i = n * 4
    reg_list = []
    anc_list = []
    cls_list = []
    while i < size_r:
        reg_box = [regression[i], regression[i+1], regression[i+2], regression[i+3]]
        anc_box = [anchors[i], anchors[i+1], anchors[i+2], anchors[i+3]]
        reg_list.append(reg_box)
        anc_list.append(anc_box)
        i += 4

    # filter out invalid classification data, keeping [class_id, max_score] per anchor
    i = n * num_classes
    while i < size_c:
        max_score = 0
        cls_id = 0
        for k in range(num_classes):
            if classification[i+k] > max_score:
                max_score = classification[i+k]
                cls_id = k
        cls_list.append([cls_id, max_score])
        i += num_classes

    regression = np.array(reg_list)
    anchors = np.array(anc_list)
    classification = np.array(cls_list)
    #print("regression num:", len(regression), "anchors num:", len(anchors), "classification num:", len(classification))

    # decode boxes: anchors are [y1, x1, y2, x2], regression is [dy, dx, dh, dw]
    y_centers_a = (anchors[..., 0] + anchors[..., 2]) / 2
    x_centers_a = (anchors[..., 1] + anchors[..., 3]) / 2
    ha = anchors[..., 2] - anchors[..., 0]
    wa = anchors[..., 3] - anchors[..., 1]
    w = np.exp(regression[..., 3]) * wa
    h = np.exp(regression[..., 2]) * ha
    y_centers = regression[..., 0] * ha + y_centers_a
    x_centers = regression[..., 1] * wa + x_centers_a
    ymin = y_centers - h / 2.
    xmin = x_centers - w / 2.
    ymax = y_centers + h / 2.
    xmax = x_centers + w / 2.

    # clip boxes to the image (assumes a 1280x960 source image)
    xmin = np.maximum(xmin, 0)
    ymin = np.maximum(ymin, 0)
    xmax = np.minimum(xmax, 1279)
    ymax = np.minimum(ymax, 959)
    bboxes = np.vstack([xmin, ymin, xmax, ymax])
    print(bboxes.shape)
    bboxes = bboxes.swapaxes(0, 1)
    print(bboxes.shape)

    # nms
    iou_threshold = 0.1
    keep = nms(bboxes, classification, iou_threshold)
    bboxes = bboxes[keep, :]
    classification = classification[keep, :]
    threshold = 0.1
    num_boxes = len(bboxes)
    #print("num_boxes:", num_boxes, "num_cls:", len(classification))
    #bboxes = bboxes * 1280 / 512

    # draw the surviving boxes above the score threshold
    box_c = 0
    img = cv2.imread(input_img)
    for i in range(num_boxes):
        obj = classes[int(classification[i][0])]
        score = classification[i][1]
        if score > threshold:
            box_c += 1
            x1 = int(bboxes[i][0])
            y1 = int(bboxes[i][1])
            x2 = int(bboxes[i][2])
            y2 = int(bboxes[i][3])
            cv2.rectangle(img, (x1, y1), (x2, y2), (255, 255, 0), 2)
            cv2.putText(img, '{} {:.3f}'.format(obj, score), (x1, y1 + 20),
                        cv2.FONT_HERSHEY_SIMPLEX, 0.5, (255, 255, 0), 2)
    cv2.imwrite(output_img, img)
    #cv2.imshow("result", img)
    #cv2.waitKey(100)
    #print("draw {} boxes".format(box_c))

if __name__ == "__main__":
    json_file = "/usr/src/tensorrt/bin/trtexec-result.json"
    input_img = "/usr/src/tensorrt/bin/o4_clip-1.jpg"
    output_img = "/usr/src/tensorrt/bin/o4_clip-1-TensorRT_result.jpg"
    num_class = 1
    parse_result(num_class, json_file, input_img, output_img)
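A few notes on the script: the scan at the top of parse_result skips the leading zero (originally NaN) padding entries before the valid data starts; the clipping bounds 1279 and 959 assume a 1280x960 source image; and both the score threshold and iou_threshold are hard-coded to 0.1. Adjust these for your own model and images. The anchor decoding follows the EfficientDet convention, with anchors in [y1, x1, y2, x2] order and regression values as [dy, dx, dh, dw] offsets.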
If you want to convert the results of running the .pt model with PyTorch, or the ONNX model with onnxruntime, into trtexec's JSON output format so the two sets of results can be compared for differences (I wrote the code below at NVIDIA's request to mimic trtexec's output format and save the model's output node data to a JSON file), you can write it like this:
"""Simple Inference Script of EfficientDet-Pytorch"""import jsonimport timeimport onnxruntimeimport torchfrom torch.backends import cudnnfrom backbone import EfficientDetBackboneimport cv2import numpy as npfrom efficientdet.utils import BBoxTransform, ClipBoxesfrom utils.utils import preprocess, invert_affine, postprocessdef to_numpy(tensor): return tensor.detach().cpu().numpy() if tensor.requires_grad else tensor.cpu().numpy()class MyEncoder(json.JSONEncoder): def default(self, obj):if isinstance(obj, np.integer): return int(obj)elif isinstance(obj, np.floating): return float(obj)elif isinstance(obj, np.ndarray): return obj.tolist()else: return super(MyEncoder, self).default(obj)def to_list(arr,r,c): data_list = [] for i in range(r):for j in range(c): data_list.append(arr[i][j]) return data_listcompound_coef = 0force_input_size = None # set None to use default size#img_path = 'test/img.png'img_path = 'o4_clip-1.png'threshold = 0.1iou_threshold = 0.1use_cuda = Trueuse_float16 = Falsecudnn.fastest = Truecudnn.benchmark = Trueobj_list = ['baggage']input_sizes = [512, 640, 768, 896, 1024, 1280, 1280, 1536]input_size = input_sizes[compound_coef] if force_input_size is None else force_input_sizeori_imgs, framed_imgs, framed_metas = preprocess(img_path, max_size=input_size)if use_cuda: x = torch.stack([torch.from_numpy(fi).cuda() for fi in framed_imgs], 0)else: x = torch.stack([torch.from_numpy(fi) for fi in framed_imgs], 0)x = x.to(torch.float32 if not use_float16 else torch.float16).permute(0, 3, 1, 2)'''model = EfficientDetBackbone(compound_coef=compound_coef, num_classes=len(obj_list))#model.load_state_dict(torch.load(f'weights/efficientdet-d{compound_coef}.pth'))model.load_state_dict(torch.load(f'logs/airport/efficientdet-d0_499_79500.pth'))model.requires_grad_(False)model.eval()if use_cuda: model = model.cuda()if use_float16: model = model.half()'''ort_session = onnxruntime.InferenceSession("convert/efficientdet-d0.onnx")with torch.no_grad(): ''' #features, regression, classification, anchors = model(x) regression, classification, anchors = model(x) ''' nx = to_numpy(x) #np.save("raw_data_numpy.npy",nx) nx.tofile("raw_data.bin") ort_inputs = {ort_session.get_inputs()[0].name: nx} ort_outs = ort_session.run(None, ort_inputs) #shapes: (1, 49104, 4) (1, 49104, 1) (1, 49104, 4) #print("shapes:",ort_outs[0].shape,ort_outs[1].shape,ort_outs[2].shape) r = 49104 results = [] dict_reg = {} dict_reg["name"] = "regression" dict_reg["dimensions"] = "1x49104x4" dict_reg["values"] = to_list(ort_outs[0][0],r,4) results.append(dict_reg) dict_cls = {} dict_cls["name"] = "classification" dict_cls["dimensions"] = "1x49104x1" dict_cls["values"] = to_list(ort_outs[1][0],r,1) results.append(dict_cls) dict_ach = {} dict_ach["name"] = "anchors" dict_ach["dimensions"] = "1x49104x4" dict_ach["values"] = to_list(ort_outs[2][0],r,4) results.append(dict_ach) f = open("onnxruntime_result.json","w") json.dump(results,f,cls=MyEncoder,indent=2) f.close() regression = torch.from_numpy(ort_outs[0]) classification = torch.from_numpy(ort_outs[1]) anchors = torch.from_numpy(ort_outs[2]) regressBoxes = BBoxTransform() clipBoxes = ClipBoxes() out = postprocess(x, anchors, regression, classification, regressBoxes, clipBoxes, threshold, iou_threshold)def display(preds, imgs, imshow=True, imwrite=False): for i in range(len(imgs)): if len(preds[i]['rois']) == 0: continue for j in range(len(preds[i]['rois'])): (x1, y1, x2, y2) = preds[i]['rois'][j].astype(np.int) cv2.rectangle(imgs[i], (x1, y1), (x2, y2), (255, 255, 0), 2) obj = 
obj_list[preds[i]['class_ids'][j]] score = float(preds[i]['scores'][j]) if score >= threshold: cv2.putText(imgs[i], '{}, {:.3f}'.format(obj, score), (x1, y1 + 10), cv2.FONT_HERSHEY_SIMPLEX, 0.5, (255, 255, 0), 1) if imshow: cv2.imshow('img', imgs[i]) cv2.waitKey(0) if imwrite: cv2.imwrite(f'o4_clip-1-OnnxRuntime_result.jpg', imgs[i])out = invert_affine(framed_metas, out)display(out, ori_imgs, imshow=False, imwrite=True)
The commented-out code above is the usual way of loading the .pt file with PyTorch, which most people are familiar with; what is kept is the onnxruntime path for running the ONNX model. The code saves the model's output data to onnxruntime_result.json in the same format as trtexec's output.
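With both trtexec-result.json and onnxruntime_result.json in hand, diffing the two result sets is then just a matter of matching the output nodes by name (note that the two files list the outputs in different orders) and comparing the value arrays; a minimal sketch:

import json
import numpy as np

def load_outputs(path):
    """Load a trtexec-style result JSON into a {name: flat float array} dict."""
    with open(path) as f:
        return {node["name"]: np.array(node["values"], dtype=np.float32)
                for node in json.load(f)}

trt = load_outputs("trtexec-result.json")
ort = load_outputs("onnxruntime_result.json")

for name in trt:
    a, b = trt[name], ort[name]
    # note: the trtexec output may contain padding entries at the start;
    # restrict the comparison to the valid range if the diffs look suspicious
    diff = np.abs(a - b)
    print(f"{name}: max abs diff {diff.max():.6g}, mean abs diff {diff.mean():.6g}")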