(含源码)利用Python实现KLT跟踪算法
利用Python实现KLT跟踪算法
NVIDIA 视觉编程接口（VPI: Vision Programming Interface）是 NVIDIA 的计算机视觉和图像处理软件库，使您能够实现在 NVIDIA Jetson 嵌入式设备和独立的 GPU 上可用的不同硬件后端上加速的算法。
库中的一些算法包括过滤方法、透视扭曲、时间降噪、直方图均衡、立体视差和镜头失真校正。 VPI 提供易于使用的 Python 绑定以及 C++ API。
除了与 OpenCV 接口外,VPI 还能够与 PyTorch 和其他基于 Python 的库进行互操作。
下面的示例跟踪输入视频上的边界框,在每一帧上绘制它们并将结果保存在视频文件中。用户可以定义将用于处理的后端。
输出效果:
from __future__ import print_function import sysfrom argparse import ArgumentParserimport numpy as npimport cv2import vpi # Convert a colored input frame to grayscale (if needed) # and then, if using PVA backend, convert it to 16-bit unsigned pixels; # The converted frame is copied before wrapping it as a VPI image so # later draws in the gray frame do not change the reference VPI image. def convertFrameImage(inputFrame, backend): if inputFrame.ndim == 3 and inputFrame.shape[2] == 3: grayFrame = cv2.cvtColor(inputFrame, cv2.COLOR_BGR2GRAY) else: grayFrame = inputFrame if backend == vpi.Backend.PVA: # PVA only supports 16-bit unsigned inputs, # where each element is in 0-255 range, so # no rescaling is needed. grayFrame = grayFrame.astype(np.uint16) grayImage = vpi.asimage(grayFrame.copy()) return grayFrame, grayImage # Write the input gray frame to output video with # input bounding boxes and predictions def writeOutput(outVideo, cvGray, inBoxes, inPreds, colors, backend): try: if cvGray.dtype == np.uint16: cvGray = cvGray.astype(np.uint8) if cvGray.dtype != np.uint8: raise Exception('Input frame format must be grayscale, 8-bit unsigned') cvGrayBGR = cv2.cvtColor(cvGray, cv2.COLOR_GRAY2BGR) # Tracking the number of valid bounding boxes in the current frame numValidBoxes = 0 # Draw the input bounding boxes considering the input predictions with inBoxes.rlock_cpu(), inPreds.rlock_cpu() as pred: # Array of bounding boxes (bbox) and predictions (pred) bbox = inBoxes.cpu().view(np.recarray) for i in range(inBoxes.size): if bbox[i].tracking_status == vpi.KLTTrackStatus.LOST:# If the tracking status of the current bounding box is lost, skip itcontinue # Gather information of the current (i) bounding box and prediction # Prediction scaling width, height and x, y predScaleWidth = pred[i][0, 0] predScaleHeight = pred[i][1, 1] predX = pred[i][0, 2] predY = pred[i][1, 2] # Bounding box scaling width, height and x, y and bbox width, height bboxScaleWidth = 
bbox[i].bbox.xform.mat3[0, 0] bboxScaleHeight = bbox[i].bbox.xform.mat3[1, 1] bboxX = bbox[i].bbox.xform.mat3[0, 2] bboxY = bbox[i].bbox.xform.mat3[1, 2] bboxWidth = bbox[i].bbox.width bboxHeight = bbox[i].bbox.height # Compute corrected x, y and width, height (w, h) by proper adding # bounding box and prediction x, y and by proper multiplying # bounding box w, h with its own scaling and prediction scaling x = bboxX + predX y = bboxY + predY w = bboxWidth * bboxScaleWidth * predScaleWidth h = bboxHeight * bboxScaleHeight * predScaleHeight # Start point and end point of the bounding box for OpenCV drawing startPoint = tuple(np.array([x, y], dtype=int)) endPoint = tuple(np.array([x, y], dtype=int) + np.array([w, h], dtype=int)) # The color of the bounding box to be drawn bboxColor = tuple([ int(c) for c in colors[0, i] ]) cv2.rectangle(cvGrayBGR, startPoint, endPoint, bboxColor, 2) # Incrementing the number of valid bounding boxes in the current frame numValidBoxes += 1 print(' Valid: {:02d} boxes'.format(numValidBoxes)) outVideo.write(cvGrayBGR) except Exception as e: print('Error while writing output video:\n', e, file=sys.stderr) exit(1) # ---------------------------- # Parse command line arguments parser = ArgumentParser() parser.add_argument('backend', choices=['cpu','cuda','pva'],help='Backend to be used for processing') parser.add_argument('input',help='Input video') parser.add_argument('boxes',help='Text file with bounding boxes description') args = parser.parse_args() if args.backend == 'cpu': backend = vpi.Backend.CPU elif args.backend == 'cuda': backend = vpi.Backend.CUDA else: assert args.backend == 'pva' backend = vpi.Backend.PVA # ----------------------------- # Open input and output videos inVideo = cv2.VideoCapture(args.input) fourcc = cv2.VideoWriter_fourcc(*'MPEG') inSize = (int(inVideo.get(cv2.CAP_PROP_FRAME_WIDTH)), int(inVideo.get(cv2.CAP_PROP_FRAME_HEIGHT))) fps = inVideo.get(cv2.CAP_PROP_FPS) outVideo = 
cv2.VideoWriter('klt_python'+str(sys.version_info[0])+'_'+args.backend+'.mp4',fourcc, fps, inSize) if not outVideo.isOpened(): print("Error creating output video", file=sys.stderr) exit(1) # ----------------------------- # Reading input bounding boxes # All boxes is a dictionary of all bounding boxes to be tracked in the input video, # where each value is a list of new bounding boxes to track at the frame indicated by its key allBoxes = {} totalNumBoxes = 0 # Array capacity 0 means no restricted maximum number of bounding boxes arrayCapacity = 0 if backend == vpi.Backend.PVA: # PVA requires 128 array capacity or maximum number of bounding boxes arrayCapacity = 128 with open(args.boxes) as f: # The input file (f) should have one bounding box per lines as: # "startFrame bboxX bboxY bboxWidth bboxHeight"; e.g.: "61 547 337 14 11" for line in f.readlines(): line = line.replace('\n', '').replace('\r', '') startFrame, x, y, w, h = [ float(v) for v in line.split(' ') ] bb = (x, y, w, h) if startFrame not in allBoxes: allBoxes[startFrame] = [bb] else: allBoxes[startFrame].append(bb) totalNumBoxes += 1 if totalNumBoxes == arrayCapacity: # Stop adding boxes if its total reached the array capacity break curFrame = 0 curNumBoxes = len(allBoxes[curFrame]) # ------------------------------------------------------------------------------ # Initialize VPI array with all input bounding boxes (same as C++ KLT sample) if arrayCapacity == 0: arrayCapacity = totalNumBoxes inBoxes = vpi.Array(arrayCapacity, vpi.Type.KLT_TRACKED_BOUNDING_BOX) inBoxes.size = totalNumBoxes with inBoxes.wlock_cpu(): data = inBoxes.cpu().view(np.recarray)# Global index i of all bounding boxes data, starting at 0 i = 0for f in sorted(allBoxes.keys()): for bb in allBoxes[f]: # Each bounding box bb is a tuple of (x, y, w, h) x, y, w, h = bb # The bounding box data is the identity for the scaling part, # meaning no scaling, and the offset part is its position x, y data[i].bbox.xform.mat3[0, 0] = 1 
data[i].bbox.xform.mat3[1, 1] = 1 data[i].bbox.xform.mat3[2, 2] = 1 data[i].bbox.xform.mat3[0, 2] = x data[i].bbox.xform.mat3[1, 2] = y # The bounding box data stores its width and height w, h data[i].bbox.width = w data[i].bbox.height = h # Initially all boxes have status tracked and update needed data[i].tracking_status = vpi.KLTTrackStatus.TRACKED data[i].template_status = vpi.KLTTemplateStatus.UPDATE_NEEDED # Incrementing the global index for the next bounding box i += 1 #------------------------------------------------------------------------------- # Generate random colors for bounding boxes equal to the C++ KLT sample hues = np.zeros((totalNumBoxes,), dtype=np.uint8) if int(cv2.__version__.split('.')[0]) >= 3: cv2.setRNGSeed(1) hues = cv2.randu(hues, 0, 180) else: # Random differs in OpenCV-2.4 rng = cv2.cv.RNG(1) hues = cv2.cv.fromarray(np.array([[ h for h in hues ]], dtype=np.uint8)) cv2.cv.RandArr(rng, hues, cv2.cv.CV_RAND_UNI, 0, 180) hues = [ hues[0, i] for i in range(totalNumBoxes) ] colors = np.array([[ [int(h), 255, 255] for h in hues ]], dtype=np.uint8) colors = cv2.cvtColor(colors, cv2.COLOR_HSV2BGR) #------------------------------------------------------------------------------- # Initialize the KLT Feature Tracker algorithm # Load up first frame validFrame, cvFrame = inVideo.read() if not validFrame: print("Error reading first input frame", file=sys.stderr) exit(1) # Convert OpenCV frame to gray returning also the VPI image for given backend cvGray, imgTemplate = convertFrameImage(cvFrame, backend) # Create the KLT Feature Tracker object using the backend specified by the user klt = vpi.KLTFeatureTracker(imgTemplate, inBoxes, backend=backend) #------------------------------------------------------------------------------- # Main processing loop while validFrame: print('Frame: {:04d} ; Total: {:02d} boxes ;'.format(curFrame, curNumBoxes), end='')# Adjust input boxes and predictions to the current number of boxes inPreds = 
klt.in_predictions()inPreds.size = curNumBoxes inBoxes.size = curNumBoxes# Write current frame to the output video writeOutput(outVideo, cvGray, inBoxes, inPreds, colors, backend)# Read next input frame curFrame += 1 validFrame, cvFrame = inVideo.read() if not validFrame: breakcvGray, imgReference = convertFrameImage(cvFrame, backend)outBoxes = klt(imgReference)if curFrame in allBoxes: curNumBoxes += len(allBoxes[curFrame]) outVideo.release() # vim: ts=8:sw=4:sts=4:et:ai