
Implementing the KLT Tracking Algorithm in Python (with Source Code)

NVIDIA's Vision Programming Interface (VPI) is a computer vision and image processing software library that lets you implement algorithms accelerated on the different hardware backends available on NVIDIA Jetson embedded devices and on discrete GPUs.

Among the algorithms in the library are filtering methods, perspective warp, temporal noise reduction, histogram equalization, stereo disparity, and lens distortion correction. VPI offers easy-to-use Python bindings in addition to its C++ API.
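As a quick taste of those Python bindings, the sketch below runs one of the listed algorithms (a box filter) on the CUDA backend. This is a minimal, hypothetical example: the dummy input frame is made up, and the calls follow the VPI 2.x Python API used by the sample further down.

```python
import numpy as np
import vpi

# A dummy 8-bit grayscale frame standing in for real input data.
frame = np.random.randint(0, 256, (480, 640), dtype=np.uint8)

# The 'with' block selects the hardware backend for every VPI call inside it;
# switching to vpi.Backend.CPU (or PVA, on supported Jetson devices) requires
# no other code changes.
with vpi.Backend.CUDA:
    blurred = vpi.asimage(frame).box_filter(5, border=vpi.Border.ZERO)
```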

Besides interfacing with OpenCV, VPI can also interoperate with PyTorch and other Python-based libraries.
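In practice, interoperation with OpenCV amounts to wrapping a numpy array as a VPI image and locking the result back on the CPU. The sketch below assumes the VPI 2.x naming (the same `asimage`/`rlock_cpu` calls the sample below relies on); the image file names are purely illustrative.

```python
import cv2
import vpi

# Read a frame with OpenCV; the result is a plain numpy array.
frame = cv2.imread('input.png', cv2.IMREAD_GRAYSCALE)  # hypothetical file

# Wrap it as a VPI image and process it on the GPU.
with vpi.Backend.CUDA:
    smoothed = vpi.asimage(frame).box_filter(3, border=vpi.Border.ZERO)

# Lock the result on the CPU as a numpy array and hand it back to OpenCV.
with smoothed.rlock_cpu() as data:
    cv2.imwrite('output.png', data)
```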

The sample below tracks bounding boxes on an input video, draws them on every frame, and saves the result to a video file. The user can choose which backend is used for the processing.

Output: (figure: a frame from the output video with the tracked bounding boxes drawn on it)

```python
from __future__ import print_function

import sys
from argparse import ArgumentParser
import numpy as np
import cv2
import vpi


# Convert a colored input frame to grayscale (if needed)
# and then, if using the PVA backend, convert it to 16-bit unsigned pixels;
# the converted frame is copied before wrapping it as a VPI image so
# later draws in the gray frame do not change the reference VPI image.
def convertFrameImage(inputFrame, backend):
    if inputFrame.ndim == 3 and inputFrame.shape[2] == 3:
        grayFrame = cv2.cvtColor(inputFrame, cv2.COLOR_BGR2GRAY)
    else:
        grayFrame = inputFrame
    if backend == vpi.Backend.PVA:
        # PVA only supports 16-bit unsigned inputs,
        # where each element is in the 0-255 range, so
        # no rescaling is needed.
        grayFrame = grayFrame.astype(np.uint16)
    grayImage = vpi.asimage(grayFrame.copy())
    return grayFrame, grayImage


# Write the input gray frame to the output video with
# input bounding boxes and predictions
def writeOutput(outVideo, cvGray, inBoxes, inPreds, colors, backend):
    try:
        if cvGray.dtype == np.uint16:
            cvGray = cvGray.astype(np.uint8)
        if cvGray.dtype != np.uint8:
            raise Exception('Input frame format must be grayscale, 8-bit unsigned')
        cvGrayBGR = cv2.cvtColor(cvGray, cv2.COLOR_GRAY2BGR)

        # Tracks the number of valid bounding boxes in the current frame
        numValidBoxes = 0

        # Draw the input bounding boxes considering the input predictions
        with inBoxes.rlock_cpu(), inPreds.rlock_cpu() as pred:
            # Array of bounding boxes (bbox) and predictions (pred)
            bbox = inBoxes.cpu().view(np.recarray)

            for i in range(inBoxes.size):
                if bbox[i].tracking_status == vpi.KLTTrackStatus.LOST:
                    # If the tracking status of the current bounding box is lost, skip it
                    continue

                # Gather information on the current (i) bounding box and prediction
                # Prediction scaling width, height and x, y
                predScaleWidth = pred[i][0, 0]
                predScaleHeight = pred[i][1, 1]
                predX = pred[i][0, 2]
                predY = pred[i][1, 2]

                # Bounding box scaling width, height and x, y and bbox width, height
                bboxScaleWidth = bbox[i].bbox.xform.mat3[0, 0]
                bboxScaleHeight = bbox[i].bbox.xform.mat3[1, 1]
                bboxX = bbox[i].bbox.xform.mat3[0, 2]
                bboxY = bbox[i].bbox.xform.mat3[1, 2]
                bboxWidth = bbox[i].bbox.width
                bboxHeight = bbox[i].bbox.height

                # Compute the corrected x, y and width, height (w, h) by adding the
                # bounding box and prediction x, y, and by multiplying the bounding
                # box w, h with its own scaling and the prediction scaling
                x = bboxX + predX
                y = bboxY + predY
                w = bboxWidth * bboxScaleWidth * predScaleWidth
                h = bboxHeight * bboxScaleHeight * predScaleHeight

                # Start point and end point of the bounding box for OpenCV drawing
                startPoint = tuple(np.array([x, y], dtype=int))
                endPoint = tuple(np.array([x, y], dtype=int) + np.array([w, h], dtype=int))

                # The color of the bounding box to be drawn
                bboxColor = tuple([ int(c) for c in colors[0, i] ])
                cv2.rectangle(cvGrayBGR, startPoint, endPoint, bboxColor, 2)

                # Increment the number of valid bounding boxes in the current frame
                numValidBoxes += 1

        print(' Valid: {:02d} boxes'.format(numValidBoxes))

        outVideo.write(cvGrayBGR)

    except Exception as e:
        print('Error while writing output video:\n', e, file=sys.stderr)
        exit(1)


# ------------------------------------------------------------------------------
# Parse command line arguments

parser = ArgumentParser()
parser.add_argument('backend', choices=['cpu', 'cuda', 'pva'],
                    help='Backend to be used for processing')
parser.add_argument('input',
                    help='Input video')
parser.add_argument('boxes',
                    help='Text file with bounding boxes description')
args = parser.parse_args()

if args.backend == 'cpu':
    backend = vpi.Backend.CPU
elif args.backend == 'cuda':
    backend = vpi.Backend.CUDA
else:
    assert args.backend == 'pva'
    backend = vpi.Backend.PVA

# ------------------------------------------------------------------------------
# Open input and output videos

inVideo = cv2.VideoCapture(args.input)

fourcc = cv2.VideoWriter_fourcc(*'MPEG')
inSize = (int(inVideo.get(cv2.CAP_PROP_FRAME_WIDTH)), int(inVideo.get(cv2.CAP_PROP_FRAME_HEIGHT)))
fps = inVideo.get(cv2.CAP_PROP_FPS)

outVideo = cv2.VideoWriter('klt_python' + str(sys.version_info[0]) + '_' + args.backend + '.mp4',
                           fourcc, fps, inSize)

if not outVideo.isOpened():
    print("Error creating output video", file=sys.stderr)
    exit(1)

# ------------------------------------------------------------------------------
# Read input bounding boxes

# allBoxes is a dictionary of all bounding boxes to be tracked in the input video,
# where each value is a list of new bounding boxes to track at the frame indicated by its key
allBoxes = {}
totalNumBoxes = 0

# Array capacity 0 means no restriction on the maximum number of bounding boxes
arrayCapacity = 0

if backend == vpi.Backend.PVA:
    # PVA requires an array capacity (maximum number of bounding boxes) of 128
    arrayCapacity = 128

with open(args.boxes) as f:
    # The input file (f) should have one bounding box per line as:
    # "startFrame bboxX bboxY bboxWidth bboxHeight"; e.g.: "61 547 337 14 11"
    for line in f.readlines():
        line = line.replace('\n', '').replace('\r', '')
        startFrame, x, y, w, h = [ float(v) for v in line.split(' ') ]
        bb = (x, y, w, h)
        if startFrame not in allBoxes:
            allBoxes[startFrame] = [bb]
        else:
            allBoxes[startFrame].append(bb)
        totalNumBoxes += 1
        if totalNumBoxes == arrayCapacity:
            # Stop adding boxes once the total reaches the array capacity
            break

curFrame = 0
curNumBoxes = len(allBoxes[curFrame])

# ------------------------------------------------------------------------------
# Initialize the VPI array with all input bounding boxes (same as the C++ KLT sample)

if arrayCapacity == 0:
    arrayCapacity = totalNumBoxes

inBoxes = vpi.Array(arrayCapacity, vpi.Type.KLT_TRACKED_BOUNDING_BOX)

inBoxes.size = totalNumBoxes
with inBoxes.wlock_cpu():
    data = inBoxes.cpu().view(np.recarray)

    # Global index i over all bounding box data, starting at 0
    i = 0
    for f in sorted(allBoxes.keys()):
        for bb in allBoxes[f]:
            # Each bounding box bb is a tuple of (x, y, w, h)
            x, y, w, h = bb

            # The bounding box transform is the identity for the scaling part,
            # meaning no scaling, and the offset part is its position x, y
            data[i].bbox.xform.mat3[0, 0] = 1
            data[i].bbox.xform.mat3[1, 1] = 1
            data[i].bbox.xform.mat3[2, 2] = 1
            data[i].bbox.xform.mat3[0, 2] = x
            data[i].bbox.xform.mat3[1, 2] = y

            # The bounding box data stores its width and height w, h
            data[i].bbox.width = w
            data[i].bbox.height = h

            # Initially all boxes have status tracked and update needed
            data[i].tracking_status = vpi.KLTTrackStatus.TRACKED
            data[i].template_status = vpi.KLTTemplateStatus.UPDATE_NEEDED

            # Increment the global index for the next bounding box
            i += 1

# ------------------------------------------------------------------------------
# Generate random colors for the bounding boxes, the same as the C++ KLT sample

hues = np.zeros((totalNumBoxes,), dtype=np.uint8)

if int(cv2.__version__.split('.')[0]) >= 3:
    cv2.setRNGSeed(1)
    hues = cv2.randu(hues, 0, 180)
else:
    # Random generation differs in OpenCV-2.4
    rng = cv2.cv.RNG(1)
    hues = cv2.cv.fromarray(np.array([[ h for h in hues ]], dtype=np.uint8))
    cv2.cv.RandArr(rng, hues, cv2.cv.CV_RAND_UNI, 0, 180)
    hues = [ hues[0, i] for i in range(totalNumBoxes) ]

colors = np.array([[ [int(h), 255, 255] for h in hues ]], dtype=np.uint8)
colors = cv2.cvtColor(colors, cv2.COLOR_HSV2BGR)

# ------------------------------------------------------------------------------
# Initialize the KLT Feature Tracker algorithm

# Load the first frame
validFrame, cvFrame = inVideo.read()
if not validFrame:
    print("Error reading first input frame", file=sys.stderr)
    exit(1)

# Convert the OpenCV frame to gray, also returning the VPI image for the given backend
cvGray, imgTemplate = convertFrameImage(cvFrame, backend)

# Create the KLT Feature Tracker object using the backend specified by the user
klt = vpi.KLTFeatureTracker(imgTemplate, inBoxes, backend=backend)

# ------------------------------------------------------------------------------
# Main processing loop

while validFrame:
    print('Frame: {:04d} ; Total: {:02d} boxes ;'.format(curFrame, curNumBoxes), end='')

    # Adjust the input boxes and predictions to the current number of boxes
    inPreds = klt.in_predictions()
    inPreds.size = curNumBoxes
    inBoxes.size = curNumBoxes

    # Write the current frame to the output video
    writeOutput(outVideo, cvGray, inBoxes, inPreds, colors, backend)

    # Read the next input frame
    curFrame += 1
    validFrame, cvFrame = inVideo.read()
    if not validFrame:
        break

    cvGray, imgReference = convertFrameImage(cvFrame, backend)

    # Run the KLT tracker on the new reference frame
    outBoxes = klt(imgReference)

    # Account for any new boxes that start at the current frame
    if curFrame in allBoxes:
        curNumBoxes += len(allBoxes[curFrame])

outVideo.release()

# vim: ts=8:sw=4:sts=4:et:ai
```
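To run the sample, pass the desired backend, the input video, and the bounding-box file on the command line, for example `python3 main.py cuda dashcam.mp4 bboxes.txt` (the script and file names here are only illustrative). Each line of the box file describes one box as `startFrame x y w h`, e.g. `61 547 337 14 11`, and the annotated result is written to a file such as `klt_python3_cuda.mp4`, named after the Python major version and the chosen backend.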