"""Panoramic Images from Video.

Completed by Lonnie Chien, Suraj Kalidindi.
"""

import os
import random
import sys
import time

import cv2
import matplotlib.pyplot as plt
import numpy as np
import numpy.ma as ma

def resize_video(input_filename, start=0, end=None):
    """Resize frames `start` to `end` of video `input_filename`.

    The frames are scaled so that the larger dimension becomes 360 pixels
    (aspect ratio preserved) and written next to the input file as
    ``<basename>_resized.mp4``.

    Args:
        input_filename: path of the video to read.
        start: index of the first frame to write (inclusive).
        end: index one past the last frame to write; ``None`` means the
            frame count reported by `get_num_frames`.

    Returns:
        The path of the resized video that was written.

    The process is terminated with ``sys.exit(1)`` when the input does not
    exist, cannot be read, is already 360 pixels or smaller in its larger
    dimension, or when the frame indices are invalid.
    """
    ##################################################
    # get video properties

    if not os.path.exists(input_filename):
        print('error:', input_filename, 'does not exist')
        sys.exit(1)

    nframes, width, height, fps = get_num_frames(input_filename)

    # A file that exists but cannot be decoded reports a zero size, which
    # would otherwise surface as a ZeroDivisionError below.
    if max(width, height) <= 0:
        print('error: could not read any frames from', input_filename)
        sys.exit(1)

    print(input_filename, 'has', nframes,
          f'frames of size {width}x{height} at {fps} fps')

    ##################################################
    # compute output size

    frac = 360.0/max(width, height)

    if frac >= 1.0:
        print('max dimension already <= 360, not resizing!')
        sys.exit(1)

    output_size = (int(round(width*frac)), int(round(height*frac)))
    print('will resize to {}x{}'.format(*output_size))

    ##################################################
    # deal with start/end indices

    if end is None:
        end = nframes

    if start < 0 or start > nframes or end < 0 or end > nframes or end < start:
        print('invalid frame indices, must have 0 <= STARTFRAME < ENDFRAME <', nframes)
        sys.exit(1)

    if start > 0 or end < nframes:
        print(f'will write frames {start}-{end}')

    ##################################################
    # create the output video

    path_prefix, basename = os.path.split(input_filename)
    basename, _ = os.path.splitext(basename)

    fourcc, ext = (cv2.VideoWriter_fourcc('m', 'p', '4', 'v'), 'mp4')
    output_filename = os.path.join(path_prefix, basename + '_resized.' + ext)

    writer = cv2.VideoWriter(output_filename, fourcc, fps, output_size)
    cap = cv2.VideoCapture(input_filename)

    written = 0
    try:
        frame_idx = 0
        # Stop as soon as the requested range is exhausted instead of
        # decoding (and resizing) the rest of the file for nothing.
        while frame_idx < end:
            ok, frame = cap.read()
            if not ok or frame is None:
                break
            if frame_idx >= start:
                frame = cv2.resize(frame, output_size,
                                   interpolation=cv2.INTER_AREA)
                writer.write(frame)
                written += 1
            frame_idx += 1
    finally:
        # Releasing the writer is what finalizes the output container.
        cap.release()
        writer.release()

    print(f'wrote {written} frames to {output_filename}')
    return output_filename


def get_num_frames(input_filename):
    """Return ``(nframes, width, height, fps)`` for a video file.

    The container metadata is tried first; if it does not yield a usable
    frame count and size, every frame is decoded and counted instead.
    An fps value that is not greater than 1 is replaced by 30.0.
    """
    cap = cv2.VideoCapture(input_filename)
    try:
        # try the fast way
        nframes = cap.get(cv2.CAP_PROP_FRAME_COUNT)
        width = cap.get(cv2.CAP_PROP_FRAME_WIDTH)
        height = cap.get(cv2.CAP_PROP_FRAME_HEIGHT)
        fps = cap.get(cv2.CAP_PROP_FPS)

        if not fps > 1:
            # fps buggy sometimes
            fps = 30.0

        if nframes > 1 and width > 1 and height > 1:
            # it worked
            return int(nframes), int(width), int(height), fps

        # the slow way
        cnt = 0
        while True:
            ok, frame = cap.read()
            if not ok or frame is None:
                break
            height, width = frame.shape[:2]
            cnt += 1

        return cnt, width, height, fps
    finally:
        cap.release()

def t_homog(image, i, j, m):
    """Warp `image` with a homography that differs from identity in one entry.

    A 3x3 identity matrix gets entry ``[i, j]`` set to `m`. That matrix is
    applied about the image centre (translate centre to the origin, apply,
    translate back), followed by a uniform 0.5 scale. The result is then
    shifted so the bounding box of the warped image corners starts at (0, 0).

    Returns:
        The warped image, sized to that bounding box.
    """
    source = image.copy()
    height, width = source.shape[:2]

    # Move the image centre to the origin, and the matching inverse.
    to_origin = np.eye(3)
    to_origin[0, 2] = -0.5 * width
    to_origin[1, 2] = -0.5 * height
    from_origin = np.linalg.inv(to_origin)

    # The homography proper: identity with a single modified entry.
    core = np.eye(3)
    core[i, j] = m

    # Uniform half-size scale applied last.
    shrink = np.eye(3)
    shrink[0, 0] = 0.5
    shrink[1, 1] = 0.5

    # @ is matrix multiplication (``*`` would be element-wise).
    warp = shrink @ from_origin @ core @ to_origin

    # cv2.perspectiveTransform expects an n-by-1-by-2 float array.
    corners = np.array(
        [[[0, 0]], [[width, 0]], [[width, height]], [[0, height]]],
        dtype='float32',
    )
    warped_corners = cv2.perspectiveTransform(corners, warp)

    # Integer bounding box of the warped corners: (x0, y0, width, height).
    x0, y0, out_w, out_h = cv2.boundingRect(warped_corners)

    # Translate so the whole warped image lands inside the output canvas.
    shift = np.eye(3)
    shift[0, 2] -= x0
    shift[1, 2] -= y0

    return cv2.warpPerspective(source, shift @ warp, (out_w, out_h))

def stitcher(img1, pts1, img2, pts2):
    """Stitch `img1` and `img2` into a single image using correspondences.

    `img1` is warped into the coordinate frame of `img2` with the homography
    estimated from ``pts1 -> pts2``; both images are then placed on a canvas
    covering the bounding box of the warped `img1` corners and the `img2`
    corners.

    Args:
        img1, img2: np.uint8 image arrays to stitch together.
        pts1, pts2: np.float32 arrays of shape (n, 2) with n >= 4. Row k of
            `pts1` corresponds to row k of `pts2`.

    Returns:
        np.uint8 array holding the combined image. Elements that are
        non-zero in both warped images are averaged; zero-valued elements
        are treated as empty.

    Raises:
        ValueError: if the point sets differ in length, contain fewer than
            four points, or no homography can be estimated from them.
    """
    pts1 = np.asarray(pts1, dtype=np.float32).reshape(-1, 1, 2)
    pts2 = np.asarray(pts2, dtype=np.float32).reshape(-1, 1, 2)
    if len(pts1) != len(pts2):
        raise ValueError('pts1 and pts2 must have the same number of points')
    if len(pts1) < 4:
        raise ValueError('at least 4 point correspondences are required')

    h1, w1 = img1.shape[:2]
    h2, w2 = img2.shape[:2]

    # find homography
    H, _ = cv2.findHomography(pts1, pts2)
    if H is None:
        raise ValueError('could not estimate a homography from the points')

    corners1 = np.array(
        [[0, 0], [w1, 0], [w1, h1], [0, h1]], dtype=np.float32
    ).reshape(4, 1, 2)
    corners2 = np.array(
        [[0, 0], [w2, 0], [w2, h2], [0, h2]], dtype=np.float32
    ).reshape(4, 1, 2)

    corners1_warped = cv2.perspectiveTransform(corners1, H)
    all_corners = np.concatenate((corners1_warped, corners2)).reshape(-1, 2)
    all_corners = all_corners.astype(np.float32)

    # bounding box of everything that must fit on the canvas
    x0, y0, w_br, h_br = cv2.boundingRect(all_corners)

    # translation that moves the bounding-box origin to (0, 0)
    T = np.eye(3, 3, dtype='float32')
    T[0, 2] -= x0
    T[1, 2] -= y0

    M = T @ H

    # warpPerspective fills everything outside the source with zeros by
    # default, so the inputs do not need to be padded to the canvas size
    # first. (Padding raised whenever the canvas was smaller than img1 in
    # either dimension.)
    warpA = cv2.warpPerspective(img1, M, (w_br, h_br))
    warpB = cv2.warpPerspective(img2, T, (w_br, h_br))

    # combine warped images element-wise: average where both have data,
    # otherwise take whichever one is non-zero
    has_a = warpA != 0
    has_b = warpB != 0
    both = has_a & has_b

    # halving before adding keeps the uint8 sum from overflowing
    blended = warpA // 2 + warpB // 2
    combined_img = np.where(both, blended, np.where(has_b, warpB, warpA))

    return combined_img.astype(np.uint8)

def ransac_homography(pts1, pts2, Nmax=1000, thresh=3):
    """Find the largest set of homography-consistent matches using RANSAC.

    Each iteration fits a homography to four randomly chosen
    correspondences, maps every point of `pts1` through it and counts the
    rows whose mapped position lies within `thresh` of the matching row of
    `pts2`.

    Args:
        pts1, pts2: np.float32 arrays of shape (n, 2) with n >= 4. Row k of
            `pts1` corresponds to row k of `pts2`; outliers are allowed.
        Nmax: maximum number of iterations (default 1000).
        thresh: largest Euclidean distance, in the units of the point
            coordinates, at which a row counts as an inlier (default 3).

    Returns:
        list of int row indices of the largest inlier set found. The list
        is empty if no iteration produced a usable homography.

    Raises:
        ValueError: if the point sets differ in length or hold fewer than
            four correspondences.
    """
    src = np.asarray(pts1, dtype=np.float32).reshape(-1, 1, 2)
    dst = np.asarray(pts2, dtype=np.float32).reshape(-1, 2)

    n_points = len(src)
    if n_points != len(dst):
        raise ValueError('pts1 and pts2 must have the same number of points')
    if n_points < 4:
        raise ValueError('at least 4 point correspondences are required')

    best = []
    for _ in range(Nmax):
        # fit a homography to 4 random correspondences
        sample = random.sample(range(n_points), 4)
        H, _ = cv2.findHomography(src[sample], dst[sample].reshape(4, 1, 2))
        if H is None:
            # degenerate sample (e.g. collinear points): try another one
            continue

        # map all of pts1 and measure how far each lands from pts2
        dst_est = cv2.perspectiveTransform(src, H).reshape(-1, 2)
        dist = np.linalg.norm(dst - dst_est, axis=1)
        inliers = np.flatnonzero(dist <= thresh).tolist()

        # keep the largest consensus set seen so far
        if len(inliers) > len(best):
            best = inliers
            if len(best) == n_points:
                # cannot do better than every point being an inlier
                break

    return best

def get_points(img1, img2):
    """Find matching points between two RGB images.

    SIFT keypoints are detected in both images, matched with a brute-force
    2-nearest-neighbour search, filtered with a 0.75 ratio test and finally
    reduced to the inliers returned by `ransac_homography`.

    Args:
        img1, img2: RGB images (they are converted with COLOR_RGB2GRAY).

    Returns:
        (pts1_ransac, pts2_ransac): np.float32 arrays with one (x, y) row
        per retained match; row k of one corresponds to row k of the other.

    Raises:
        ValueError: if either image yields no descriptors, or fewer than
            four matches survive the ratio test.
    """
    gray1 = cv2.cvtColor(img1, cv2.COLOR_RGB2GRAY)
    gray2 = cv2.cvtColor(img2, cv2.COLOR_RGB2GRAY)

    sift = cv2.SIFT_create()
    keypoints1, des1 = sift.detectAndCompute(gray1, None)
    keypoints2, des2 = sift.detectAndCompute(gray2, None)
    if des1 is None or des2 is None:
        raise ValueError('no SIFT features found in one of the images')

    candidates = cv2.BFMatcher().knnMatch(des1, des2, k=2)

    # Ratio test: keep a match only if it is clearly better than the
    # runner-up. A query can come back with fewer than two neighbours, in
    # which case there is nothing to compare against and it is skipped.
    good = [
        pair[0]
        for pair in candidates
        if len(pair) == 2 and pair[0].distance < 0.75 * pair[1].distance
    ]
    if len(good) < 4:
        raise ValueError('fewer than 4 good matches between the images')

    pts1 = np.array([keypoints1[m.queryIdx].pt for m in good],
                    dtype=np.float32)
    pts2 = np.array([keypoints2[m.trainIdx].pt for m in good],
                    dtype=np.float32)

    inliers = ransac_homography(pts1, pts2, Nmax=500, thresh=5)
    return pts1[inliers, :], pts2[inliers, :]

def videoToFrames(video_file, N=55, frames_dir="/work/Frames"):
    """Convert a video into a list of subsampled RGB frames.

    Every frame of `video_file` is saved as ``frame<k>.jpg`` in
    `frames_dir`; frames ``0, N, 2N, ...`` (``count // N`` of them) are then
    read back, converted from BGR to RGB and reduced by keeping every 4th
    row and column.

    Args:
        video_file: path of the input video.
        N: spacing between the frames that are kept.
        frames_dir: directory for the intermediate JPEG files; created if
            it does not exist.

    Returns:
        list of RGB frames.

    Raises:
        IOError: if a frame cannot be written to or read from `frames_dir`.
    """
    # Writes a "_resized" copy next to the input.
    # NOTE(review): the frames below are read from `video_file` itself, not
    # from that resized copy - confirm this is intended.
    resize_video(video_file)

    # cv2.imwrite does not create missing directories; it just returns False.
    os.makedirs(frames_dir, exist_ok=True)

    vidcap = cv2.VideoCapture(video_file)
    count = 0
    try:
        while True:
            success, image = vidcap.read()
            if not success:
                break
            frame_path = os.path.join(frames_dir, "frame%d.jpg" % count)
            # save frame as JPEG file
            if not cv2.imwrite(frame_path, image):
                raise IOError('could not write ' + frame_path)
            count += 1
    finally:
        vidcap.release()

    # collect each Nth frame into the images list
    images = []
    for i in range(count // N):
        img_path = os.path.join(frames_dir, 'frame%d.jpg' % (i * N))
        frame = cv2.imread(img_path)
        if frame is None:
            raise IOError('could not read ' + img_path)
        img_selected = cv2.cvtColor(frame, cv2.COLOR_BGR2RGB)
        images.append(img_selected[::4, ::4])

    return images


def makePano(images):
    """Build a panorama by stitching `images` together one after another.

    Starting from ``images[0]``, each following image is matched against
    the panorama built so far (`get_points`) and merged into it
    (`stitcher`). Every intermediate panorama is drawn into its own subplot
    of a 3-column matplotlib figure.

    Args:
        images: non-empty list of images to stitch together, in order.

    Returns:
        The final panorama image (``images[0]`` itself if there is only one
        image).
    """
    pano = images[0]
    n_steps = len(images) - 1

    fig = plt.figure(figsize=(10, 7))
    # number of rows needed for n_steps subplots in 3 columns (ceiling)
    n_rows = -(-n_steps // 3)

    for step, frame in enumerate(images[1:], start=1):
        pts_pano, pts_frame = get_points(pano, frame)
        pano = stitcher(pano, pts_pano, frame, pts_frame)

        # draw on the subplot explicitly rather than on "the current axes"
        ax = fig.add_subplot(n_rows, 3, step)
        ax.imshow(pano)

    return pano

# ---------------------------------------------------------------------
# Panorama 1: "woman" clip
# ---------------------------------------------------------------------
video_file = "/work/input video/woman.mp4"
# if run out of memory, increase value of N here
images = videoToFrames(video_file, N=75)
finalImage = makePano(images)
plt.figure(figsize=(10, 7))
plt.imshow(finalImage)

# Perspective-correct the stitched result and display it.
woman_warp = t_homog(finalImage, 2, 0, -0.00065)
plt.figure(figsize=(10, 7))
plt.imshow(woman_warp)

# The pipeline works in RGB; cv2.imwrite expects BGR.
woman_output = cv2.cvtColor(woman_warp, cv2.COLOR_RGB2BGR)
cv2.imwrite('/work/output panoramas/woman_output.jpg', woman_output)

# ---------------------------------------------------------------------
# Panorama 2: "art" clip
# ---------------------------------------------------------------------
video_file = "/work/input video/art.mp4"
# if run out of memory, increase value of N here
images = videoToFrames(video_file, N=20)
finalImage = makePano(images)
plt.figure(figsize=(10, 7))
plt.imshow(finalImage)

# Perspective-correct the stitched result and display it.
art_warp = t_homog(finalImage, 2, 1, 0.0005)
plt.figure(figsize=(10, 7))
plt.imshow(art_warp)

# The pipeline works in RGB; cv2.imwrite expects BGR.
art_output = cv2.cvtColor(art_warp, cv2.COLOR_RGB2BGR)
cv2.imwrite('/work/output panoramas/art_output.jpg', art_output)

# Panoramic Images from Video
#
# Completed by Lonnie Chien, Suraj Kalidindi
#
# Panoramic Images from Video