# Panoramic Images from Video
# Completed by Lonnie Chien, Suraj Kalidindi
import numpy as np
import numpy.ma as ma
import sys, os
import cv2
import matplotlib.pyplot as plt
import random
import time
def resize_video(input_filename, start=0, end=None):
    """Resize frames `start` to `end` of video `input_filename`.

    Writes a new video next to the input named `<basename>_resized.mp4`,
    scaled so its largest dimension is 360 pixels (aspect ratio kept).

    Inputs:
        input_filename - path to the source video
        start, end - frame index window [start, end) to write
                     (end=None means all remaining frames)
    Exits the process with status 1 on missing file, already-small
    video, or invalid frame indices (matching the original behavior).
    """
    # get video properties
    if not os.path.exists(input_filename):
        print('error:', input_filename, 'does not exist')
        sys.exit(1)
    nframes, width, height, fps = get_num_frames(input_filename)
    print(input_filename, 'has', nframes,
          f'frames of size {width}x{height} at {fps} fps')
    # compute output size: scale the largest dimension down to 360
    frac = 360.0 / max(width, height)
    if frac >= 1.0:
        print('max dimension already <= 360, not resizing!')
        sys.exit(1)
    output_size = (int(round(width * frac)), int(round(height * frac)))
    print('will resize to {}x{}'.format(*output_size))
    # deal with start/end indices
    if end is None:
        end = nframes
    if start < 0 or start > nframes or end < 0 or end > nframes or end < start:
        print('invalid frame indices, must have 0 <= STARTFRAME < ENDFRAME <', nframes)
        sys.exit(1)
    if start > 0 or end < nframes:
        print(f'will write frames {start}-{end}')
    # create the output video alongside the input
    path_prefix, basename = os.path.split(input_filename)
    basename, _ = os.path.splitext(basename)
    fourcc, ext = (cv2.VideoWriter_fourcc('m', 'p', '4', 'v'), 'mp4')
    output_filename = os.path.join(path_prefix, basename + '_resized.' + ext)
    writer = cv2.VideoWriter(output_filename, fourcc, fps, output_size)
    cap = cv2.VideoCapture(input_filename)
    try:
        frame_idx = 0
        while True:
            ok, frame = cap.read()
            if not ok or frame is None:
                break
            # only resize/write frames inside the requested window
            # (the old code resized every frame, even discarded ones)
            if start <= frame_idx < end:
                frame = cv2.resize(frame, output_size,
                                   interpolation=cv2.INTER_AREA)
                writer.write(frame)
            frame_idx += 1
    finally:
        # BUG FIX: release the capture and the writer -- an unreleased
        # VideoWriter can leave the output file unflushed/corrupt, which
        # matters because callers immediately re-open it
        cap.release()
        writer.release()
    print(f'wrote {end - start} frames to {output_filename}')
def get_num_frames(input_filename):
    """Return (nframes, width, height, fps) for a video file.

    Tries the fast container-metadata query first; falls back to
    decoding every frame when the container reports bogus values.
    """
    cap = cv2.VideoCapture(input_filename)
    try:
        # try the fast way: read the container metadata
        nframes = cap.get(cv2.CAP_PROP_FRAME_COUNT)
        width = cap.get(cv2.CAP_PROP_FRAME_WIDTH)
        height = cap.get(cv2.CAP_PROP_FRAME_HEIGHT)
        fps = cap.get(cv2.CAP_PROP_FPS)
        if not fps > 1:
            # fps metadata is buggy sometimes; assume a sane default
            fps = 30.0
        if nframes > 1 and width > 1 and height > 1:
            # metadata looked valid
            return int(nframes), int(width), int(height), fps
        # the slow way: count frames by decoding all of them
        cnt = 0
        while True:
            ok, frame = cap.read()
            if not ok or frame is None:
                break
            height, width = frame.shape[:2]
            cnt += 1
        return cnt, width, height, fps
    finally:
        # BUG FIX: release the capture handle on every exit path
        # (the original leaked it on both returns)
        cap.release()
def t_homog(image, i, j, m):
    """Apply a centered "keystone"-style homography to `image`.

    Builds a 3x3 identity with entry (i, j) set to `m`, conjugates it
    by the translation that moves the image center to the origin, then
    scales by 0.5. Finally shifts the warped result so it lies fully
    inside the output canvas and returns it cropped to its bounding box.
    """
    src = image.copy()
    rows, cols = src.shape[:2]

    # Translation taking the image center to the origin, and its inverse,
    # so the keystone warp is applied about the center rather than (0,0).
    to_center = np.eye(3)
    to_center[0, 2] = -0.5 * cols
    to_center[1, 2] = -0.5 * rows
    from_center = np.linalg.inv(to_center)

    # Base homography: identity with a single perturbed entry. Perturbing
    # the bottom row changes the homogeneous "w" coordinate with position,
    # which produces the keystone/perspective effect.
    keystone = np.eye(3)
    keystone[i, j] = m

    # Uniform 0.5 scale applied after the warp.
    half = np.eye(3)
    half[0, 0] = half[1, 1] = 0.5

    # Compose with @ (matrix product; * would be element-wise on ndarrays).
    H = half @ from_center @ keystone @ to_center

    # Map the source corners through the warp to see where they land.
    # cv2.perspectiveTransform expects an n-by-1-by-2 float32 array.
    corners = np.array([[[0, 0]],
                        [[cols, 0]],
                        [[cols, rows]],
                        [[0, rows]]], dtype='float32')
    warped_corners = cv2.perspectiveTransform(corners, H)

    # Integer bounding box of the warped corners: (x0, y0, width, height).
    x0, y0, out_w, out_h = cv2.boundingRect(warped_corners)

    # Compose a final translation so the whole warp is visible
    # (same trick as transrot.py, but done by matrix composition).
    shift = np.eye(3)
    shift[0, 2] -= x0
    shift[1, 2] -= y0

    return cv2.warpPerspective(src, shift @ H, (out_w, out_h))
def stitcher(img1, pts1, img2, pts2):
    """Stitch img1 onto img2 using point correspondences.

    Inputs:
        img1, img2 - HxWx3 np.uint8 images to stitch together
        pts1, pts2 - n-by-2 np.float32 arrays of matching points;
                     row k of pts1 corresponds to row k of pts2
    Returns:
        HxWx3 np.uint8 mosaic containing both images, cropped to the
        joint bounding box; overlapping pixels are averaged.
    """
    pts1 = pts1.reshape((len(pts1), 1, 2))
    pts2 = pts2.reshape((len(pts2), 1, 2))
    h1, w1 = img1.shape[:2]
    h2, w2 = img2.shape[:2]
    # homography mapping img1's frame into img2's frame
    H, mask = cv2.findHomography(pts1, pts2)
    # warp img1's corners; together with img2's corners they bound the mosaic
    borderPoints1 = np.array([[0, 0], [w1, 0], [w1, h1], [0, h1]],
                             dtype=np.float32).reshape((4, 1, 2))
    borderPoints2 = np.array([[0, 0], [w2, 0], [w2, h2], [0, h2]],
                             dtype=np.float32).reshape((4, 1, 2))
    borderPoints1_warped = cv2.perspectiveTransform(borderPoints1, H)
    allpts = np.concatenate((borderPoints1_warped, borderPoints2)).reshape(-1, 2)
    allpts = allpts.astype(np.float32)
    # bounding box of the combined canvas: (x0, y0, width, height)
    x0, y0, w_br, h_br = cv2.boundingRect(allpts)
    # translation moving the bounding-box origin to (0, 0)
    T = np.eye(3, 3, dtype='float32')
    T[0, 2] -= x0
    T[1, 2] -= y0
    # warp img1 with T@H and img2 with T alone into the shared canvas.
    # BUG FIX: warp directly into the (w_br, h_br) canvas instead of
    # np.pad-ing the inputs first -- the old padding crashed with
    # negative pad widths whenever h_br < h1 or w_br < w1, and
    # warpPerspective already fills out-of-source pixels with 0.
    M = T @ H
    warpA = cv2.warpPerspective(img1, M, (w_br, h_br))
    warpB = cv2.warpPerspective(img2, T, (w_br, h_br))
    # composite: each image where only it covers; average in the overlap
    m1 = ma.make_mask(warpA)
    m2 = ma.make_mask(warpB)
    m3 = np.logical_and(m1, m2)
    combined_img = np.zeros_like(warpA)
    combined_img[m1] = warpA[m1]
    combined_img[m2] = warpB[m2]
    combined_img[m3] = warpA[m3] // 2 + warpB[m3] // 2
    return combined_img.astype(np.uint8)
def ransac_homography(pts1, pts2, Nmax=1000, thresh=3):
    """Find the indices of inlier correspondences using RANSAC.

    Inputs:
        pts1, pts2 - n-by-2 np.float32 arrays; row k of pts1
                     corresponds to row k of pts2 (may contain outliers)
        Nmax - int, maximum number of RANSAC iterations (default 1000)
        thresh - float, pixel distance threshold for inliers (default 3)
    Returns:
        list of row indices of the largest inlier set found
        (empty list if no sample ever produced an inlier)
    """
    best = []  # BUG FIX: was [0], which reported index 0 as an inlier
               # even when it never passed the threshold test
    n = len(pts1)
    pts1 = pts1.reshape((n, 1, 2))
    pts2_flat = np.asarray(pts2).reshape((n, 2))
    for _ in range(Nmax):
        # fit a homography to 4 random correspondences
        sample = random.sample(range(n), 4)
        rand_pts1 = pts1[sample].reshape((4, 1, 2))
        rand_pts2 = pts2_flat[sample].reshape((4, 1, 2))
        H, mask = cv2.findHomography(rand_pts1, rand_pts2)
        if H is None:
            # BUG FIX: degenerate (e.g. collinear) samples make
            # findHomography return None, which crashed
            # perspectiveTransform in the original
            continue
        # project every pts1 through H and compare against pts2
        pts2_est = cv2.perspectiveTransform(pts1, H).reshape((n, 2))
        # vectorized Euclidean distances (was a per-point Python loop)
        dists = np.linalg.norm(pts2_flat - pts2_est, axis=1)
        inliers = np.flatnonzero(dists <= thresh).tolist()
        # keep the largest consensus set seen so far
        if len(inliers) > len(best):
            best = inliers
    return best
def get_points(img1, img2):
    """Find RANSAC-filtered matching point pairs between two images.

    Inputs:
        img1, img2 - RGB np.uint8 images
    Returns:
        pts1_ransac, pts2_ransac - n-by-2 np.float32 arrays of
        corresponding inlier points (row k of one matches row k of the other)
    """
    # detect SIFT keypoints and descriptors on grayscale versions
    gray_a = cv2.cvtColor(img1, cv2.COLOR_RGB2GRAY)
    gray_b = cv2.cvtColor(img2, cv2.COLOR_RGB2GRAY)
    sift = cv2.SIFT_create()
    kps_a, desc_a = sift.detectAndCompute(gray_a, None)
    kps_b, desc_b = sift.detectAndCompute(gray_b, None)

    # brute-force kNN match, then Lowe's ratio test (0.75) to keep only
    # matches that are clearly better than their runner-up
    matcher = cv2.BFMatcher()
    candidate_pairs = matcher.knnMatch(desc_a, desc_b, k=2)
    good = [m for m, runner_up in candidate_pairs
            if m.distance < 0.75 * runner_up.distance]

    # pull the pixel coordinates of each surviving match
    pts1 = np.array([kps_a[m.queryIdx].pt for m in good], dtype=np.float32)
    pts2 = np.array([kps_b[m.trainIdx].pt for m in good], dtype=np.float32)

    # discard outlier correspondences with RANSAC
    inliers = ransac_homography(pts1, pts2, Nmax=500, thresh=5)
    pts1_ransac = pts1[inliers, :]
    pts2_ransac = pts2[inliers, :]
    return pts1_ransac, pts2_ransac
def videoToFrames(video_file, N=55):
    """Convert a video into a subsampled list of RGB frames.

    Resizes the video (largest dimension -> 360), dumps its frames to
    /work/Frames as JPEGs, then loads every Nth frame.

    Inputs:
        video_file - path to the input video
        N - keep every Nth frame (default 55)
    Returns:
        images - list of RGB frames, additionally downsampled 4x spatially
    """
    resize_video(video_file)
    # BUG FIX: read frames from the resized video that resize_video just
    # wrote -- the original re-opened the full-resolution input, so the
    # resize step had no effect. The name must mirror resize_video's
    # output naming: <basename>_resized.mp4 next to the input.
    path_prefix, basename = os.path.split(video_file)
    basename, _ = os.path.splitext(basename)
    resized_file = os.path.join(path_prefix, basename + '_resized.mp4')
    vidcap = cv2.VideoCapture(resized_file)
    count = 0
    try:
        success, image = vidcap.read()
        while success:
            cv2.imwrite("/work/Frames/frame%d.jpg" % count, image)  # save frame as JPEG file
            success, image = vidcap.read()
            count += 1
    finally:
        # BUG FIX: release the capture handle (resource leak)
        vidcap.release()
    # load every Nth saved frame, convert BGR->RGB, downsample 4x
    images = []
    for i in range(count // N):
        img_path = '/work/Frames/frame%d.jpg' % (i * N)
        img_selected = cv2.cvtColor(cv2.imread(img_path), cv2.COLOR_BGR2RGB)
        images.append(img_selected[::4, ::4])
    return images
def makePano(images):
    """Stitch a list of frames into one panorama, left to right.

    Inputs:
        images - list of RGB frames to stitch together
    Returns:
        imgNew - the accumulated panorama image
    Side effect: shows each intermediate mosaic in a 3-column subplot grid.
    """
    imgNew = images[0]
    fig = plt.figure(figsize=(10, 7))
    # FIX: removed dead code -- the original assigned
    # `start = time.perf_counter()` every iteration and never used it
    n_steps = len(images) - 1
    # subplot grid: 3 columns, ceil(n_steps / 3) rows (hoisted out of the loop)
    n_rows = n_steps // 3 + int(n_steps % 3 > 0)
    for i in range(n_steps):
        # match the running panorama against the next frame and stitch it in
        pts1, pts2 = get_points(imgNew, images[i + 1])
        imgNew = stitcher(imgNew, pts1, images[i + 1], pts2)
        fig.add_subplot(n_rows, 3, i + 1)
        plt.imshow(imgNew)
    return imgNew
# ---- Panorama 1: "woman" video ----
video_file = "/work/input video/woman.mp4"
images = videoToFrames(video_file,N = 75) # if run out of memory, increase value of N here
finalImage = makePano(images)
plt.figure(figsize = (10,7))
plt.imshow(finalImage)
# keystone-warp the panorama by perturbing H[2,0] (see t_homog)
woman_warp = t_homog(finalImage,2,0,-0.00065)
plt.figure(figsize = (10,7))
plt.imshow(woman_warp)
# frames are RGB in memory but cv2.imwrite expects BGR -- convert before saving
woman_output = cv2.cvtColor(woman_warp, cv2.COLOR_RGB2BGR)
cv2.imwrite('/work/output panoramas/woman_output.jpg',woman_output)
# ---- Panorama 2: "art" video ----
video_file = "/work/input video/art.mp4"
images = videoToFrames(video_file,N = 20) # if run out of memory, increase value of N here
finalImage = makePano(images)
plt.figure(figsize = (10,7))
plt.imshow(finalImage)
# this panorama gets its keystone warp via H[2,1] instead
art_warp = t_homog(finalImage,2,1,0.0005)
plt.figure(figsize = (10,7))
plt.imshow(art_warp)
# RGB -> BGR conversion again before writing with OpenCV
art_output = cv2.cvtColor(art_warp, cv2.COLOR_RGB2BGR)
cv2.imwrite('/work/output panoramas/art_output.jpg',art_output)