import numpy as np import matplotlib.pyplot as plt import pandas as pd from time import process_time

(A) Algorithm Implementation

def insertionSort(arr, l, r): c=0 for i in range(l+1, r+1): for j in range(i, l,-1): c+=1 #update key comparison k=arr[j] if arr[j]<arr[j-1]: arr[j]=arr[j-1] arr[j-1]=k else: break return c def mergeSertionSort(arr, l, r, S): if(r-l<=0): return 0 # Switch to insertion sort for small sub-arrays < size S if(r-l+1<=S): return insertionSort(arr, l, r) mid = (l+r)//2 c1=mergeSertionSort(arr, l, mid, S) c2=mergeSertionSort(arr, mid+1, r, S) c3=merge(arr, l, r, mid) #returns total no. key comparisons return c1+c2+c3 #Merging 2 sorted arrays using auxiliary arrays def merge(arr, l, r, m): #Creating auxiliary arrays for left and right subarrays llist = arr[l:m+1] rlist = arr[m+1:r+1] #Initialising pointers pointing to the head of each copied subarray and the original array lptr = 0 rptr = 0 aptr = l c=0 while(lptr<len(llist) and rptr<len(rlist)): #Updating key comparison c+=1 #If element of left subarray smaller than element of right subarray, replace element of original array with it if(llist[lptr]<rlist[rptr]): arr[aptr]=llist[lptr] aptr+=1 lptr+=1 #Replace element of original array with right subarray element if it is smaller than left subarray element elif (llist[lptr]>rlist[rptr]): arr[aptr]=rlist[rptr] aptr+=1 rptr+=1 #If elements of both subarrays are equal, add both into original array else: arr[aptr]=llist[lptr] aptr+=1 arr[aptr]=rlist[rptr] aptr+=1 rptr+=1 lptr+=1 #Unvisited elements are larger than visited elements. Add to back of original array. while(lptr<len(llist)): arr[aptr]=llist[lptr] aptr+=1 lptr+=1 while(rptr<len(rlist)): arr[aptr]=rlist[rptr] aptr+=1 rptr+=1 return c

arrTest=[20, 48, 36, 10, 31, 13, 29, 5, 2, 34, 15, 14, 42, 40, 43, 11, 21, 49, 8, 3] print("Original Array:", arrTest, sep=", ") print("No. of Key Comparisons:",mergeSertionSort(arrTest, 0, 19, 3)) print("Sorted Array:",arrTest, sep=", ")

(B) Generate Input Data

#list of exponents num = [3, 4, 5, 6, 7] #array for datasets data = [] for n in num: data.append(np.random.randint(1000000, size = 10**n).tolist()) print(data[0])

(C) Analyse Time Complexity

i. Fixed S, Vary n

S=10 count = [] for i in range(5): temp=list(data[i]) counter=mergeSertionSort(temp,0,len(temp)-1,S) count.append(counter) print(count)

xaxis = [] for i in num: x = 10**i xaxis.append(x) f,axes = plt.subplots(1,1,figsize=(16,10)) plt.plot(xaxis,count,linewidth=3) plt.xlabel("Size of array") plt.ylabel("Key comparisons") plt.title('Empirical W(n) against n', fontsize=20) plt.xscale('log') plt.xticks([1000,10000,100000,1000000,10000000]) plt.show()

from IPython.display import Image Image(filename='/work/Screenshot 2022-02-14 at 11.11.22 PM.png')

x=np.array(range(1,10000000)) y=x*11+x*(np.log2(x/10)) f,axes = plt.subplots(1,1,figsize=(16,10)) plt.plot(x,y) plt.xlabel('Size of array, n') plt.ylabel('Key comparisons, W(n)') plt.xscale('log') plt.title('Theoretical W(n) against n', fontsize=20) plt.show()

ii. Fixed n, Vary S

count2 = [] for S in range(2, 101): tempcopy = list(data[2]) # test_copy = list(testdata) counter = mergeSertionSort(tempcopy,0,99999,S) count2.append(counter) print(count2)

xaxis = np.linspace(2,100,99) f,axes = plt.subplots(1,1,figsize=(16,10)) plt.plot(xaxis,count2,linewidth=3) plt.xlabel("S value") plt.ylabel("Key comparisons") plt.title('Empirical W(n) against S', fontsize=20) plt.show()

print("The optimal S value for n = 100,000 is : ", end="") print(count2.index(min(count2))+2)

x=np.array(range(1,101)) y=100000*x+100000*(np.log2(100000/x)) f,axes = plt.subplots(1,1,figsize=(16,10)) plt.plot(x,y) plt.xlabel('Size of S, S') plt.ylabel('Key comparisons, W(n)') plt.title('Theoretical W(n) against S', fontsize=20) plt.show()

iii. Finding Optimal S

count3 = [] x_S = [] y_arrSize = [] num = [3, 4, 5] for S in range(1, 30): for i in num: temparr = list(data[i-3]) counter = mergeSertionSort(temparr,0,len(temparr)-1,S) count3.append(counter) y_arrSize.append(10**i) x_S.append(S) print(count3)

fig = plt.figure() ax = fig.add_subplot(111, projection='3d') fig.set_size_inches(10, 10) ax.plot(x_S, y_arrSize, count3) ax.set_title("Finding Optimal S") ax.set_xlabel("S value") ax.set_ylabel("Size of array") ax.set_zlabel("Key comparisons") plt.show()

Given the rather linear trend of size of array VS key comparisons, we can thus narrow down our analysis to looking at S value VS key comparisons.

Hence, zooming in onto the individual n values, we plot the S value against the number of key comparisons to identify a common trend as S varies from 2 to 10.

(1) n = 1000

count3_1 = [] for S in range(2, 10): temp1 = list(data[0]) counter = mergeSertionSort(temp1,0,999,S) count3_1.append(counter) print(count3_1) x=np.array(range(2,10)) f,axes = plt.subplots(1,1,figsize=(16,10)) plt.plot(x,count3_1,linewidth=3) plt.xlabel("S value") plt.ylabel("Key comparisons") plt.title('Empirical W(n) against S for n=1000', fontsize=20) plt.show()

optS_1000=count3_1.index(min(count3_1))+2 print("The optimal S value for n = 1000 is : ", end="") print(optS_1000)

(2) n = 10,000

count3_2 = [] for S in range(2, 10): temp2=list(data[1]) counter = mergeSertionSort(temp2,0,9999,S) count3_2.append(counter) print(count3_2) x=np.array(range(2,10)) f,axes = plt.subplots(1,1,figsize=(16,10)) plt.plot(x,count3_2,linewidth=3) plt.xlabel("S value") plt.ylabel("Key comparisons") plt.title('Empirical W(n) against S for n=10000', fontsize=20) plt.show()

optS_10000=count3_2.index(min(count3_2))+2 print("The optimal S value for n = 10,000 is : ", end="") print(optS_10000)

(3) n = 100,000

count3_3 = [] for S in range(2, 10): temp3=list(data[2]) counter = mergeSertionSort(temp3,0,99999,S) count3_3.append(counter) print(count3_3) x=np.array(range(2,10)) f,axes = plt.subplots(1,1,figsize=(16,10)) plt.plot(x,count3_3,linewidth=3) plt.xlabel("S value") plt.ylabel("Key comparisons") plt.title('Empirical W(n) against S for n=100000', fontsize=20) plt.show()

optS_100000=count3_3.index(min(count3_3))+2 print("The optimal S value for n = 100,000 is : ", end="") print(optS_100000)

(4) n = 1,000,000

count3_4 = [] for S in range(2,10): temp4=list(data[3]) counter = mergeSertionSort(temp4,0,999999,S) count3_4.append(counter) print(count3_4)

f,axes = plt.subplots(1,1,figsize=(16,10)) plt.plot(x,count3_4,linewidth=3) plt.xlabel("S value") plt.ylabel("Key comparisons") plt.title('Empirical W(n) against S for n=1000000', fontsize=20) plt.show()

optS_1000000=count3_4.index(min(count3_4))+2 print("The optimal S value for n = 1,000,000 is : ", end="") print(optS_1000000)

(5) n=10,000,000

count3_5 = [] for S in range(2, 10): temp5=list(data[4]) counter = mergeSertionSort(temp5,0,9999999,S) count3_5.append(counter) print(count3_5)

f,axes = plt.subplots(1,1,figsize=(16,10)) x=np.array(range(2,10)) plt.plot(x,count3_5,linewidth=3) plt.xlabel("S value") plt.ylabel("Key comparisons") plt.title('Empirical W(n) against S for n=1000000', fontsize=20) plt.show()

optS_10000000=count3_5.index(min(count3_5))+2 print("The optimal S value for n = 10,000,000 is : ", end="") print(optS_10000000)

new_x=np.linspace(2,10,50) poly1=np.poly1d(np.polyfit(x, count3_1, 2)) poly2=np.poly1d(np.polyfit(x, count3_2, 2)) poly3=np.poly1d(np.polyfit(x, count3_3, 2)) poly4=np.poly1d(np.polyfit(x, count3_4, 2)) poly5=np.poly1d(np.polyfit(x, count3_5, 2)) new_y1=poly1(new_x) new_y2=poly2(new_x) new_y3=poly3(new_x) new_y4=poly4(new_x) new_y5=poly5(new_x) f,axes = plt.subplots(3,2,figsize=(10,10)) axes[0,0].plot(x,count3_1,linewidth=3) axes[0,0].plot(new_x, new_y1) axes[0,1].plot(x,count3_2,linewidth=3) axes[0,1].plot(new_x, new_y2) axes[1,0].plot(x,count3_3,linewidth=3) axes[1,0].plot(new_x, new_y3) axes[1,1].plot(x,count3_4,linewidth=3) axes[1,1].plot(new_x, new_y4) axes[2,0].plot(x,count3_5,linewidth=3) axes[2,0].plot(new_x, new_y5) plt.show()

print("Average Optimal S Value is: ", (optS_1000+optS_10000+optS_100000+optS_1000000+optS_10000000)//5+1)

(D) MergeSertionSort vs MergeSort

def mergeSort(arr, l, r): if(r-l<=0): return 0 mid = (l+r)//2 c1=mergeSort(arr, l, mid) c2=mergeSort(arr, mid+1, r) c3=merge(arr, l, r, mid) #returns total no. key comparisons return c1+c2+c3

optS=6 msdata=list(data[4]) mssdata=list(data[4]) print("Regular MergeSort Key Comparisons: ",mergeSort(msdata, 0, 9999999)) print("MergeSertionSort Key Comparisons: ", mergeSertionSort(mssdata, 0, 9999999, optS))

df = pd.DataFrame(columns=('MergeSort','MergeSertionSort')) for i in range(5): arr1 = list(data[4]) arr2 = list(data[4]) tms = process_time() mergeSort(arr1, 0, 9999999) tms2 = process_time() mergeSertionSort(arr2, 0, 9999999, optS) mss=process_time() df.loc[i]=[tms2-tms, mss-tms2] display(df) print("Average Results:\n",df.mean(axis=0))

.css-15w88e5{color:var(--chakra-colors-fg-neutral-primary);font-weight:inherit;letter-spacing:-0.09px;}(A) Algorithm Implementation