import numpy as np
import numpy.random as random
#from numpy.fft import fft
from scipy.io import wavfile
import matplotlib.pyplot as plt
import seaborn as sns
import os
from scipy import fftpack #had to import because couldn't use np.fft.fftfreq
%matplotlib inline
sns.set()
sns.set(font_scale=1.5)
data_dir = './recordings/'
# Digits of interest (0 to 9); extend this list to load more digits.
digits = [1, 2]
# Per-digit storage for the raw waveforms and their source file names.
signals = {d: [] for d in digits}
file_names = {d: [] for d in digits}
# Import every recording whose file name starts with "<digit>_".
for filename in os.listdir(data_dir):
    for d in digits:
        if not filename.startswith(str(d) + '_'):
            continue
        wav = wavfile.read(data_dir + filename)[1]
        # Keep mono recordings only (stereo files have a 2-D sample array).
        if len(wav.shape) < 2:
            signals[d].append(wav)
            file_names[d].append(filename)
# Longest recording length — used later as the common FFT length.
N = max(len(v) for d in digits for v in signals[d])
print(N)
/shared-libs/python3.7/py-core/lib/python3.7/site-packages/ipykernel_launcher.py:15: WavFileWarning: Chunk (non-data) not understood, skipping it.
from ipykernel import kernelapp as app
6112
# Split the dataset into train and test sets with an 80/20 random split.
# Each digit has 100 recordings, so we shuffle the indices 0..99 once and
# reuse the same partition for every digit.
ix = np.arange(100)
random.shuffle(ix)
ix_train = ix[:80]  # indices of the training recordings
ix_test = ix[80:]   # indices of the held-out test recordings
print(ix_train)
print(ix_test)
[29 30 66 86 0 63 28 70 62 21 79 45 40 68 98 17 37 57 96 56 73 95 97 5
82 65 87 55 22 69 8 24 13 92 34 4 61 14 76 38 18 90 67 94 81 53 59 77
39 26 33 35 58 15 78 31 71 41 74 6 80 88 9 72 20 23 83 84 50 47 75 89
85 99 11 43 51 1 25 49]
[ 3 36 44 2 12 60 91 10 48 52 42 54 93 16 46 7 27 19 64 32]
# Next we compute the average spectrum of each spoken digit in the training
# set, considering only frequencies up to 1.5 kHz.
# Sampling rate is 8 kHz, so the sample period is:
Ts = 1.0 / 8000
# Index of the FFT bin closest to 1.5 kHz: bin spacing is 1/(N*Ts) Hz,
# so 1500 Hz falls at bin 1500 * Ts * N (rounded up).
ix_cut = int(np.ceil(1500 * Ts * N))
# Per-digit mean spectral magnitudes (filled in by the Q1 cell below).
transforms = {d: [] for d in digits}
# Same, normalized to unit Euclidean norm.
mean_transforms = {d: [] for d in digits}
print(ix_cut)
1146
# Code Solution to Q1 Here
# Collect the training signals for each digit.
signals_train = {d: [] for d in digits}
for digit in digits:
    for index in ix_train:
        signals_train[digit].append(signals[digit][index])
# Storage for the DFTs of the training signals.
dtfft = {d: [] for d in digits}      # full N-point spectra
cut_dtfft = {d: [] for d in digits}  # spectra truncated to bins below 1.5 kHz
phase = {d: [] for d in digits}          # NOTE(review): unused below
phase_normal = {d: [] for d in digits}   # NOTE(review): unused below
for digit in digits:
    instances = len(signals_train[digit])  # number of training signals for this digit
    for v in signals_train[digit]:
        # Zero-pad every signal to length N so all spectra share one frequency grid.
        # FIX: compute the FFT once and slice it — the original called
        # np.fft.fft(v, n=N) twice per signal (once full, once truncated).
        spectrum = np.fft.fft(v, n=N)
        dtfft[digit].append(spectrum)
        cut_dtfft[digit].append(spectrum[:ix_cut])  # keep first ix_cut bins (<= 1.5 kHz)
    # Mean spectral magnitude of digit d over its training instances.
    average = (1 / instances) * np.abs(np.array(cut_dtfft[digit])).sum(axis=0)
    transforms[digit] = average
    # Normalize to unit Euclidean norm so digits are comparable.
    mean_transforms[digit] = transforms[digit] / np.sqrt(np.sum(np.abs(transforms[digit]) ** 2))
# In this next part, plot the average spectral magnitude of each digit.
# Code Solution to Q2 here
# Frequency axis (Hz) for the first ix_cut FFT bins.
freq = fftpack.fftfreq(N, d=Ts)[:ix_cut]
# Average spectral magnitude for recordings of digits 1 and 2.
for d in (1, 2):
    plt.plot(freq, mean_transforms[d], label=f"digit {d}")
plt.xlabel("Frequency (Hz)")
plt.ylabel("Average spectral magnitude (normalized)")
plt.legend(loc='upper right')
plt.show()
# classifier function
# receives a vector, computes the product with average digits, and returns the max inner product
# Input: sample x (vector)
def mean_classifier(x, templates=None, n_fft=None, cut=None):
    """Classify a signal by matched filtering against the mean digit spectra.

    Parameters
    ----------
    x : array-like
        Time-domain signal to classify.
    templates : dict, optional
        Maps digit -> mean spectral-magnitude template. Defaults to the
        module-level ``mean_transforms``.
    n_fft : int, optional
        FFT length; defaults to the module-level ``N``.
    cut : int, optional
        Number of low-frequency bins to keep (defaults to ``ix_cut``,
        i.e. everything up to 1.5 kHz).

    Returns
    -------
    The digit whose template has the largest inner product with |FFT(x)|.
    """
    if templates is None:
        templates = mean_transforms
    if n_fft is None:
        n_fft = N
    if cut is None:
        cut = ix_cut
    # Magnitude spectrum of x, truncated to the bins below 1.5 kHz.
    spectrum = np.abs(np.fft.fft(x, n=n_fft)[:cut])
    # FIX: score each digit with a scalar inner product. The original stored
    # each score inside a one-element list and let max() compare the lists.
    scores = {d: np.dot(spectrum, np.abs(templates[d])) for d in templates}
    return max(scores, key=scores.get)
# Write answer for Q3b here
#When the set of digits analyzed includes just 1 and 2 and we apply the classifier to the test set, we find a
#90% average accuracy rate. Digit 1 is classified accurately 95% of the time; digit 2 is classified accurately 85% of the time.
# Code 3b Here
# Build the test set and score the classifier on it.
signals_test = {d: [] for d in digits}
accurate = 0           # correct classifications over all digits
total_comparisons = 0  # classifications attempted over all digits
accurate_d = {d: 0 for d in digits}  # per-digit accuracy (filled as %)
for digit in digits:
    for index in ix_test:
        signals_test[digit].append(signals[digit][index])
    # A comparison is accurate when the true digit equals the prediction.
    total_comparisons_d = len(signals_test[digit])
    for v in signals_test[digit]:
        total_comparisons += 1
        if mean_classifier(v) == digit:
            accurate += 1
            accurate_d[digit] += 1
    accurate_d[digit] = accurate_d[digit] / total_comparisons_d * 100
# Share of accurate classifications out of all comparisons, as a percentage.
accuracy = accurate / total_comparisons * 100
print(accuracy)
print(accurate_d)
92.5
{1: 95.0, 2: 90.0}
# Write answer for Q4 here
#When we classify only a few digits (1 and 2), the average accuracy of the classifier across digits is around 90%;
#When we classify more digits (0,1,2,3,4), the average accuracy of the classifier is around 64%; the accuracy for
#each digit classified is 80.0% for 0, 75.0% for 1, 50.0% for 2, 35.0% for 3, and 80.0% for 4 so there is substantial
#variation in accuracy between digits.
#when we classify all digits the accuracy is 54%, the accuracies for each digit are
#{0: 45.0%, 1: 30.0%, 2: 50.0%, 3: 15.0%, 4: 70.0%, 5: 100.0%, 6: 55.00000000000001%, 7: 90.0%, 8: 50.0%, 9: 35.0%}
#so digits are classified less accurately as we increase the number of digits we classify.
# Code Q4 here
# Extend the `digits` list in the second cell and re-run the notebook,
# then re-score the classifier on the (already built) test set.
accurate = 0           # correct classifications over all digits
total_comparisons = 0  # classifications attempted over all digits
accurate_d = {d: 0 for d in digits}  # per-digit accuracy (filled as %)
for digit in digits:
    # A comparison is accurate when the true digit equals the prediction.
    total_comparisons_d = len(signals_test[digit])
    for v in signals_test[digit]:
        total_comparisons += 1
        if mean_classifier(v) == digit:
            accurate += 1
            accurate_d[digit] += 1
    accurate_d[digit] = accurate_d[digit] / total_comparisons_d * 100
# Share of accurate classifications out of all comparisons, as a percentage.
accuracy = accurate / total_comparisons * 100
print(accuracy)
print(accurate_d)
92.5
{1: 95.0, 2: 90.0}
# Code Q5 here
# define new mean classifier function
# digits = [1,2]
def mean_classifier(x, references=None, n_fft=None, cut=None):
    """Classify a signal by its best match among individual training spectra.

    BUG FIX: the original collected every training dot product in a list per
    digit and then picked ``max(dot_values, key=dot_values.get)``, which
    compares the LISTS lexicographically — so the decision was effectively
    made by the dot product with whichever training sample was stored first.
    That produced the degenerate 50% / {1: 0%, 2: 100%} result. We now score
    each digit by its best (maximum) dot product over its training spectra.

    Parameters
    ----------
    x : array-like
        Time-domain signal to classify.
    references : dict, optional
        Maps digit -> list of truncated training spectra. Defaults to the
        module-level ``cut_dtfft``.
    n_fft : int, optional
        FFT length; defaults to the module-level ``N``.
    cut : int, optional
        Number of low-frequency bins to keep; defaults to ``ix_cut``.

    Returns
    -------
    The digit whose best-matching training spectrum has the largest inner
    product with |FFT(x)|.
    """
    if references is None:
        references = cut_dtfft
    if n_fft is None:
        n_fft = N
    if cut is None:
        cut = ix_cut
    # Magnitude spectrum of x, truncated to the bins below 1.5 kHz.
    spectrum = np.abs(np.fft.fft(x, n=n_fft)[:cut])
    scores = {d: max(np.dot(spectrum, np.abs(v)) for v in references[d])
              for d in references}
    return max(scores, key=scores.get)
# Re-score the test set with the per-sample (Q5) classifier.
accurate = 0           # correct classifications over all digits
total_comparisons = 0  # classifications attempted over all digits
accurate_d = {d: 0 for d in digits}  # per-digit accuracy (filled as %)
for digit in digits:
    # A comparison is accurate when the true digit equals the prediction.
    total_comparisons_d = len(signals_test[digit])
    for v in signals_test[digit]:
        total_comparisons += 1
        if mean_classifier(v) == digit:
            accurate += 1
            accurate_d[digit] += 1
    accurate_d[digit] = accurate_d[digit] / total_comparisons_d * 100
# Share of accurate classifications out of all comparisons, as a percentage.
accuracy = accurate / total_comparisons * 100
print(accuracy)
print(accurate_d)
50.0
{1: 0.0, 2: 100.0}