import numpy as np
import math
import numpy.random as random
from numpy.fft import fft
from scipy.io import wavfile
import matplotlib.pyplot as plt
import seaborn as sns
import os
%matplotlib inline
sns.set()
sns.set(font_scale=1.5)
data_dir = './recordings/'
# determine digits of interest (0 to 9)
digits = [1,2] # change here to load more digits
# dictionary that will store our values
signals = {d:[] for d in digits}
file_names = {d:[] for d in digits}
# import files
for filename in os.listdir(data_dir):
    # iterate over digits
    for d in digits:
        if filename.startswith(str(d) + '_'):
            wav = wavfile.read(data_dir + filename)[1]
            # keep single-channel (mono) recordings only
            if len(wav.shape) < 2:
                signals[d].append(wav)
                file_names[d].append(filename)
# find the maximum signal length; this will be the FFT size
N = max([len(v) for d in digits for v in signals[d]])
# next we split our dataset into train and test sets
# using an 80/20 random split
# create train/test split
ix = np.arange(100)
random.shuffle(ix)
# select train entries
ix_train = ix[:80]
# select test entries
ix_test = ix[80:]
# next we compute the average spectrum of each spoken digit in the training set.
# we will consider a window up to 1.5 kHz
# sampling rate is 8kHz
Ts = 1.0/8000
ix_cut = int(np.ceil(1500*Ts*N))
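# Sanity check (my addition, not part of the question): with an N-point FFT at
# sampling period Ts, bin k corresponds to frequency k / (N * Ts) Hz, so the
# cutoff index computed above should land at roughly 1.5 kHz.
fs = 1.0 / Ts                 # sampling rate, 8 kHz
freq_resolution = fs / N      # spacing between FFT bins in Hz
print(f'cutoff bin {ix_cut} corresponds to about {ix_cut * freq_resolution:.1f} Hz')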
# initialize dictionary for storing transforms
transforms = {}
# initialize dictionary for storing mean transforms
mean_transforms = {}
# compute the mean transform of each digit in the training set.
# Make sure to only keep the spectrum up to 1.5 kHz
# Code Solution to Q1 Here
transforms = {d: [] for d in digits}
av_spec_mag = {d: [] for d in digits}
mean_transforms = {d: [] for d in digits}

# zero-padded FFT of every recording, truncated to the bins below 1.5 kHz
for d in digits:
    for s in signals[d]:
        transforms[d].append(fft(s, N)[:ix_cut])

# split the transforms / raw signals according to the train/test indices
transforms_train = {d: [transforms[d][i] for i in ix_train] for d in digits}
samples_test = {d: [signals[d][i] for i in ix_test] for d in digits}

# average spectral magnitude of each digit over the training set
for d in digits:
    av_spec_mag[d] = sum(abs(t) for t in transforms_train[d]) / len(transforms_train[d])

# normalize each mean spectrum to unit Euclidean norm
for d in digits:
    mean_transforms[d] = av_spec_mag[d] / math.sqrt(sum(abs(ele)**2 for ele in av_spec_mag[d]))
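# Quick check (my addition): after the normalization above each mean transform
# should have unit Euclidean norm, so the inner products computed by the
# classifier below are comparable across digits.
for d in digits:
    print(d, np.linalg.norm(mean_transforms[d]))  # should be close to 1.0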
# In this next part, plot the average spectral magnitude of each digit.
# Code Solution to Q2 here
# frequency (in Hz) of each retained FFT bin
x_values = np.arange(ix_cut) / (N * Ts)
for d in digits:
    plt.plot(x_values, abs(av_spec_mag[d]))
    plt.xlabel('Frequency (Hz)')
    plt.ylabel('Spectral amplitude')
    plt.title(f'Average spectral magnitude of digit {d}')
    plt.show()
# classifier function
# receives a sample vector, computes the inner product of its magnitude spectrum
# with each digit's mean spectrum, and returns the digit with the largest inner product
# Input: sample x (vector)
def mean_classifier(x):
    transform_x = fft(x, N)
    transform_x = transform_x[:ix_cut]
    # Code Q3a Here
    # inner product of the sample's magnitude spectrum with each digit's mean spectrum
    scores = {}
    for d in digits:
        scores[d] = np.dot(np.abs(transform_x), np.abs(mean_transforms[d]))
    # predicted digit: the one with the largest inner product
    return max(scores, key=scores.get)
# Write answer for Q3b here
# The classifier labels most of the test samples correctly, but it is not perfect:
# some recordings are assigned to the wrong digit.
# Code 3b Here
predictions_mean = {d: [] for d in digits}
for d in digits:
    for t in samples_test[d]:
        predictions_mean[d].append(mean_classifier(t))
for d in digits:
    print(predictions_mean[d])
# Write answer for Q4 here
# I tried the code below with different numbers of digits: the more digits the
# classifier has to distinguish, the lower its accuracy becomes.
# Code Q4 here
accuracy = {d: 0 for d in digits}
for d in digits:
    accuracy[d] = predictions_mean[d].count(d) / len(predictions_mean[d]) * 100
for d in digits:
    print(f'The accuracy for digit {d} is {accuracy[d]:.1f} percent')
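# Extra check (my addition): overall accuracy pooled over all test samples,
# which is convenient when comparing runs with different numbers of digits.
total_correct = sum(predictions_mean[d].count(d) for d in digits)
total_samples = sum(len(predictions_mean[d]) for d in digits)
print(f'Overall accuracy: {100 * total_correct / total_samples:.1f} percent')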
# Code Q5 here
# Alternative classifier: minimize the squared difference between each digit's
# mean transform and the test sample's magnitude spectrum.
def diff_classifier(x):
    transform_x = fft(x, N)
    transform_x = transform_x[:ix_cut]
    # squared Euclidean distance between the sample's magnitude spectrum
    # and each digit's mean spectrum
    dists = {}
    for d in digits:
        dists[d] = np.sum((np.abs(transform_x) - np.abs(mean_transforms[d])) ** 2)
    # predicted digit: the one whose mean spectrum is closest
    return min(dists, key=dists.get)
predictions_diff = {d: [] for d in digits}
for d in digits:
    for t in samples_test[d]:
        predictions_diff[d].append(diff_classifier(t))
for d in digits:
    print(predictions_diff[d])
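# For comparison with Q4 (my addition): the same per-digit accuracy computation,
# applied to the squared-difference classifier.
accuracy_diff = {d: 0 for d in digits}
for d in digits:
    accuracy_diff[d] = predictions_diff[d].count(d) / len(predictions_diff[d]) * 100
    print(f'The accuracy for digit {d} with the difference classifier is {accuracy_diff[d]:.1f} percent')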