import numpy as np
import numpy.random as random
#from numpy.fft import fft
from scipy.io import wavfile
import matplotlib.pyplot as plt
import seaborn as sns
import os
from scipy import fftpack #had to import because couldn't use np.fft.fftfreq
%matplotlib inline
sns.set()
sns.set(font_scale=1.5)
data_dir = './recordings/'
# determine digits of interest (0 to 9)
digits = [1,2] # change here to load more digits
# dictionary that will store our values
signals = {d:[] for d in digits}
file_names = {d:[] for d in digits}
# import files
for filename in os.listdir(data_dir):
    # iterate over digits
    for d in digits:
        if filename.startswith(str(d)+'_'):
            wav = wavfile.read(data_dir+filename)[1]
            # keep only single-channel (mono) recordings
            if len(wav.shape)<2:
                signals[d].append(wav)
                file_names[d].append(filename)
# find maximum of vector length
N = max([len(v) for d in digits for v in signals[d]])
print(N)
# next we split our dataset into train and test sets
# we will use an 80/20 random split.
# create train/test split (ix indexes the 100 recordings per digit)
ix = np.arange(100)
random.shuffle(ix)
# select train entries
ix_train = ix[:80]
#select test entries
ix_test = ix[80:]
print(ix_train)
print(ix_test)
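# Note (optional): the shuffle above is unseeded, so the split changes between runs.
# A minimal sketch of a reproducible split would seed the generator first, e.g.
#   random.seed(0)   # the seed value 0 is an arbitrary choice
# before calling random.shuffle(ix).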
# next we compute the average spectrum of each spoken digit in the training set.
# we will consider a window up to 1.5 kHz
# sampling rate is 8kHz
Ts = 1.0/8000
ix_cut = int(np.ceil(1500*Ts*N))
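# Sanity check (optional): bin k of an N-point DFT with sampling period Ts corresponds to
# frequency k/(N*Ts) Hz, so ix_cut = ceil(1500*Ts*N) is the first bin at or above 1.5 kHz
# and the slice [:ix_cut] keeps only frequencies below that.
print(fftpack.fftfreq(N, d=Ts)[ix_cut-1], fftpack.fftfreq(N, d=Ts)[ix_cut])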
# initialize dictionary for storing transforms
transforms = {d: [] for d in digits}
# initialize dictionary for storing mean transforms
mean_transforms = {d: [] for d in digits}
# compute mean transform of each digit in the training set.
# Make sure to only keep the spectrum up to 1.5kHz
print(ix_cut)
# Code Solution to Q1 Here
#signals associated with each digit in train dataset
signals_train = {d:[] for d in digits}
for digit in digits:
    for index in ix_train:
        signals_train[digit].append(signals[digit][index])
#store DFTs of the training signals
dtfft = {d:[] for d in digits}
cut_dtfft = {d:[] for d in digits}
for digit in digits:
    instances = len(signals_train[digit]) #how many training signals for this digit
    #get the N-point FFT of each training instance of the digit
    for v in signals_train[digit]:
        spectrum = np.fft.fft(v, n=N)
        dtfft[digit].append(spectrum)
        #discard frequencies above 1.5 kHz by keeping only the first ix_cut bins
        cut_dtfft[digit].append(spectrum[:ix_cut])
    #mean spectral magnitude of digit d at each frequency bin k
    average = np.abs(np.array(cut_dtfft[digit])).sum(axis=0) / instances
    transforms[digit] = average
    #normalize the mean spectrum to unit Euclidean norm
    normalized = transforms[digit] / np.sqrt(np.sum(np.abs(transforms[digit])**2))
    mean_transforms[digit] = normalized
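# Quick check (optional): each mean spectrum should now have unit Euclidean norm.
for d in digits:
    print(d, np.linalg.norm(mean_transforms[d]))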
# In this next part, plot the average spectral magnitude of each digit.
# Code Solution to Q2 here
#frequencies corresponding to the first ix_cut DFT bins
freq = fftpack.fftfreq(N, d=Ts)[:ix_cut]
#plot the average spectral magnitude for each digit
for d in digits:
    plt.plot(freq, mean_transforms[d], label="digit %d" % d)
plt.xlabel("Frequency (Hz)")
plt.ylabel("Average spectral magnitude (normalized)")
plt.legend(loc='upper right')
plt.show()
# classifier function
# receives a sample vector, computes the inner product of its magnitude spectrum
# with each digit's mean spectrum, and returns the digit with the largest inner product
# Input: sample x (vector)
def mean_classifier(x):
    #store the inner product with each digit's mean spectrum
    values = {}
    #DFT of the signal x, truncated to frequencies below 1.5 kHz
    dfft_x = np.fft.fft(x, n=N)[:ix_cut]
    for d in digits:
        #inner product between magnitude spectra
        values[d] = np.dot(np.abs(dfft_x), np.abs(mean_transforms[d]))
    #return the digit with the largest inner product
    return max(values, key=values.get)
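# Example usage (illustrative): classify a single loaded recording; any 1-D array of samples works.
# print(mean_classifier(signals[digits[0]][0]))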
# Write answer for Q3b here
#When the set of digits analyzed includes just 1 and 2 and we apply the classifier to the test set, we find a
#90% average accuracy rate: digit 1 is classified correctly 95% of the time and digit 2 is classified correctly 85% of the time.
# Code 3b Here
#signals associated with each digit in test dataset
signals_test = {d:[] for d in digits}
#overall accuracy counters
accurate = 0
total_comparisons = 0
#accuracy per digit
accurate_d = {d:0 for d in digits}
for digit in digits:
    for index in ix_test:
        signals_test[digit].append(signals[digit][index])
    #a classification counts as accurate if the true digit equals the predicted digit
    total_comparisons_d = len(signals_test[digit])
    for v in signals_test[digit]:
        total_comparisons += 1
        if digit == mean_classifier(v):
            accurate += 1
            accurate_d[digit] += 1
    accurate_d[digit] = accurate_d[digit] / total_comparisons_d * 100
#share of accurate classifications out of all comparisons
accuracy = accurate / total_comparisons * 100
print(accuracy)
print(accurate_d)
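# The evaluation loop above is repeated for Q4 and Q5 below; a helper along these lines could
# replace it (a sketch only; the name evaluate_accuracy is not part of the assignment):
def evaluate_accuracy(classify, test_signals):
    correct, total, per_digit = 0, 0, {}
    for d, vs in test_signals.items():
        hits = sum(1 for v in vs if classify(v) == d)
        per_digit[d] = hits / len(vs) * 100
        correct += hits
        total += len(vs)
    return correct / total * 100, per_digit
# e.g. print(evaluate_accuracy(mean_classifier, signals_test))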
# Write answer for Q4 here
#When we classify only two digits (1 and 2), the average accuracy of the classifier is around 90%.
#When we classify more digits (0,1,2,3,4), the average accuracy drops to around 64%; the per-digit accuracy is
#80% for 0, 75% for 1, 50% for 2, 35% for 3, and 80% for 4, so there is substantial variation
#in accuracy between digits.
#When we classify all ten digits, the average accuracy is 54%, with per-digit accuracies of
#45% for 0, 30% for 1, 50% for 2, 15% for 3, 70% for 4, 100% for 5, 55% for 6, 90% for 7, 50% for 8, and 35% for 9,
#so digits are classified less accurately as the number of digits to distinguish grows.
# Code Q4 here
#extend the digits list in the second cell, re-run the notebook, and evaluate on the test set:
#overall accuracy counters
accurate = 0
total_comparisons = 0
#accuracy per digit
accurate_d = {d:0 for d in digits}
for digit in digits:
    #a classification counts as accurate if the true digit equals the predicted digit
    total_comparisons_d = len(signals_test[digit])
    for v in signals_test[digit]:
        total_comparisons += 1
        if digit == mean_classifier(v):
            accurate += 1
            accurate_d[digit] += 1
    accurate_d[digit] = accurate_d[digit] / total_comparisons_d * 100
#share of accurate classifications out of all comparisons
accuracy = accurate / total_comparisons * 100
print(accuracy)
print(accurate_d)
# Code Q5 here
#define a new classifier that compares the sample against every training spectrum of each digit
#digits = [1,2]
def mean_classifier(x):
    #store the inner products with the training spectra of each digit
    dot_values = {d:[] for d in digits}
    #DFT of the signal x, truncated to frequencies below 1.5 kHz
    dfft_x = np.fft.fft(x, n=N)[:ix_cut]
    for d in digits:
        #inner product with each training spectrum for digit d
        for v in cut_dtfft[d]:
            dot_values[d].append(np.dot(np.abs(dfft_x), np.abs(v)))
    #score each digit by its best-matching training spectrum and return the highest-scoring digit
    return max(digits, key=lambda d: max(dot_values[d]))
#evaluate the new classifier on the test set
accurate = 0
total_comparisons = 0
accurate_d = {d:0 for d in digits}
for digit in digits:
    #a classification counts as accurate if the true digit equals the predicted digit
    total_comparisons_d = len(signals_test[digit])
    for v in signals_test[digit]:
        total_comparisons += 1
        if digit == mean_classifier(v):
            accurate += 1
            accurate_d[digit] += 1
    accurate_d[digit] = accurate_d[digit] / total_comparisons_d * 100
#share of accurate classifications out of all comparisons
accuracy = accurate / total_comparisons * 100
print(accuracy)
print(accurate_d)