import numpy as np
import numpy.random as random
#from numpy.fft import fft
from scipy.io import wavfile
import matplotlib.pyplot as plt
import seaborn as sns
import os
from scipy import fftpack #had to import because couldn't use np.fft.fftfreq
%matplotlib inline
sns.set()
sns.set(font_scale=1.5)
data_dir = './recordings/'
# Digits of interest (0 to 9); extend this list to load more digits.
digits = [1, 2]
# Per-digit storage for the raw waveforms and their source file names.
signals = {d: [] for d in digits}
file_names = {d: [] for d in digits}
# Import every recording whose file name starts with "<digit>_".
for filename in os.listdir(data_dir):
    for d in digits:
        if not filename.startswith(str(d) + '_'):
            continue
        wav = wavfile.read(data_dir + filename)[1]
        # Keep mono recordings only (stereo files have a 2-D sample array).
        if len(wav.shape) < 2:
            signals[d].append(wav)
            file_names[d].append(filename)
# Longest recording length — used later as the common FFT length.
N = max(len(v) for d in digits for v in signals[d])
print(N)
/shared-libs/python3.7/py-core/lib/python3.7/site-packages/ipykernel_launcher.py:15: WavFileWarning: Chunk (non-data) not understood, skipping it.
from ipykernel import kernelapp as app
6112
# Split the dataset into train and test sets with an 80/20 random split.
# Each digit has 100 recordings, so we shuffle the indices 0..99 once and
# reuse the same partition for every digit.
ix = np.arange(100)
random.shuffle(ix)
ix_train = ix[:80]  # indices of the training recordings
ix_test = ix[80:]   # indices of the held-out test recordings
print(ix_train)
print(ix_test)
[29 30 66 86 0 63 28 70 62 21 79 45 40 68 98 17 37 57 96 56 73 95 97 5
82 65 87 55 22 69 8 24 13 92 34 4 61 14 76 38 18 90 67 94 81 53 59 77
39 26 33 35 58 15 78 31 71 41 74 6 80 88 9 72 20 23 83 84 50 47 75 89
85 99 11 43 51 1 25 49]
[ 3 36 44 2 12 60 91 10 48 52 42 54 93 16 46 7 27 19 64 32]
# Next we compute the average spectrum of each spoken digit in the training
# set, considering only frequencies up to 1.5 kHz.
# Sampling rate is 8 kHz, so the sample period is:
Ts = 1.0 / 8000
# Index of the FFT bin closest to 1.5 kHz: bin spacing is 1/(N*Ts) Hz,
# so 1500 Hz falls at bin 1500 * Ts * N (rounded up).
ix_cut = int(np.ceil(1500 * Ts * N))
# Per-digit mean spectral magnitudes (filled in by the Q1 cell below).
transforms = {d: [] for d in digits}
# Same, normalized to unit Euclidean norm.
mean_transforms = {d: [] for d in digits}
print(ix_cut)
1146
# Code Solution to Q1 Here
# Collect the training signals for each digit.
signals_train = {d: [] for d in digits}
for digit in digits:
    for index in ix_train:
        signals_train[digit].append(signals[digit][index])
# Storage for the DFTs of the training signals.
dtfft = {d: [] for d in digits}      # full N-point spectra
cut_dtfft = {d: [] for d in digits}  # spectra truncated to bins below 1.5 kHz
phase = {d: [] for d in digits}          # NOTE(review): unused below
phase_normal = {d: [] for d in digits}   # NOTE(review): unused below
for digit in digits:
    instances = len(signals_train[digit])  # number of training signals for this digit
    for v in signals_train[digit]:
        # Zero-pad every signal to length N so all spectra share one frequency grid.
        # FIX: compute the FFT once and slice it — the original called
        # np.fft.fft(v, n=N) twice per signal (once full, once truncated).
        spectrum = np.fft.fft(v, n=N)
        dtfft[digit].append(spectrum)
        cut_dtfft[digit].append(spectrum[:ix_cut])  # keep first ix_cut bins (<= 1.5 kHz)
    # Mean spectral magnitude of digit d over its training instances.
    average = (1 / instances) * np.abs(np.array(cut_dtfft[digit])).sum(axis=0)
    transforms[digit] = average
    # Normalize to unit Euclidean norm so digits are comparable.
    mean_transforms[digit] = transforms[digit] / np.sqrt(np.sum(np.abs(transforms[digit]) ** 2))
# In this next part, plot the average spectral magnitude of each digit.
# Code Solution to Q2 here
# Frequency axis (Hz) for the first ix_cut FFT bins.
freq = fftpack.fftfreq(N, d=Ts)[:ix_cut]
# Average spectral magnitude for recordings of digits 1 and 2.
for d in (1, 2):
    plt.plot(freq, mean_transforms[d], label=f"digit {d}")
plt.xlabel("Frequency (Hz)")
plt.ylabel("Average spectral magnitude (normalized)")
plt.legend(loc='upper right')
plt.show()
# classifier function
# receives a vector, computes the product with average digits, and returns the max inner product
# Input: sample x (vector)
def mean_classifier(x, templates=None, n_fft=None, cut=None):
    """Classify a signal by matched filtering against the mean digit spectra.

    Parameters
    ----------
    x : array-like
        Time-domain signal to classify.
    templates : dict, optional
        Maps digit -> mean spectral-magnitude template. Defaults to the
        module-level ``mean_transforms``.
    n_fft : int, optional
        FFT length; defaults to the module-level ``N``.
    cut : int, optional
        Number of low-frequency bins to keep (defaults to ``ix_cut``,
        i.e. everything up to 1.5 kHz).

    Returns
    -------
    The digit whose template has the largest inner product with |FFT(x)|.
    """
    if templates is None:
        templates = mean_transforms
    if n_fft is None:
        n_fft = N
    if cut is None:
        cut = ix_cut
    # Magnitude spectrum of x, truncated to the bins below 1.5 kHz.
    spectrum = np.abs(np.fft.fft(x, n=n_fft)[:cut])
    # FIX: score each digit with a scalar inner product. The original stored
    # each score inside a one-element list and let max() compare the lists.
    scores = {d: np.dot(spectrum, np.abs(templates[d])) for d in templates}
    return max(scores, key=scores.get)
# Write answer for Q3b here
#When the set of digits analyzed includes just 1 and 2 and we apply the classifier to the test set, we find a
#90% average accuracy rate. Digit 1 is classified accurately 95% of the time; digit 2 is classified accurately 85% of the time.
# Code 3b Here
# Build the test set and score the classifier on it.
signals_test = {d: [] for d in digits}
accurate = 0           # correct classifications over all digits
total_comparisons = 0  # classifications attempted over all digits
accurate_d = {d: 0 for d in digits}  # per-digit accuracy (filled as %)
for digit in digits:
    for index in ix_test:
        signals_test[digit].append(signals[digit][index])
    # A comparison is accurate when the true digit equals the prediction.
    total_comparisons_d = len(signals_test[digit])
    for v in signals_test[digit]:
        total_comparisons += 1
        if mean_classifier(v) == digit:
            accurate += 1
            accurate_d[digit] += 1
    accurate_d[digit] = accurate_d[digit] / total_comparisons_d * 100
# Share of accurate classifications out of all comparisons, as a percentage.
accuracy = accurate / total_comparisons * 100
print(accuracy)
print(accurate_d)
92.5
{1: 95.0, 2: 90.0}
# Write answer for Q4 here
#When we classify only a few digits (1 and 2), the average accuracy of the classifier across digits is around 90%;
#When we classify more digits (0,1,2,3,4), the average accuracy of the classifier is around 64%; the accuracy for
#each digit classified is 80.0% for 0, 75.0% for 1, 50.0% for 2, 35.0% for 3, and 80.0% for 4 so there is substantial
#variation in accuracy between digits.
#when we classify all digits the accuracy is 54%, the accuracies for each digit are
#{0: 45.0%, 1: 30.0%, 2: 50.0%, 3: 15.0%, 4: 70.0%, 5: 100.0%, 6: 55.00000000000001%, 7: 90.0%, 8: 50.0%, 9: 35.0%}
#so digits are classified less accurately as we increase the number of digits we classify.
# Code Q4 here
# Extend the `digits` list in the second cell and re-run the notebook,
# then re-score the classifier on the (already built) test set.
accurate = 0           # correct classifications over all digits
total_comparisons = 0  # classifications attempted over all digits
accurate_d = {d: 0 for d in digits}  # per-digit accuracy (filled as %)
for digit in digits:
    # A comparison is accurate when the true digit equals the prediction.
    total_comparisons_d = len(signals_test[digit])
    for v in signals_test[digit]:
        total_comparisons += 1
        if mean_classifier(v) == digit:
            accurate += 1
            accurate_d[digit] += 1
    accurate_d[digit] = accurate_d[digit] / total_comparisons_d * 100
# Share of accurate classifications out of all comparisons, as a percentage.
accuracy = accurate / total_comparisons * 100
print(accuracy)
print(accurate_d)
92.5
{1: 95.0, 2: 90.0}
# Code Q5 here
# define new mean classifier function
# digits = [1,2]
def mean_classifier(x, references=None, n_fft=None, cut=None):
    """Classify a signal by its best match among individual training spectra.

    BUG FIX: the original collected every training dot product in a list per
    digit and then picked ``max(dot_values, key=dot_values.get)``, which
    compares the LISTS lexicographically — so the decision was effectively
    made by the dot product with whichever training sample was stored first.
    That produced the degenerate 50% / {1: 0%, 2: 100%} result. We now score
    each digit by its best (maximum) dot product over its training spectra.

    Parameters
    ----------
    x : array-like
        Time-domain signal to classify.
    references : dict, optional
        Maps digit -> list of truncated training spectra. Defaults to the
        module-level ``cut_dtfft``.
    n_fft : int, optional
        FFT length; defaults to the module-level ``N``.
    cut : int, optional
        Number of low-frequency bins to keep; defaults to ``ix_cut``.

    Returns
    -------
    The digit whose best-matching training spectrum has the largest inner
    product with |FFT(x)|.
    """
    if references is None:
        references = cut_dtfft
    if n_fft is None:
        n_fft = N
    if cut is None:
        cut = ix_cut
    # Magnitude spectrum of x, truncated to the bins below 1.5 kHz.
    spectrum = np.abs(np.fft.fft(x, n=n_fft)[:cut])
    scores = {d: max(np.dot(spectrum, np.abs(v)) for v in references[d])
              for d in references}
    return max(scores, key=scores.get)
# Re-score the test set with the per-sample (Q5) classifier.
accurate = 0           # correct classifications over all digits
total_comparisons = 0  # classifications attempted over all digits
accurate_d = {d: 0 for d in digits}  # per-digit accuracy (filled as %)
for digit in digits:
    # A comparison is accurate when the true digit equals the prediction.
    total_comparisons_d = len(signals_test[digit])
    for v in signals_test[digit]:
        total_comparisons += 1
        if mean_classifier(v) == digit:
            accurate += 1
            accurate_d[digit] += 1
    accurate_d[digit] = accurate_d[digit] / total_comparisons_d * 100
# Share of accurate classifications out of all comparisons, as a percentage.
accuracy = accurate / total_comparisons * 100
print(accuracy)
print(accurate_d)
50.0
{1: 0.0, 2: 100.0}