import numpy as np
import numpy.random as random
from numpy.fft import fft
from scipy.io import wavfile
import matplotlib.pyplot as plt
import seaborn as sns
import os
# IPython magic (only valid inside a notebook/IPython session): render
# matplotlib figures inline in the cell output.
%matplotlib inline
# Apply seaborn's styling to subsequent matplotlib plots ...
sns.set()
# ... and apply it again with the font size scaled by 1.5.
sns.set(font_scale=1.5)
# Directory holding the recordings. A file belongs to digit d when its name
# starts with '<d>_'.
data_dir = './recordings/'
# determine digits of interest (0 to 9)
digits = [0, 1, 2, 3, 4, 5, 6, 7, 8, 9]  # change here to load more digits
# dictionaries that will store, per digit, the signals and their file names
signals = {d: [] for d in digits}
file_names = {d: [] for d in digits}
# import files
# os.listdir returns entries in arbitrary order; sorting makes the position of
# a recording inside signals[d] identical on every run and on every machine.
for filename in sorted(os.listdir(data_dir)):
    # iterate over digits
    for d in digits:
        if filename.startswith(str(d) + '_'):
            # wavfile.read returns (sample_rate, data); only the data is kept
            wav = wavfile.read(os.path.join(data_dir, filename))[1]
            # keep 1-D (single-channel) recordings only, and record the file
            # name alongside so signals[d][k] and file_names[d][k] stay paired
            if wav.ndim < 2:
                signals[d].append(wav)
                file_names[d].append(filename)
# find maximum of vector length
lengths = [len(v) for d in digits for v in signals[d]]
if not lengths:
    # fail with an explicit message instead of "max() arg is an empty sequence"
    raise ValueError(f'no single-channel recordings found in {data_dir!r}')
N = max(lengths)
# [cell output] /shared-libs/python3.7/py-core/lib/python3.7/site-packages/ipykernel_launcher.py:15: WavFileWarning: Chunk (non-data) not understood, skipping it.
# [cell output]   from ipykernel import kernelapp as app
# Split the recording indices into a training and a test subset using a
# random 80/20 partition of the indices 0..99.
# NOTE(review): the count of 100 is hard-coded - presumably every digit has
# exactly 100 recordings; confirm against the loaded data.
ix = np.arange(100)
random.shuffle(ix)  # random permutation, performed in place
# the first 80 shuffled indices form the training set, the last 20 the test set
ix_train, ix_test = np.split(ix, [80])
# Next we compute the average spectrum of each spoken digit in the training set.
# Only the first ix_cut DFT coefficients are kept (a window up to 1.5 kHz);
# the sampling rate is taken to be 8 kHz.
Ts = 1.0/8000
ix_cut = int(np.ceil(1500*Ts*N))
# per digit: list of truncated complex spectra of the training signals
transforms = {d:[] for d in digits}
# per digit: mean magnitude spectrum, scaled to unit Euclidean norm
mean_transforms = {}
# Code Solution to Q1 Here
# NOTE(review): fft() is called without an explicit length, so a signal of
# length L gets bins spaced 8000/L Hz apart and the first ix_cut bins reach
# 1.5 kHz only when L == N - confirm whether fft(signal, n=N) was intended.
for d in digits:
    spectra = transforms[d]
    for i in ix_train:
        spectrum = fft(signals[d][i])
        shortfall = ix_cut - len(spectrum)
        if shortfall > 0:
            # spectrum is shorter than the window: extend it with zeros
            spectrum = np.concatenate((spectrum, np.zeros(shortfall)))
        spectra.append(spectrum[:ix_cut])
    # average the magnitudes over the training spectra of this digit
    avg_magnitude = (1/len(ix_train)) * np.sum(np.abs(spectra), axis=0)
    # divide by the Euclidean norm so every mean spectrum has unit energy
    energy = np.sum(np.square(avg_magnitude))
    mean_transforms[d] = avg_magnitude/np.sqrt(energy)
# In this next part, plot the average spectral magnitude of each digit.
# Code Solution to Q2 here
plt.figure(figsize=(15,5))
# frequency axis shared by every curve: ix_cut points from 0 to 1500
xf = np.linspace(0.0,1500, int(ix_cut))
for d in digits:
    # one curve per digit, using the same 2/ix_cut scaling for all of them
    curve = np.abs(mean_transforms[d][0:int(ix_cut)]) * (2.0/ix_cut)
    plt.plot(xf, curve, label='%s' % d)
plt.xlabel("Hz")
plt.ylabel("Average spectral magnitude")
plt.legend()
plt.show()
# classifier function
# Code Q3a Here
def mean_classifier(x, templates=None):
    """Return the label whose template is most similar to the sample ``x``.

    Similarity is the inner product between the magnitude of ``x`` and the
    magnitude of a template. The label itself is returned, not the value of
    the inner product.

    Args:
        x: sample vector (real or complex); it must have the same length as
            the templates.
        templates: optional mapping ``label -> template vector``. When
            omitted, the module-level ``mean_transforms`` entries for the
            labels in ``digits`` are used.

    Returns:
        The label with the largest inner product. On ties, including the
        case where every product is 0, the label that comes first wins, so
        the result is always one of the known labels.

    Raises:
        ValueError: if there is no template to compare against.
    """
    if templates is None:
        templates = {d: mean_transforms[d] for d in digits}
    if not templates:
        raise ValueError('mean_classifier needs at least one template')
    # the sample magnitude does not depend on the label: compute it once
    x_mag = np.abs(x)
    # max() keeps the first label among equal scores
    return max(templates, key=lambda d: np.sum(x_mag * np.abs(templates[d])))
# Write answer for Q3b here
# Step 1: compute the spectrum (first ix_cut coefficients) of every test signal
# Step 2: use the mean classifier to compare each test spectrum with the
#         training mean transforms and count how often the true digit wins
# Code 3b Here
test_transforms = {d:[] for d in digits}
test_mean_transforms = {}
for d in [1,2]:
    hits = 0
    for i in ix_test:
        spectrum = fft(signals[d][i])[:ix_cut]
        shortfall = ix_cut - len(spectrum)
        if shortfall > 0:
            # spectrum is shorter than the window: extend it with zeros
            spectrum = np.concatenate((spectrum, np.zeros(shortfall)))
        # Use mean_classifier to determine digit with highest similarity
        predicted = mean_classifier(spectrum)
        hits += int(predicted == d)
    # fraction of test recordings of digit d that were labelled correctly
    a = hits/len(ix_test)
    print(f'Accuracy for {d:d}: {a:.2f}')
# [cell output] Accuracy for 1: 0.70
# [cell output] Accuracy for 2: 0.80
# Write answer for Q4 here
# The accuracy would decrease: each sample is now compared against the mean
# spectral magnitude of every digit, so there are more similar-looking
# candidates it can be confused with.
# Code Q4 here
def _windowed_spectrum(signal):
    """Return the first ix_cut DFT coefficients of signal, zero-extended if shorter."""
    spectrum = fft(signal)[:ix_cut]
    shortfall = ix_cut - len(spectrum)
    if shortfall > 0:
        spectrum = np.concatenate((spectrum, np.zeros(shortfall)))
    return spectrum


# Per-digit accuracy of the mean classifier on the test recordings.
correct = 0
for d in digits:
    # number of test recordings of digit d that are labelled as d
    correct = sum(
        1 for i in ix_test
        if mean_classifier(_windowed_spectrum(signals[d][i])) == d
    )
    a = correct/len(ix_test)
    print(f'Accuracy for {d:d}: {a:.2f}')
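# [cell output] Accuracy for 0: 0.45
# [cell output] Accuracy for 1: 0.10
# [cell output] Accuracy for 2: 0.20
# [cell output] Accuracy for 3: 0.10
# [cell output] Accuracy for 4: 0.25
# [cell output] Accuracy for 5: 0.60
# [cell output] Accuracy for 6: 0.40
# [cell output] Accuracy for 7: 0.25
# [cell output] Accuracy for 8: 0.60
# [cell output] Accuracy for 9: 0.65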
# Write your answer here