import numpy as np
import math
import numpy.random as random
from numpy.fft import fft
from scipy.io import wavfile
import matplotlib.pyplot as plt
import seaborn as sns
import os
%matplotlib inline
sns.set()
sns.set(font_scale=1.5)
data_dir = './recordings/'
# determine digits of interest (0 to 9)
digits = [1,2] # change here to load more digits
# dictionary that will store our values
signals = {d:[] for d in digits}
file_names = {d:[] for d in digits}
# import files
for filename in os.listdir(data_dir):
    # iterate over digits
    for d in digits:
        if filename.startswith(str(d) + '_'):
            wav = wavfile.read(data_dir + filename)[1]
            # keep single-channel (mono) recordings only
            if len(wav.shape) < 2:
                signals[d].append(wav)
                file_names[d].append(filename)
# find the maximum signal length; this will be the FFT size
N = max([len(v) for d in digits for v in signals[d]])
# next we split our dataset into train and test sets
# using an 80/20 random split
# create train/test split
ix = np.arange(100)
random.shuffle(ix)
# select train entries
ix_train = ix[:80]
# select test entries
ix_test = ix[80:]
# next we compute the average spectrum of each spoken digit in the training set.
# we will consider a window up to 1.5 kHz
# sampling rate is 8kHz
Ts = 1.0/8000
ix_cut = int(np.ceil(1500*Ts*N))
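# Sanity check (my addition, not part of the question): with an N-point FFT at
# sampling period Ts, bin k corresponds to frequency k / (N * Ts) Hz, so the
# cutoff index computed above should land at roughly 1.5 kHz.
fs = 1.0 / Ts                 # sampling rate, 8 kHz
freq_resolution = fs / N      # spacing between FFT bins in Hz
print(f'cutoff bin {ix_cut} corresponds to about {ix_cut * freq_resolution:.1f} Hz')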
# initialize dictionary for storing transforms
transforms = {}
# initialize dictionary for storing mean transforms
mean_transforms = {}
# compute the mean transform of each digit in the training set.
# Make sure to only keep the spectrum up to 1.5 kHz
# Code Solution to Q1 Here
transforms = {d: [] for d in digits}
av_spec_mag = {d: [] for d in digits}
mean_transforms = {d: [] for d in digits}

# zero-padded FFT of every recording, truncated to the bins below 1.5 kHz
for d in digits:
    for s in signals[d]:
        transforms[d].append(fft(s, N)[:ix_cut])

# split the transforms / raw signals according to the train/test indices
transforms_train = {d: [transforms[d][i] for i in ix_train] for d in digits}
samples_test = {d: [signals[d][i] for i in ix_test] for d in digits}

# average spectral magnitude of each digit over the training set
for d in digits:
    av_spec_mag[d] = sum(abs(t) for t in transforms_train[d]) / len(transforms_train[d])

# normalize each mean spectrum to unit Euclidean norm
for d in digits:
    mean_transforms[d] = av_spec_mag[d] / math.sqrt(sum(abs(ele)**2 for ele in av_spec_mag[d]))
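# Quick check (my addition): after the normalization above each mean transform
# should have unit Euclidean norm, so the inner products computed by the
# classifier below are comparable across digits.
for d in digits:
    print(d, np.linalg.norm(mean_transforms[d]))  # should be close to 1.0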
# In this next part, plot the average spectral magnitude of each digit.
# Code Solution to Q2 here
# frequency (in Hz) of each retained FFT bin
x_values = np.arange(ix_cut) / (N * Ts)
for d in digits:
    plt.plot(x_values, abs(av_spec_mag[d]))
    plt.xlabel('Frequency (Hz)')
    plt.ylabel('Spectral amplitude')
    plt.title(f'Average spectral magnitude of digit {d}')
    plt.show()
# classifier function
# receives a sample vector, computes the inner product of its magnitude spectrum
# with each digit's mean spectrum, and returns the digit with the largest inner product
# Input: sample x (vector)
def mean_classifier(x):
    transform_x = fft(x, N)
    transform_x = transform_x[:ix_cut]
    # Code Q3a Here
    # inner product of the sample's magnitude spectrum with each digit's mean spectrum
    scores = {}
    for d in digits:
        scores[d] = np.dot(np.abs(transform_x), np.abs(mean_transforms[d]))
    # predicted digit: the one with the largest inner product
    return max(scores, key=scores.get)
# Write answer for Q3b here
# The classifier labels most of the test samples correctly, but it is not perfect:
# some recordings are assigned to the wrong digit.
# Code 3b Here
predictions_mean = {d: [] for d in digits}
for d in digits:
    for t in samples_test[d]:
        predictions_mean[d].append(mean_classifier(t))
for d in digits:
    print(predictions_mean[d])
# Write answer for Q4 here
# I tried the code below with different numbers of digits: the more digits the
# classifier has to distinguish, the lower its accuracy becomes.
# Code Q4 here
accuracy = {d: 0 for d in digits}
for d in digits:
    accuracy[d] = predictions_mean[d].count(d) / len(predictions_mean[d]) * 100
for d in digits:
    print(f'The accuracy for digit {d} is {accuracy[d]:.1f} percent')
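# Extra check (my addition): overall accuracy pooled over all test samples,
# which is convenient when comparing runs with different numbers of digits.
total_correct = sum(predictions_mean[d].count(d) for d in digits)
total_samples = sum(len(predictions_mean[d]) for d in digits)
print(f'Overall accuracy: {100 * total_correct / total_samples:.1f} percent')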
# Code Q5 here
# Alternative classifier: minimize the squared difference between each digit's
# mean transform and the test sample's magnitude spectrum.
def diff_classifier(x):
    transform_x = fft(x, N)
    transform_x = transform_x[:ix_cut]
    # squared Euclidean distance between the sample's magnitude spectrum
    # and each digit's mean spectrum
    dists = {}
    for d in digits:
        dists[d] = np.sum((np.abs(transform_x) - np.abs(mean_transforms[d])) ** 2)
    # predicted digit: the one whose mean spectrum is closest
    return min(dists, key=dists.get)
predictions_diff = {d: [] for d in digits}
for d in digits:
    for t in samples_test[d]:
        predictions_diff[d].append(diff_classifier(t))
for d in digits:
    print(predictions_diff[d])
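# For comparison with Q4 (my addition): the same per-digit accuracy computation,
# applied to the squared-difference classifier.
accuracy_diff = {d: 0 for d in digits}
for d in digits:
    accuracy_diff[d] = predictions_diff[d].count(d) / len(predictions_diff[d]) * 100
    print(f'The accuracy for digit {d} with the difference classifier is {accuracy_diff[d]:.1f} percent')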