import numpy as np
import numpy.random as random
from numpy.fft import fft
from scipy.io import wavfile
import matplotlib.pyplot as plt
import seaborn as sns
import os
%matplotlib inline
sns.set()
sns.set(font_scale=1.5)
data_dir = './recordings/'

# Digits of interest (0 to 9); extend this list to load more classes.
digits = [1, 2, 4, 5]

# Per-digit storage: raw waveforms and the filenames they came from.
signals = {d: [] for d in digits}
file_names = {d: [] for d in digits}

# Load every recording named "<digit>_*"; keep mono (1-D) waveforms only.
for filename in os.listdir(data_dir):
    for d in digits:
        if filename.startswith(str(d) + '_'):
            wav = wavfile.read(data_dir + filename)[1]
            # A 2-D array would mean a multi-channel recording; skip those.
            if len(wav.shape) < 2:
                signals[d].append(wav)
                file_names[d].append(filename)

# Longest waveform across all loaded digits (used later as the FFT length).
N = max(len(v) for d in digits for v in signals[d])
/shared-libs/python3.7/py-core/lib/python3.7/site-packages/ipykernel_launcher.py:15: WavFileWarning: Chunk (non-data) not understood, skipping it.
from ipykernel import kernelapp as app
# Split the dataset into train and test using a random 80/20 split.
n_samples = 100
split_point = 80

# Shuffle the sample indices in place, then carve off the tail for testing.
ix = np.arange(n_samples)
random.shuffle(ix)
ix_train = ix[:split_point]
ix_test = ix[split_point:]
[ 0 1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23
24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47
48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71
72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95
96 97 98 99]
# Next we compute the average spectrum of each spoken digit in the training
# set, considering only the window up to 1.5 kHz.
Ts = 1.0 / 8000  # sampling period: recordings are sampled at 8 kHz

# Number of FFT bins covering 0..1.5 kHz for a length-N transform
# (bin spacing is 1/(Ts*N) Hz, so 1500 Hz corresponds to 1500*Ts*N bins).
ix_cut = int(np.ceil(1500 * Ts * N))
print(ix_cut)

# Per-digit magnitude spectra, filled in by the Q1 cell below.
transforms = {}
# Per-digit unit-normalized mean spectra (the classifier templates).
mean_transforms = {}
1538
# Code Solution to Q1 Here
# For each digit: take the magnitude spectrum of every recording (FFT
# zero-padded to the common length N so all spectra align bin-for-bin),
# keep only the first ix_cut bins (0..1.5 kHz), average over the TRAINING
# recordings, and normalize the mean spectrum to unit energy.
#
# Fixes vs. the previous attempt: the old code indexed the accumulator list
# by the digit VALUE, filtered magnitude values by `<= ix_cut` (an index
# threshold, not a value threshold), and collapsed everything to a scalar —
# which is why every normalized entry came out as exactly 1.0.
for d in digits:
    # One row per recording: |FFT| truncated to the sub-1.5 kHz window.
    mags = np.array([np.abs(fft(s, n=N))[:ix_cut] for s in signals[d]])
    transforms[d] = mags
    # Average over the training split only; guard against train indices
    # beyond the number of recordings available for this digit.
    train_idx = [i for i in ix_train if i < len(mags)]
    mean_spec = mags[train_idx].mean(axis=0)
    # Unit L2 norm so classification reduces to a plain inner product.
    mean_transforms[d] = mean_spec / np.sqrt(np.sum(mean_spec ** 2))
print(transforms)
print(mean_transforms)
{1: 378.57869605082783, 2: 374.77252427549615, 4: 85.70684431916625, 5: 340.4897910126738}
{1: 1.0, 2: 1.0, 4: 1.0, 5: 1.0}
# In this next part, plot the average spectral magnitude of each digit.
# Code Solution to Q2 here
# Fix: the previous version plotted a single scalar per digit (digits on the
# x-axis), not the spectrum. Plot each digit's mean spectrum across frequency
# bins instead. np.atleast_1d keeps this runnable even if mean_transforms
# holds scalars.
for d in digits:
    plt.plot(np.atleast_1d(mean_transforms[d]), label='digit %d' % d)
plt.xlabel('frequency bin')
plt.ylabel('normalized magnitude')
plt.legend()
# classifier function
# receives a vector, computes the product with average digits, and returns the max inner product
# Input: sample x (vector)
def mean_classifier(x):
    """Classify one raw audio sample vector.

    Computes the magnitude spectrum of ``x`` (FFT zero-padded to the common
    length N, truncated to the first ``ix_cut`` bins), scores it against each
    digit's unit-normalized mean spectrum with an inner product, and returns
    the best-scoring digit.

    Fixes vs. the previous attempt: the old body iterated ``x[i]`` as if the
    input were the per-digit signal dict, referenced the undefined name
    ``temp_transforms_for_digit``, indexed ``digits`` by digit value, and
    could return an unbound ``output``.
    """
    spec = np.abs(fft(x, n=N))[:ix_cut]
    best_digit = digits[0]  # always return a valid digit, even on ties
    best_score = -np.inf
    for d in digits:
        # mean_transforms[d] is unit-norm, so this is a cosine-like score.
        score = float(np.sum(mean_transforms[d] * spec))
        if score > best_score:
            best_score = score
            best_digit = d
    return best_digit
# Code Q3a Here
# Write answer for Q3b here
# Fix: the previous call passed the index array itself to the classifier
# (the cause of the "'numpy.int64' object is not iterable" error). Instead,
# classify each held-out RECORDING and report per-digit accuracy.
print(ix_test)
for d in digits:
    # Test indices that actually exist for this digit's recordings.
    test_idx = [i for i in ix_test if i < len(signals[d])]
    if not test_idx:
        continue
    n_correct = sum(1 for i in test_idx if mean_classifier(signals[d][i]) == d)
    print('digit %d accuracy: %.2f' % (d, n_correct / len(test_idx)))
[34 69 47 86 78 18 57 49 88 54 80 16 92 74 93 2 1 25 11 12]
TypeError: 'numpy.int64' object is not iterable
# Code 3b Here
# Write answer for Q4 here
''' I did not get it to work, but I imagine the accuracy increases when you classify
more digits because we have more data points to include in our normalizations and
general calculations. '''
# Code Q4 here
# Write your answer here