import numpy as np
import numpy.random as random
from numpy.fft import fft
from scipy.io import wavfile
import matplotlib.pyplot as plt
import seaborn as sns
import os
%matplotlib inline
sns.set()
sns.set(font_scale=1.5)
data_dir = './recordings/'
# digits of interest (0 to 9); extend this list to load more classes
digits = [1,2] # change here to load more digits
# per-digit containers for raw waveforms and their source filenames
signals = {d:[] for d in digits}
file_names = {d:[] for d in digits}
# scan the recordings directory and bucket mono wav files by leading digit
for fname in os.listdir(data_dir):
    for digit in digits:
        if not fname.startswith(str(digit) + '_'):
            continue
        # wavfile.read returns (sample_rate, data); keep only the samples
        samples = wavfile.read(data_dir + fname)[1]
        # keep mono recordings only: multi-channel arrays have 2 dims
        if len(samples.shape) < 2:
            signals[digit].append(samples)
            file_names[digit].append(fname)

# longest signal length across all loaded recordings (FFT zero-pad target)
N = max(len(v) for d in digits for v in signals[d])
# Split the dataset into train and test using an 80/20 random split.
# NOTE(review): assumes exactly 100 recordings per digit — verify against data.
ix = np.arange(100)
random.shuffle(ix)
ix_train = ix[:80]   # training entries
ix_test = ix[80:]    # held-out test entries
# Next we compute the average spectrum of each spoken digit in the training
# set, considering a window up to 1.5 kHz at an 8 kHz sampling rate.
Ts = 1.0/8000
# index of the FFT bin closest to 1.5 kHz for a length-N transform
ix_cut = int(np.ceil(1500*Ts*N))
# per-digit raw FFTs (truncated at 1.5 kHz)
transforms = {d:[] for d in digits}
# per-digit unit-normalized mean magnitude spectra
mean_transforms = {d:[] for d in digits}
# Code Solution to Q1 Here
# per-digit magnitude spectra and their per-bin averages
norm_transforms = {d:[] for d in digits}
X_bar = {d:[] for d in digits}
# Q1: average magnitude spectrum (up to 1.5 kHz) of each digit's training set.
for d in digits:
    # FFT of each training signal, zero-padded to length N, truncated at 1.5 kHz
    for i in ix_train:
        transforms[d].append(fft(signals[d][i], n=N)[:ix_cut+1])

    # magnitude spectra, shape (n_train, ix_cut+1)
    norm_transforms[d] = np.abs(transforms[d])
    # Per-bin mean magnitude. Use the actual training-set size instead of the
    # original hard-coded 80, so changing the split cannot silently skew the mean.
    X_bar[d] = list(norm_transforms[d].sum(axis=0) / len(ix_train))
    # Unit-normalize the mean spectrum. The norm is loop-invariant, so compute
    # it once instead of once per frequency bin as the original did.
    norm = np.linalg.norm(X_bar[d])
    mean_transforms[d] = [v / norm for v in X_bar[d]]
# Q2: plot the average spectral magnitude of each digit over 0-1.5 kHz
xf = np.linspace(0, 1500, (ix_cut+1))
for digit in digits:
    plt.plot(xf, mean_transforms[digit])
plt.xlabel("Hz")
plt.show()
# classifier function
# receives a vector, computes the product with average digits, and returns the max inner product
# Input: sample x (vector)
def mean_classifier(x):
    """Classify a raw signal by maximum inner product with mean spectra.

    Computes the magnitude spectrum of ``x`` (zero-padded to length ``N``
    and truncated at 1.5 kHz, i.e. the first ``ix_cut + 1`` bins) and
    returns the digit whose normalized mean training spectrum has the
    largest inner product with it.

    Parameters
    ----------
    x : array_like
        1-D (mono) audio signal.

    Returns
    -------
    int
        The best-matching digit from the module-level ``digits`` list.
    """
    X_mag = np.abs(fft(x, n=N)[:ix_cut+1])
    best_digit = None
    best_score = -np.inf
    for d in digits:
        # np.dot replaces the original manual per-bin loop: clearer and faster.
        score = np.dot(X_mag, np.abs(mean_transforms[d]))
        # Strict '>' keeps the first digit on ties, as before. Initializing
        # best_score to -inf fixes the original's "max_sum == 0" sentinel,
        # which would let a later digit overwrite an earlier genuine score of 0.
        if score > best_score:
            best_score = score
            best_digit = d
    return best_digit
# Write answer for Q3b here
# Code 3b Here
def classifier_test():
    """Evaluate ``mean_classifier`` on the held-out test split.

    For each digit, classifies every test recording and prints the
    per-digit correct count and percentage, followed by the overall score.
    Reads the module-level ``digits``, ``signals``, and ``ix_test``.
    """
    correct = 0
    total = 0
    for d in digits:
        d_correct = 0
        d_total = 0
        for i in ix_test:
            if mean_classifier(signals[d][i]) == d:
                correct += 1
                d_correct += 1
            d_total += 1
            total += 1
        # Guard against an empty test split (original raised ZeroDivisionError).
        d_pct = np.round(d_correct / d_total * 100) if d_total else 0.0
        print(str(d) + ": " + str(d_correct) + "/" + str(d_total) + ", score: " + str(d_pct) + "%")
    overall_pct = np.round(correct / total * 100) if total else 0.0
    print("Overall: " + str(correct) + "/" + str(total) + ", score: " + str(overall_pct) + "%")
classifier_test()
# Write answer for Q4 here
# Accuracy is still low (overall 57% here); however, this is still far better than random guessing (which would be around 10%).
# So despite the percentage getting worse with more digits, the classifier remains reasonably effective.
# Code Q4 here
data_dir = './recordings/'
# now load all ten digit classes for Q4
digits = [1,2,3,4,5,6,7,8,9,0] # change here to load more digits
# per-digit containers for raw waveforms and their source filenames
signals = {d:[] for d in digits}
file_names = {d:[] for d in digits}
# scan the recordings directory and bucket mono wav files by leading digit
for fname in os.listdir(data_dir):
    for digit in digits:
        if not fname.startswith(str(digit) + '_'):
            continue
        # wavfile.read returns (sample_rate, data); keep only the samples
        samples = wavfile.read(data_dir + fname)[1]
        # keep mono recordings only: multi-channel arrays have 2 dims
        if len(samples.shape) < 2:
            signals[digit].append(samples)
            file_names[digit].append(fname)

# longest signal length across all loaded recordings (FFT zero-pad target)
N = max(len(v) for d in digits for v in signals[d])
# Split the dataset into train and test using an 80/20 random split.
# NOTE(review): assumes exactly 100 recordings per digit — verify against data.
ix = np.arange(100)
random.shuffle(ix)
ix_train = ix[:80]   # training entries
ix_test = ix[80:]    # held-out test entries
# Next we compute the average spectrum of each spoken digit in the training
# set, considering a window up to 1.5 kHz at an 8 kHz sampling rate.
Ts = 1.0/8000
# index of the FFT bin closest to 1.5 kHz for a length-N transform
ix_cut = int(np.ceil(1500*Ts*N))
# per-digit raw FFTs (truncated at 1.5 kHz)
transforms = {d:[] for d in digits}
# per-digit unit-normalized mean magnitude spectra
mean_transforms = {d:[] for d in digits}
# per-digit magnitude spectra and their per-bin averages
norm_transforms = {d:[] for d in digits}
X_bar = {d:[] for d in digits}
# Q4: recompute the mean magnitude spectra for all ten digits, then re-evaluate.
for d in digits:
    # FFT of each training signal, zero-padded to length N, truncated at 1.5 kHz
    for i in ix_train:
        transforms[d].append(fft(signals[d][i], n=N)[:ix_cut+1])

    # magnitude spectra, shape (n_train, ix_cut+1)
    norm_transforms[d] = np.abs(transforms[d])
    # Per-bin mean magnitude. Use the actual training-set size instead of the
    # original hard-coded 80, so changing the split cannot silently skew the mean.
    X_bar[d] = list(norm_transforms[d].sum(axis=0) / len(ix_train))
    # Unit-normalize the mean spectrum. The norm is loop-invariant, so compute
    # it once instead of once per frequency bin as the original did.
    norm = np.linalg.norm(X_bar[d])
    mean_transforms[d] = [v / norm for v in X_bar[d]]
classifier_test()
# Code Q5 here