PSet7-Shazam - Duplicate

from pydub import AudioSegment from scipy import signal from operator import itemgetter import pyaudio import numpy as np import utils import os import sys import warnings import operator import matplotlib.mlab as mlab import matplotlib.pyplot as plt

# ---------------------------------------------------------------------------
# Recording format
# ---------------------------------------------------------------------------
FORMAT = pyaudio.paInt16

# Number of audio channels in the recording
CHANNELS = 2

# Original sample rate of the recordings
SAMPLE_RATE = 44100

# ---------------------------------------------------------------------------
# Fingerprinting parameters
# ---------------------------------------------------------------------------
# Sampling rate (after downsampling)
FS = 8000

# Factor by which the original signal will be downsampled.
# This is a true division, so the value is the float 5.5125.
DECIMATION_FACTOR = SAMPLE_RATE/FS

# Size of the FFT window, affects frequency granularity (we saw this in class!)
WINDOW_SIZE = 1024

# Degree to which a fingerprint can be paired with its neighbors -- higher
# will cause more fingerprints, but potentially better accuracy.
FAN_VALUE = 15

# Ratio by which each window overlaps the previous and next window -- higher
# will cause more fingerprints, but higher granularity of offset matching
OVERLAP_RATIO = 0.5

# Working directory; the song folder is resolved relative to it.
path = os.getcwd()

# Silence the log10(0) warning raised while taking the log of the spectrogram,
# and all deprecation warnings.
warnings.filterwarnings("ignore", message="divide by zero encountered in log10")
warnings.filterwarnings("ignore", category=DeprecationWarning)

# time between peaks
TIME_DIFF = 150.  # numSamples

# Open questions left by the author:
#   - frequency granularity
#   - fan value
#   - last comment: higher overlap gives more fingerprints?
#   - path and warnings stuff?

# Database with key=songname (file name without its extension),
# value=[channel1, channel2, ...] -- one int16 array per audio channel.
SongDb = {}

# Goes through the mp3s folder and adds each song to the database.
mp3_dir = os.path.join(path, "mp3s")
for filename in os.listdir(mp3_dir):
    audiofile = AudioSegment.from_file(os.path.join(mp3_dir, filename))

    # Raw PCM bytes -> interleaved int16 samples.  np.frombuffer replaces the
    # deprecated binary mode of np.fromstring; it returns a read-only view,
    # which is fine because the samples are never modified in place.
    data = np.frombuffer(audiofile._data, dtype=np.int16)

    # De-interleave: channel k owns every `audiofile.channels`-th sample
    # starting at offset k.
    channels = [data[chn::audiofile.channels]
                for chn in range(audiofile.channels)]

    # Strip the extension properly.  The previous `filename[:-3]` left a
    # trailing "." in the key while the log message used `filename[:-4]`.
    song_name = os.path.splitext(filename)[0]
    SongDb[song_name] = channels
    print("Added to song database: " + song_name)

def Preprocess(channels, factor=None):
    """Mix the channels down to mono, remove the DC offset and downsample.

    Parameters
    ----------
    channels : sequence of equal-length 1-D sample arrays
        One array per audio channel (mono input is accepted as well).
    factor : int, optional
        Integer downsampling factor.  Defaults to ``int(DECIMATION_FACTOR)``.

    Returns
    -------
    numpy.ndarray
        Zero-mean float signal, downsampled by ``factor``.

    Raises
    ------
    ValueError
        If ``channels`` is empty.
    """
    if len(channels) == 0:
        raise ValueError("Preprocess needs at least one audio channel")

    if factor is None:
        # NOTE(review): DECIMATION_FACTOR is 5.5125, so truncating to 5
        # gives an effective rate of 44100/5 = 8820 Hz rather than FS.
        factor = int(DECIMATION_FACTOR)

    # Promote to float BEFORE mixing: adding two int16 arrays wraps around
    # for loud samples and silently corrupts the signal.
    stacked = np.asarray(channels, dtype=np.float64)
    mono = stacked.mean(axis=0)

    # Remove the DC offset of the mixed signal itself.  (The previous code
    # subtracted the mean of the *sum*, leaving a residual offset.)
    mono = mono - mono.mean()

    return signal.decimate(mono, factor)

# Database with key=songname, value=processed signal
ProcessedDb = {}

# Run every song through Preprocess and report, per song, how many samples
# it had before (first channel) and after processing.
table_header = '{0:65}{1:22}{2:20}\n'.format(
    'Song Name', 'Original #Samples', 'Processed #Samples')
print(table_header)

row_template = '{0:50}{1:32d}{2:20d}'
for song, sig in SongDb.items():
    processed = Preprocess(sig)
    ProcessedDb[song] = processed
    print(row_template.format(song, len(sig[0]), len(processed)))

def getSpectrogram(signal):
    """Return the spectrogram of ``signal`` in decibels (10 * log10).

    Parameters
    ----------
    signal : 1-D array of samples
        The (preprocessed) audio signal.  Note that inside this function the
        parameter name shadows the ``scipy.signal`` module; the name is kept
        so existing keyword callers continue to work.

    Returns
    -------
    numpy.ndarray
        2-D array ``10 * log10(spectrum)`` where ``spectrum`` is the first
        element returned by ``mlab.specgram``.  Entries that would be
        +/-infinity (log of zero) are replaced with 0.
    """
    spectrum_magnitudes = mlab.specgram(
        signal,
        NFFT=WINDOW_SIZE,
        Fs=FS,
        # OVERLAP_RATIO * WINDOW_SIZE is a float (512.0); the overlap is a
        # sample count and must be an integer.
        noverlap=int(OVERLAP_RATIO * WINDOW_SIZE),
    )[0]

    # log10(0) yields -inf (and a RuntimeWarning); those bins are zeroed below.
    with np.errstate(divide="ignore"):
        logged_spectrum_magnitudes = 10 * np.log10(spectrum_magnitudes)
    logged_spectrum_magnitudes[np.isinf(logged_spectrum_magnitudes)] = 0

    return logged_spectrum_magnitudes


# Open question left by the author: couldn't we just use magnitude_spectrum?

# Database with key=songname, value=spectrogram
# Built by running getSpectrogram over every processed song.
Spectrograms = {
    name: getSpectrogram(samples) for name, samples in ProcessedDb.items()
}

# Plots each spectrogram as an image, one figure per song.
# Author's open question: what's my x axis frequency?
# (Abandoned idea: build w = np.linspace(0, max??, len(song)) and plot
#  Spectrograms[song] against it -- the FFT of a signal with N samples has
#  N freqs separated by 1/(N*Ts).)
for song, spectrogram in Spectrograms.items():
    plt.imshow(spectrogram)
    plt.title(f"{song}")
    plt.xlabel("time")
    plt.ylabel("frequency")
    plt.show()

# Database with key=songname, value=list of local peaks
Peaks = {}

# Gets the local peaks for each song, stores them in Peaks, and plots them
# on top of the song's spectrogram.
for song, spectrogram in Spectrograms.items():
    # The third return value is materialised with list() so it can be stored
    # and iterated more than once (the author observed it is a zip object).
    freqs, times, tuples = utils.get_2D_peaks(spectrogram)
    Peaks[song] = list(tuples)

    plt.imshow(spectrogram)
    plt.title(f"{song}")
    plt.xlabel("time")
    plt.ylabel("frequency")
    plt.plot(times, freqs, "*", markersize=1)
    # A single show() per figure; the duplicated second call was removed.
    plt.show()

def getPairs(peaks, time_diff=None, fan_value=None):
    """Pair nearby spectrogram peaks into fingerprints.

    Every peak is an ``(f, t)`` tuple.  Two distinct peaks are paired when
    their time coordinates differ by at most ``time_diff`` and neither peak
    has already taken part in ``fan_value`` pairs.

    Parameters
    ----------
    peaks : iterable of hashable ``(f, t)`` tuples
        Any iterable is accepted (including a one-shot ``zip`` object); it is
        materialised into a list first.
    time_diff : number, optional
        Maximum allowed ``|t2 - t1|``.  Defaults to the module's ``TIME_DIFF``.
    fan_value : int, optional
        Maximum number of pairs a single peak may take part in.  Defaults to
        the module's ``FAN_VALUE``.

    Returns
    -------
    list of tuple
        ``(f1, f2, tdelta)`` records in discovery order, where
        ``tdelta = |t2 - t1|``.
    """
    if time_diff is None:
        time_diff = TIME_DIFF
    if fan_value is None:
        fan_value = FAN_VALUE

    peaks = list(peaks)
    pairs = []
    # Number of pairs each peak already takes part in (as anchor or partner).
    fan_values = {peak: 0 for peak in peaks}

    for peak in peaks:
        # An anchor that is already saturated cannot form new pairs, so skip
        # the scan over all other peaks.
        if fan_values[peak] >= fan_value:
            continue

        for other_peak in peaks:
            if other_peak == peak or fan_values[other_peak] >= fan_value:
                continue

            tdelta = np.abs(other_peak[1] - peak[1])
            if tdelta > time_diff:
                continue

            fan_values[peak] += 1
            fan_values[other_peak] += 1
            pairs.append((peak[0], other_peak[0], tdelta))  # (f1, f2, tdelta)

            if fan_values[peak] >= fan_value:
                break

    return pairs

# Database with key=fingerprint (f1, f2, tdelta), value=songname
LookUpTable = {}

# Fingerprint every song and register each fingerprint under its song name.
# A fingerprint produced by several songs keeps only the last song written.
for song, song_peaks in Peaks.items():
    for fingerprint in getPairs(song_peaks):
        LookUpTable[fingerprint] = song

# Prints a sample of the LookUpTable: its first 50 entries (or all of them
# when there are fewer).
for shown, fingerprint in enumerate(LookUpTable, start=1):
    print(fingerprint, LookUpTable[fingerprint])
    if shown == 50:
        break

# Database with key=file name, value=[channel1, channel2, ...] for a snippet
# of the song
TestDb = {}

# Goes through the test_mp3s folder and adds a snippet of each song to the
# database.
test_dir = "./test_mp3s/"
for filename in os.listdir(test_dir):
    audiofile = AudioSegment.from_file(test_dir + filename)

    # Raw PCM bytes -> interleaved int16 samples (np.frombuffer replaces the
    # deprecated binary mode of np.fromstring), then cut out the snippet.
    # The slice indexes the *interleaved* stream, so for a 2-channel file it
    # keeps 7.5 * SAMPLE_RATE frames per channel starting at frame
    # 30 * SAMPLE_RATE.
    data = np.frombuffer(audiofile._data, dtype=np.int16)[
        SAMPLE_RATE*60:SAMPLE_RATE*75]

    # De-interleave into one array per channel.
    channels = [data[chn::audiofile.channels]
                for chn in range(audiofile.channels)]

    TestDb[filename] = channels
    print("Added to test database. : " + str(filename))

# Goes through the test snippets and runs the same fingerprinting process.
# Prints the number of matches for each song and the confidence of the
# prediction.
for test, channels in TestDb.items():
    print('\033[1mTesting: ' + test + '\033[0m \n')

    # Every known song starts with zero matching fingerprints.
    Matches = {song: 0 for song in SongDb}

    preprocessed = Preprocess(channels)
    spectrogram = getSpectrogram(preprocessed)
    _, _, peaks = utils.get_2D_peaks(spectrogram)
    pairs = getPairs(peaks)
    print(pairs)  # debug dump of the snippet's fingerprints (kept as-is)

    for p in pairs:
        match = LookUpTable.get(p)
        if match is not None:
            Matches[match] += 1

    # The song with the most matching fingerprints wins.
    prediction, count = max(Matches.items(), key=itemgetter(1))

    for k, v in Matches.items():
        if k == prediction:
            print('\033[1m{0:50} ==> {1:10d} \033[0m'.format(k, v))
        else:
            print('{0:50} ==> {1:10d}'.format(k, v))

    # Share of all matches that belong to the predicted song.  When nothing
    # matched at all the ratio is undefined; report 0.0% instead of dividing
    # by zero.
    total_matches = sum(Matches.values())
    percent = float(count) / total_matches * 100 if total_matches else 0.0
    confidence = str(percent)[:5] + "%"

    print(f'\nPrediction: {prediction}')
    print('\033[1m{0:10}: {1:10}\033[0m\n-----------------------------------------------------------------------\n\n'.format('Confidence', confidence))