from pydub import AudioSegment
from scipy import signal
from operator import itemgetter
import pyaudio
import numpy as np
import utils
import os
import sys
import warnings
import operator
import matplotlib.mlab as mlab
import matplotlib.pyplot as plt
FORMAT = pyaudio.paInt16
'''
Number of audio channels in the recording
'''
CHANNELS = 2
'''
Original sample rate of the recordings
'''
SAMPLE_RATE = 44100
'''
Sampling rate (after downsampling)
'''
FS = 8000
'''
Factor by which the original signal will be downsampled
'''
DECIMATION_FACTOR = SAMPLE_RATE/FS
'''
Size of the FFT window, affects frequency granularity (we saw this in class!)
'''
WINDOW_SIZE = 1024
'''
Degree to which a fingerprint can be paired with its neighbors --
higher will cause more fingerprints, but potentially better accuracy.
'''
FAN_VALUE = 15
'''
Ratio by which each window overlaps the previous and next window --
higher will cause more fingerprints, but higher granularity of offset matching
'''
OVERLAP_RATIO = 0.5
MAX_PAIR = 150
path = os.getcwd()
warnings.filterwarnings("ignore", message="divide by zero encountered in log10")
warnings.filterwarnings("ignore", category=DeprecationWarning)
# Database with key=songname, value=[channel1, channel2]
SongDb = {}
#Goes through mp3s folder and adds each song to database
for filename in os.listdir(path + "/mp3s/"):
audiofile = AudioSegment.from_file(path + "/mp3s/" + filename)
data = np.fromstring(audiofile._data, np.int16)
channels = []
for chn in range(audiofile.channels):
channels.append(data[chn::audiofile.channels])
SongDb[filename[:-3]] = channels
print("Added to song database: " + str(filename[:-4]))
Added to song database: The Beatles - Let It Be
Added to song database: Michael Jackson - Billie Jean
Added to song database: Adele - Rolling in the deep
Added to song database: Rihanna - Disturbia
Added to song database: Whitney Houston - I Will Always Love You
print(SongDb)
{'The Beatles - Let It Be.': [array([30, 37, 31, ..., 0, 0, 0], dtype=int16), array([36, 51, 50, ..., 0, 0, 0], dtype=int16)], 'Michael Jackson - Billie Jean.': [array([0, 0, 0, ..., 0, 0, 0], dtype=int16), array([ 0, 0, 0, ..., -1, 1, -1], dtype=int16)], 'Adele - Rolling in the deep.': [array([0, 0, 0, ..., 0, 0, 0], dtype=int16), array([0, 0, 0, ..., 0, 0, 0], dtype=int16)], 'Rihanna - Disturbia.': [array([0, 0, 0, ..., 0, 0, 0], dtype=int16), array([0, 0, 0, ..., 0, 0, 0], dtype=int16)], 'Whitney Houston - I Will Always Love You.': [array([0, 0, 0, ..., 0, 0, 0], dtype=int16), array([0, 0, 0, ..., 0, 0, 0], dtype=int16)]}
def Preprocess(channels):
channel1 = channels[0]
channel2 = channels[1]
channelmean = ((channel1 + channel2)/2 - np.mean(channel1 + channel2))
resampled = signal.decimate(channelmean, int(DECIMATION_FACTOR))
return resampled
# Database with key=songname, value=processed signal
ProcessedDb = {}
#Processes each song and adds it to ProcessedDb
#Prints table of number of samples in for each song
print('{0:65}{1:22}{2:20}\n'.format('Song Name', 'Original #Samples', 'Processed #Samples'))
for song, sig in SongDb.items():
processed = Preprocess(sig)
ProcessedDb[song] = processed
original_duration = len(sig[0])
processed_duration = len(processed)
print('{0:50}{1:32d}{2:20d}'.format(song, original_duration, processed_duration))
Song Name Original #Samples Processed #Samples
The Beatles - Let It Be. 10718144 2143629
Michael Jackson - Billie Jean. 16920576 3384116
Adele - Rolling in the deep. 10111104 2022221
Rihanna - Disturbia. 10525824 2105165
Whitney Houston - I Will Always Love You. 11810476 2362096
def getSpectrogram(signal):
magnitude_values = []
magnitude_values, frequency, t = mlab.specgram(signal, NFFT=WINDOW_SIZE, Fs=FS, noverlap= OVERLAP_RATIO)
for value in range(len(magnitude_values)):
magnitude_values[value] = np.log(np.abs(magnitude_values[value])) * 10
return magnitude_values
''' TODO '''
# Database with key=songname, value=spectrogram
Spectrograms = {}
# Gets the spectrogram for each song and adds it to the Spectrograms database
for song, sig in ProcessedDb.items():
spectrogram_magnitudes = getSpectrogram(sig)
Spectrograms[song] = spectrogram_magnitudes
# Plots each spectrogram
for song, spect_val in Spectrograms.items():
plt.imshow(spect_val)
plt.title(song)
plt.show()
''' TODO '''
# Database with key=songname, value=array of local peaks
Peaks = {}
# Gets the local peaks for each song and adds it to the Peaks database
for song, spect_val in Spectrograms.items():
x, y, spectrogram_peaks = utils.get_2D_peaks(spectrogram_magnitudes)
Peaks[song] = spectrogram_peaks
plt.title(song)
plt.imshow(spect_val)
plt.plot(y,x,".", markersize = 1)
# ^ I initially had a problem above, where I plotted x,y and got the dots off of the spectogram, I flipped it to use y,x for each point!
plt.show()
# Plots the peaks over the original spectrogram
''' TODO '''
def getPairs(peaks):
peak_pairs = []
peaks = list(peaks) #I had a 'zip' has no len() error... this may fix it!
# I'll set two for loops to account for both peaks
for i in range(len(peaks)):
for j in range(len(peaks)):
if (i != j) and (np.abs(peaks[i][1] - peaks[j][1]) <= MAX_PAIR) and i < FAN_VALUE and j < FAN_VALUE:
peak_pairs.append((peaks[i][0], np.abs(peaks[i][1] - peaks[j][1])))
i += 1
j += 1
return peak_pairs
''' TODO '''
# Database with key=fingerprint (f1, f2, tdelta), value=songname
LookUpTable = {}
# Get fingerprints for each song stores them in the LookUpTable database
for song, peak in Peaks.items():
pairs = getPairs(peak)
for pair in pairs:
LookUpTable[tuple(pair)] = song
# Prints a sample of the LookUpTable entries
items = list(LookUpTable.items())
items = items[:][:3]
print(items)
[((0, 24), 'Whitney Houston - I Will Always Love You.'), ((0, 22), 'Whitney Houston - I Will Always Love You.'), ((0, 102), 'Whitney Houston - I Will Always Love You.')]
# Database with key=songname, value=[channel1, channel2] for a snippet of the song
TestDb = {}
# Goes through test_mp3s folder and adds a snippet of each song to database
for filename in os.listdir("./test_mp3s/"):
audiofile = AudioSegment.from_file("./test_mp3s/" + filename)
data = np.fromstring(audiofile._data, np.int16)[SAMPLE_RATE*60:SAMPLE_RATE*75]
channels = []
for chn in range(audiofile.channels):
channels.append(data[chn::audiofile.channels])
TestDb[filename] = channels
print("Added to test database. : " + str(filename))
Added to test database. : Whitney Houston - I Will Always Love You (Official Music Video).mp3
Added to test database. : Rihanna - Disturbia.mp3
Added to test database. : Beatles - Let It Be [1970].mp3
Added to test database. : Let It Be (Remastered 2009).mp3
Added to test database. : Michael Jackson - Billie Jean (Official Music Video).mp3
Added to test database. : Adele - Rolling in the Deep.mp3
# Goes through test snippets and runs same fingerprinting process
# Prints out the number of matches for each song and confidence of prediction
for test in TestDb.keys():
print('\033[1mTesting: ' + test + '\033[0m \n')
Matches = {}
for song in SongDb.keys():
Matches[song] = 0
channels = TestDb[test]
preprocessed = Preprocess(channels)
spectrogram = getSpectrogram(preprocessed)
_, _, peaks = utils.get_2D_peaks(spectrogram)
pairs = getPairs(peaks)
for p in pairs:
match = LookUpTable.get(p, None)
if match:
Matches[match] += 1
prediction, count = max(Matches.items(), key=itemgetter(1))
for k,v in Matches.items():
if k == prediction:
print('\033[1m{0:50} ==> {1:10d} \033[0m'.format(k, v))
else:
print('{0:50} ==> {1:10d}'.format(k, v))
confidence = str(float(count)/sum(Matches.values())*100)[:5] + "%"
prediction = max(Matches.items(), key=itemgetter(1))
print(f'\nPrediction: {prediction[0]}')
print('\033[1m{0:10}: {1:10}\033[0m\n-----------------------------------------------------------------------\n\n'.format('Confidence', confidence))
Testing: Whitney Houston - I Will Always Love You (Official Music Video).mp3
The Beatles - Let It Be. ==> 0
Michael Jackson - Billie Jean. ==> 0
Adele - Rolling in the deep. ==> 0
Rihanna - Disturbia. ==> 0
Whitney Houston - I Will Always Love You. ==> 0
ZeroDivisionError: float division by zero