import pandas as pd
# Parse users.json into a pandas object. The result is not bound to a name,
# so it is discarded unless an interactive session echoes the expression.
pd.read_json('users.json')
# importing our data
import json
# Load the ratings file into ``users``. The encoding is given explicitly so
# the file is decoded the same way regardless of the platform's locale default.
with open('users.json', encoding='utf-8') as file:
    users = json.load(file)
# defining distance function
def minkowski(ratings1, ratings2, r):
    """Compute the Minkowski distance between two users' ratings.

    Only keys present in both mappings contribute to the distance.

    Args:
        ratings1: Mapping of item -> numeric rating for the first user.
        ratings2: Mapping of item -> numeric rating for the second user.
        r: Order of the metric (1 is Manhattan, 2 is Euclidean).

    Returns:
        The r-th root of the sum of the r-th powers of the absolute rating
        differences over the shared keys. If the two mappings share no
        keys the sum is empty and the result is 0.

    Raises:
        ZeroDivisionError: If ``r`` is 0.
    """
    total = sum(
        abs(ratings1[key] - ratings2[key]) ** r
        for key in ratings1
        if key in ratings2
    )
    # The root is applied once, after every shared key has been accumulated.
    return total ** (1 / r)
# finding nearest neighbors
def nearest_neighbors(username, users, r=1):
    """Rank every other user by Minkowski distance to ``username``.

    Args:
        username: Key in ``users`` of the user to compare against.
        users: Mapping of user name -> mapping of item -> rating.
        r: Order of the Minkowski metric; the default of 1 is Manhattan.

    Returns:
        A list of ``(distance, user)`` tuples in ascending order, so the
        closest user comes first. Equal distances are ordered by user name
        because the tuples are compared element by element.

    Raises:
        KeyError: If ``username`` is not a key of a non-empty ``users``.
    """
    return sorted(
        (minkowski(users[username], users[user], r), user)
        for user in users
        if user != username
    )
# the recommender system
def recommend_bands(username, users):
    """Recommend bands rated by the nearest neighbour but not by ``username``.

    Args:
        username: Key in ``users`` of the user to recommend for.
        users: Mapping of user name -> mapping of band -> rating.

    Returns:
        Band names taken from the single closest user (as ranked by
        ``nearest_neighbors`` with its default metric), ordered from the
        neighbour's highest rating to lowest. Returns an empty list when
        there is no other user to compare against.
    """
    neighbors = nearest_neighbors(username, users)
    if not neighbors:
        # No other users: nothing to base a recommendation on.
        return []

    neighbor = neighbors[0][1]
    already_rated = users[username]
    # Sorting (rating, band) pairs in reverse puts the best-rated bands first.
    ranked = sorted(
        (
            (rating, band)
            for band, rating in users[neighbor].items()
            if band not in already_rated
        ),
        reverse=True,
    )
    return [band for _, band in ranked]
# testing our recommender
# Print one recommendation list per line for three sample users.
print(
    *(recommend_bands(name, users) for name in ('Hailey', 'Chan', 'Angelica')),
    sep='\n',
)
# Output:
# ['Phoenix', 'Blues Traveler', 'Slightly Stoopid']
# ['The Strokes', 'Vampire Weekend']
# []
import numpy as np
# Three example vectors of five values each; ``alex`` holds integers, the
# other two hold floats.
alex = np.asarray([1, 2, 3, 4, 5])
roby = np.asarray([4.50, 3.60, 4.80, 3.40, 3.50])
jack = np.asarray([4.00, 4.25, 4.50, 4.75, 5.00])
def pearson_corrcoef(x, y):
    """Compute the Pearson correlation coefficient of two sequences.

    Args:
        x: Numeric values (NumPy array or any array-like such as a list).
        y: Numeric values of the same length as ``x``.

    Returns:
        The sum of the products of the deviations from the means, divided
        by the product of the square roots of the summed squared
        deviations. If either input has zero variance the denominator is 0
        and the division produces ``nan``.
    """
    # Accept plain lists/tuples as well as arrays.
    x = np.asarray(x)
    y = np.asarray(y)

    x_dev = x - x.mean()
    y_dev = y - y.mean()

    numer = np.sum(x_dev * y_dev)
    denom = np.sum(x_dev ** 2) ** 0.5 * np.sum(y_dev ** 2) ** 0.5
    return numer / denom
# Correlate each of the two float vectors with ``alex``, one result per line.
print(
    *(pearson_corrcoef(ratings, alex) for ratings in (jack, roby)),
    sep='\n',
)
# Output:
# 0.9999999999999998
# -0.5412746144356352
# visualizing the correlation
# Row labels for the table: one name per position in the rating vectors.
index = ['Weird Al', 'The Strokes', 'Norah Jones', 'Blues Traveler', 'Phoenix']
# One column per user.
data = dict(jack=jack, roby=roby, alex=alex)
df = pd.DataFrame(data=data, index=index)
# defining a function to use with our users
def pearson_users(user1, user2):
    """Compute the Pearson correlation between two users' shared ratings.

    Reads the module-level ``users`` mapping; only keys rated by both
    users are compared.

    Args:
        user1: Key in ``users`` for the first user.
        user2: Key in ``users`` for the second user.

    Returns:
        The value of ``pearson_corrcoef`` applied to the two users' ratings
        for their shared keys, paired in the iteration order of the first
        user's ratings.

    Raises:
        KeyError: If either name is not a key of ``users``.
    """
    first = users[user1]
    second = users[user2]
    shared = [key for key in first if key in second]
    ratings1 = np.array([first[key] for key in shared])
    ratings2 = np.array([second[key] for key in shared])
    return pearson_corrcoef(ratings1, ratings2)
# Correlate Angelica with three other users, one result per line.
print(
    *(pearson_users('Angelica', other) for other in ('Bill', 'Hailey', 'Jordyn')),
    sep='\n',
)
# Output:
# -0.9040534990682688
# 0.42008402520840293
# 0.7639748605475433
# comparing jack and alex (perfect similarity) using cosine similarity
# Cosine similarity: the dot product divided by the product of the two
# vector lengths.
dot_prod = np.dot(jack, alex)
x_size = np.sqrt((jack ** 2).sum())
y_size = np.sqrt((alex ** 2).sum())
# Bare expression: the value is not stored or printed by the script itself.
dot_prod / (x_size * y_size)