# Data Prep Exercise for a Music Recommendation System
import pandas as pd
# Build a 2000-user sample from jams.tsv and attach one artist/title pair to
# each sampled user.
#
# The file is parsed once for all three columns needed below.
# `on_bad_lines="skip"` drops malformed rows without raising; it replaces
# `error_bad_lines=False`, which pandas deprecated in 1.3 and removed in 2.0.
artist_title_df = pd.read_csv(
    "jams.tsv",
    sep="\t",
    usecols=["user_id", "artist", "title"],
    on_bad_lines="skip",
)

# Single-column frame of user ids, one row per input row (duplicates included).
# NOTE: the bare expressions in this script are notebook-style displays; they
# have no effect when the file is run as a plain script.
user_id_df = artist_title_df[["user_id"]]
user_id_df
len(user_id_df)

# Collapse to distinct users before sampling so no user is drawn twice.
user_id_df = user_id_df.drop_duplicates()
len(user_id_df)

# Random sample of 2000 distinct users. No seed is set, so the sample differs
# on every run; `sample` raises ValueError if fewer than 2000 users exist.
sample_user_id = user_id_df.sample(n=2000)
sample_user_id

artist_title_df

# user_id -> artist / title lookups. When a user appears on several rows,
# later rows overwrite earlier ones, so each user maps to the values from
# their last row in the file.
artist_dict = dict(zip(artist_title_df.user_id, artist_title_df.artist))
title_dict = dict(zip(artist_title_df.user_id, artist_title_df.title))

# Attach the looked-up artist and title to every sampled user.
sample_user_id["artist"] = sample_user_id["user_id"].map(artist_dict)
sample_user_id["title"] = sample_user_id["user_id"].map(title_dict)
sample_user_id