import os,time
from datetime import datetime, timedelta
from ast import literal_eval
import spotipy
from spotipy.oauth2 import SpotifyOAuth
# Spotify credentials and the account name are read from the environment;
# a missing variable raises KeyError right here.
client_id = os.environ['SPOTIFY_CLIENT_ID']
client_secret = os.environ['SPOTIFY_CLIENT_SECRET']
username = os.environ['SPOTIFY_USERNAME']

# This url doesn't really matter. Just make sure to whitelist it in your dashboard
redirect_uri = 'http://localhost:1234/callback/'
scope = 'playlist-read-private'

# Authenticated API client used by the rest of the script.
# open_browser=False prevents Spotipy from attempting to open the default browser.
spotify = spotipy.Spotify(
    auth_manager=SpotifyOAuth(
        client_id=client_id,
        client_secret=client_secret,
        redirect_uri=redirect_uri,
        scope=scope,
        username=username,
        open_browser=False,
    )
)
import pandas as pd
# Fetch the current user's playlists. The endpoint is paginated, so keep
# following the 'next' link until every page has been collected; reading only
# the first response would silently drop playlists beyond the first page.
page = spotify.current_user_playlists()
playlist_items = list(page['items'])
while page['next']:
    page = spotify.next(page)
    playlist_items.extend(page['items'])
playlists = pd.DataFrame(playlist_items)
# Each playlist's 'tracks' entry is a mapping carrying the track count in 'total'.
playlists['n_tracks'] = [t['total'] for t in playlists.tracks]
import altair as alt
# Bar chart of track count per playlist; sort='-y' orders the bars by
# descending y value. Left as a bare expression so a notebook renders it.
alt.Chart(playlists).mark_bar().encode(
    x=alt.X('name', sort='-y'),
    y=alt.Y('n_tracks'),
    tooltip=['name', 'n_tracks'],
)
def to_tracks_df(tracks):
    """Flatten one page of playlist items into a DataFrame of tracks.

    Args:
        tracks: Mapping with an ``'items'`` list. Each item is expected to
            carry an ``'added_at'`` timestamp string
            (``%Y-%m-%dT%H:%M:%SZ``) and a ``'track'`` mapping that includes
            an ``'artists'`` list.

    Returns:
        A DataFrame with one row per item that actually has a track. It holds
        the track's own fields plus ``artist_name`` / ``artist_id`` (taken
        from the first listed artist, ``None`` when the artist list is empty)
        and ``added_at`` as a datetime (``NaT`` when the timestamp is
        missing). If no item has a track, an empty frame with the ``name``,
        ``artists``, ``artist_name``, ``artist_id`` and ``added_at`` columns
        is returned so callers can still select and concatenate.
    """
    # Items whose 'track' is missing or null cannot be turned into a row.
    items = [item for item in tracks['items'] if item.get('track')]
    if not items:
        return pd.DataFrame({
            'name': pd.Series(dtype='object'),
            'artists': pd.Series(dtype='object'),
            'artist_name': pd.Series(dtype='object'),
            'artist_id': pd.Series(dtype='object'),
            'added_at': pd.Series(dtype='datetime64[ns]'),
        })

    added_at = [
        datetime.strptime(item['added_at'], '%Y-%m-%dT%H:%M:%SZ')
        if item.get('added_at') else pd.NaT
        for item in items
    ]
    frame = pd.DataFrame([item['track'] for item in items])

    # There's sometimes multiple artists. Get the first one; fall back to an
    # empty mapping when a track lists no artists at all.
    first_artists = [
        a[0] if isinstance(a, (list, tuple)) and a else {}
        for a in frame['artists']
    ]
    frame['artist_name'] = [artist.get('name') for artist in first_artists]
    frame['artist_id'] = [artist.get('id') for artist in first_artists]
    frame['added_at'] = added_at
    return frame
# Preview one playlist: find the id of the playlist named 'Trop Trash'
# (.iloc[0] raises IndexError if no playlist has that name), request up to
# 100 of its items and show artist and title for each track.
trop_trash_id = playlists.loc[playlists.name == 'Trop Trash', 'id'].iloc[0]
tracks = to_tracks_df(spotify.playlist_tracks(trop_trash_id, limit=100))
tracks[['artist_name', 'name']]
# Get the tracks for all playlists, skipping any whose name contains
# 'Time Capsule' (na=False treats a missing name as "does not contain").
# playlist_tracks returns a single page of results, so follow the 'next' links
# to collect every track instead of silently truncating long playlists.
tracks = []
for i, p in playlists.loc[~playlists.name.str.contains('Time Capsule', na=False)].iterrows():
    page = spotify.playlist_tracks(p['id'])
    while page:
        t = to_tracks_df(page)
        t['playlist_name'] = p['name']
        tracks.append(t)
        page = spotify.next(page) if page['next'] else None
tracks = pd.concat(tracks, ignore_index=True)
# Order chronologically and attach a running count of tracks added so far.
tracks.sort_values('added_at', inplace=True)
tracks['total_count'] = range(1, tracks.shape[0] + 1)
# Keep only playlists whose tracks were added on more than 10 distinct dates.
data = tracks.copy().groupby('playlist_name').filter(
    lambda grp: grp.added_at.dt.date.nunique() > 10
)
# One circle per track: date added (binned by year-month) on x, playlist on y.
alt.Chart(data).mark_circle(size=70).encode(
    x=alt.X(
        'added_at:T',
        sort='-y',
        title='date_added',
        timeUnit='yearmonth',
        axis=alt.Axis(format='%Y', tickCount=6),
    ),
    y=alt.Y('playlist_name:N'),
    color=alt.Color('playlist_name:N', legend=None),
    tooltip=['added_at', 'name', 'artist_name', 'playlist_name'],
).properties(width=880, height=300)
# Load artist metadata from the local cache when possible; otherwise fetch it
# from Spotify and write the cache for later runs.
try:
    artists = pd.read_csv('./artists.csv')
    # Lists are stored in the CSV as their string repr; parse them back.
    artists.genres = artists.genres.apply(literal_eval)
except (OSError, ValueError, SyntaxError, AttributeError):
    # Cache missing, unreadable, or not in the expected shape: refetch.
    # (A bare `except:` here would also swallow KeyboardInterrupt.)
    # Each artist is requested once, even if they appear on many tracks, so
    # the resulting table has one row per artist.
    artist_ids = tracks['artist_id'].dropna().drop_duplicates().tolist()
    # Split into chunks, so we don't send too many requests
    chunks = [artist_ids[i:i + 10] for i in range(0, len(artist_ids), 10)]
    artists = []
    for i, c in enumerate(chunks):
        print(f'Fetching {i+1} of {len(chunks)}...')
        res = spotify.artists(c)['artists']
        df = pd.DataFrame(res)
        artists.append(df)
        time.sleep(3)  # Wait 3 seconds, so we don't get blocked by spotify's servers
    artists = pd.concat(artists, ignore_index=True)
    artists.to_csv('./artists.csv', index=False)  # Save for later use
# One row per artist: if the artist table contains repeated ids (e.g. a cache
# written with one row per track), the merge below becomes many-to-many and
# inflates the per-genre track counts.
data = artists.drop_duplicates(subset='id')
# Attach every track (with its playlist and date added) to its artist's metadata.
data = pd.merge(
    data,
    tracks[['added_at', 'artist_id', 'playlist_name']],
    left_on='id',
    right_on='artist_id',
)
# One row per (track, genre) pair.
data = data.explode('genres')
data = data.loc[data.playlist_name != 'Liked from Radio']
# Restrict to the 15 most frequent genres.
data = data.loc[data.genres.isin(data.genres.value_counts().head(15).index)]
# Lift Altair's row limit so the full table can be embedded in the chart.
alt.data_transformers.disable_max_rows()
alt.Chart(data).mark_bar().encode(
    x=alt.X('genres:N', sort='-y'),
    y=alt.Y('count()', title='# of tracks added to playlist'),
    color=alt.Color('genres:N', legend=None),
    tooltip=['genres', 'count()']
).properties(
    width=880
)