# Fandango Rating Discrepancy

import matplotlib.pyplot as plt
import numpy as np
import pandas as pd
import seaborn as sns

# Load the scraped Fandango data and take a first look at it.
fandango = pd.read_csv('fandango_scrape.csv')

fandango.head()

fandango.describe()

# Scatter of the RATING column against the VOTES column.
plt.figure(figsize=(10, 4), dpi=150)
sns.scatterplot(data=fandango, y="VOTES", x="RATING")

# Correlate the numeric columns only: the frame also carries the string
# FILM column, and pandas >= 2.0 raises on non-numeric data instead of
# silently dropping it.
fandango.select_dtypes(include="number").corr()

# Worked example of the parsing used below: keep whatever follows the last
# '(' in the title and strip the closing ')'.
title = 'Film Title Name (Year)'

title.split('(')[-1].replace(')', '')

# Apply the same parsing to every FILM entry to build the YEAR column
# (the values stay strings).
fandango['YEAR'] = fandango['FILM'].apply(
    lambda film: film.split('(')[-1].replace(')', '')
)

fandango

# Number of films per extracted year, as a table and as a bar chart.
fandango['YEAR'].value_counts()

sns.countplot(data=fandango, x='YEAR')

# How many films have no votes at all?
len(fandango[fandango['VOTES'] == 0])

# Keep only films with at least one vote. The explicit copy makes this an
# independent frame, so adding STARS_DIFF below does not write into a slice
# of `fandango` (which triggers pandas' SettingWithCopyWarning).
fan_reviewed = fandango[fandango['VOTES'] > 0].copy()

# Overlay the distributions of the RATING and STARS columns.
plt.figure(figsize=(10, 4), dpi=150)
sns.kdeplot(data=fan_reviewed, x='RATING', clip=[0, 5], fill=True, label='True Rating')
sns.kdeplot(data=fan_reviewed, x='STARS', clip=[0, 5], fill=True, label='Stars Displayed')
plt.legend(loc=(1.05, 0.5))

# STARS minus RATING, rounded to two decimals (presumably to absorb
# floating-point noise so equal differences share one countplot bar and the
# `== 1` filter below matches exactly).
fan_reviewed['STARS_DIFF'] = (fan_reviewed['STARS'] - fan_reviewed['RATING']).round(2)

fan_reviewed

plt.figure(figsize=(12, 4), dpi=150)
sns.countplot(data=fan_reviewed, x='STARS_DIFF')

# Films whose STARS value is a full point above their RATING value.
fan_reviewed[fan_reviewed['STARS_DIFF'] == 1]

# Load the scores collected from the other review sites.
all_sites = pd.read_csv('all_sites_scores.csv')

all_sites.head()

all_sites.describe()

# RottenTomatoes column against RottenTomatoes_User column.
plt.figure(dpi=150)
sns.scatterplot(data=all_sites, x='RottenTomatoes', y='RottenTomatoes_User')
plt.ylim(0, 100)
plt.xlim(0, 100)

# Signed gap: RottenTomatoes minus RottenTomatoes_User.
all_sites['Rotten_Diff'] = all_sites['RottenTomatoes'] - all_sites['RottenTomatoes_User']

# Mean absolute gap between the two Rotten Tomatoes columns.
all_sites['Rotten_Diff'].abs().mean()

# Distribution of the signed gap ...
plt.figure(figsize=(10, 4), dpi=200)
sns.histplot(data=all_sites, x='Rotten_Diff', kde=True, bins=25)

# ... and of its absolute value.
plt.figure(figsize=(10, 4), dpi=200)
sns.histplot(x=all_sites['Rotten_Diff'].abs(), kde=True, bins=25)

# The five most negative gaps, i.e. where RottenTomatoes_User exceeds
# RottenTomatoes by the widest margin.
all_sites.nsmallest(5, 'Rotten_Diff')[['FILM', 'Rotten_Diff']]

# Metacritic column against Metacritic_User column (note the different
# axis limits: 0-100 on x, 0-10 on y).
plt.figure(figsize=(10, 4), dpi=150)
sns.scatterplot(data=all_sites, x='Metacritic', y='Metacritic_User')
plt.xlim(0, 100)
plt.ylim(0, 10)

# Vote counts on Metacritic against vote counts on IMDB.
plt.figure(figsize=(10, 4), dpi=150)
sns.scatterplot(data=all_sites, x='Metacritic_user_vote_count', y='IMDB_user_vote_count')

# The single row with the most IMDB votes, then the one with the most
# Metacritic votes.
all_sites.nlargest(1, 'IMDB_user_vote_count')

all_sites.nlargest(1, 'Metacritic_user_vote_count')

# Inner join on FILM: keep only the films present in both tables.
df = pd.merge(fandango, all_sites, on='FILM', how='inner')

df.info()

df.head()

# Rescale the other sites' scores so they can be compared with STARS and
# RATING (the plots further down clip everything to 0-5). Columns divided by
# 20 are treated as 0-100 scores, columns divided by 2 as 0-10 scores.
# Each result is stored in a new *_Norm column computed from the untouched
# source column, so re-running these lines does not divide twice.
df['RT_Norm'] = np.round(df['RottenTomatoes'] / 20, 1)
df['RTU_Norm'] = np.round(df['RottenTomatoes_User'] / 20, 1)

df['Meta_Norm'] = np.round(df['Metacritic'] / 20, 1)
df['Meta_U_Norm'] = np.round(df['Metacritic_User'] / 2, 1)

df['IMDB_Norm'] = np.round(df['IMDB'] / 2, 1)

df.head()

df.columns

# Only the comparable score columns: Fandango's two plus the normalised ones.
norm_scores = df[['STARS', 'RATING', 'RT_Norm', 'RTU_Norm', 'Meta_Norm', 'Meta_U_Norm', 'IMDB_Norm']]

norm_scores.head()

def move_legend(ax, new_loc, **kws):
    """Redraw the legend already attached to *ax* at a new location.

    The handles, label texts and title are read from the legend stored in
    ``ax.legend_`` and passed to a fresh ``ax.legend`` call.

    Args:
        ax: Axes-like object exposing ``legend_`` and ``legend()``.
        new_loc: Forwarded to ``ax.legend`` as ``loc``.
        **kws: Additional keyword arguments forwarded to ``ax.legend``.
    """
    old_legend = ax.legend_
    # Matplotlib 3.7 renamed ``Legend.legendHandles`` to ``legend_handles``
    # and 3.9 removed the old spelling. Prefer the new name and fall back to
    # the old one so both older and newer installations work.
    handles = getattr(old_legend, "legend_handles", None)
    if handles is None:
        handles = old_legend.legendHandles
    labels = [text.get_text() for text in old_legend.get_texts()]
    title = old_legend.get_title().get_text()
    ax.legend(handles, labels, loc=new_loc, title=title, **kws)

# Density of every normalised score column on one axis.
# `fill=True` replaces the deprecated `shade=True` and matches the kdeplot
# calls earlier in this file.
fig, ax = plt.subplots(figsize=(15, 6), dpi=150)
sns.kdeplot(data=norm_scores, clip=[0, 5], fill=True, palette='Set1', ax=ax)
move_legend(ax, 'upper left')

# Same plot restricted to the RT_Norm and STARS columns.
fig, ax = plt.subplots(figsize=(15, 6), dpi=150)
sns.kdeplot(data=norm_scores[['RT_Norm', 'STARS']], clip=[0, 5], fill=True, palette='Set1', ax=ax)
move_legend(ax, "upper left")

# Histogram of all normalised score columns together.
plt.subplots(figsize=(15, 6), dpi=150)
sns.histplot(norm_scores, bins=50)

# Cluster the rows only; the column order is left as given.
sns.clustermap(norm_scores, col_cluster=False)

# Same score columns as norm_scores, plus FILM so rows can be identified.
norm_films = df[['STARS', 'RATING', 'RT_Norm', 'RTU_Norm', 'Meta_Norm', 'Meta_U_Norm', 'IMDB_Norm', 'FILM']]

# The ten rows with the lowest RT_Norm value (computed once and reused for
# the plot below).
lowest_rt = norm_films.nsmallest(10, 'RT_Norm')
lowest_rt

# Density of each score column for those ten films. FILM is dropped because
# it is not a score. `fill=True` replaces the deprecated `shade=True`.
plt.figure(figsize=(15, 6), dpi=150)
worst_films = lowest_rt.drop('FILM', axis=1)
sns.kdeplot(data=worst_films, clip=[0, 5], fill=True, palette='Set1')
# The trailing semicolon suppresses the returned Text repr in a notebook.
plt.title("Ratings for RT Critic's 10 Worst Reviewed Films");