import numpy as np
import pandas as pd
import seaborn as sns
import matplotlib.pyplot as plt
# Load the Disney box-office dataset (the path follows the Kaggle input layout).
disney_path = '../input/disney-movies-19372016-total-gross/disney_movies_total_gross.csv'
disney_data = pd.read_csv(disney_path)

# First look at the raw table. Bare expressions such as head() only render
# when they are the last statement of a notebook cell; info() prints directly.
disney_data.head()
disney_data.info()

# Parse release_date into datetime64 so the .dt accessor and time-axis plots work.
disney_data['release_date'] = pd.to_datetime(disney_data['release_date'])

# Derive the release year as a string column for the per-year plots and
# groupings further down. strftime('%Y') reads the year straight from the
# datetime instead of round-tripping through date -> str -> split, and it
# leaves missing dates as missing rather than turning them into a literal
# 'NaT' year label.
disney_data['year'] = disney_data['release_date'].dt.strftime('%Y')

# Confirm the resulting dtypes and summarise the numeric columns.
disney_data.dtypes
disney_data.describe().T
# Report the largest and smallest value of each gross column.
raw_gross = disney_data['total_gross']
print(
    'the best total gross', raw_gross.max(),
    '\nThe worse total gross:', raw_gross.min(),
)

adjusted_gross = disney_data['inflation_adjusted_gross']
print(
    'the best inflation adjusted gross', adjusted_gross.max(),
    '\nThe worse inflation_adjusted_gross:', adjusted_gross.min(),
)
# Number of movies per MPAA rating (vertical bars).
rating_fig, rating_ax = plt.subplots(figsize=(10, 6))
rating_ax.set_title('Disney movies rating')
sns.countplot(data=disney_data, x='mpaa_rating', ax=rating_ax)
# Set after plotting so this label replaces the column name seaborn puts there.
rating_ax.set_xlabel('rating')

# Number of movies per genre (horizontal bars, one row per genre).
genre_fig, genre_ax = plt.subplots(figsize=(10, 6))
genre_ax.set_title('Disney movies genres')
sns.countplot(data=disney_data, y='genre', ax=genre_ax)
# Histogram of releases per year with a KDE overlay. `year` holds strings,
# so each distinct year is its own category on the x axis.
year_fig, year_ax = plt.subplots(figsize=(12, 8))
plt.xticks(rotation=90)  # vertical tick labels
year_ax.set_title('Disney movies per year')
sns.histplot(disney_data['year'], kde=True, ax=year_ax)
# Gross against release date: first the total_gross column, then the
# inflation_adjusted_gross column. Each entry is (column, title, y label);
# both figures share the same layout.
gross_over_time = (
    ('total_gross',
     'Evolution of movies gross with time',
     'total gross'),
    ('inflation_adjusted_gross',
     'Evolution of movies gross with time adjusted by inflation',
     'inflation adjusted gross'),
)
for gross_col, plot_title, y_label in gross_over_time:
    plt.figure(figsize=(12, 6))
    plt.title(plot_title)
    plt.xlabel('release date')
    plt.ylabel(y_label)
    sns.lineplot(x='release_date', y=gross_col, data=disney_data)
# Mean of every numeric column per genre.
# numeric_only=True is required: the frame also carries non-numeric columns
# (release_date is datetime, year is a string), and pandas >= 2.0 raises a
# TypeError on those instead of silently dropping them as older versions did.
avg_genre = disney_data.groupby('genre').mean(numeric_only=True)

# Two stacked horizontal bar charts, one bar per genre:
# total_gross on top, inflation_adjusted_gross below.
plt.figure(figsize=(12, 8))
# The title says "Average" because these are per-genre means, not totals.
plt.subplot(2, 1, 1).set_title('Average gross per genre')
sns.barplot(x=avg_genre['total_gross'], y=avg_genre.index)
plt.subplot(2, 1, 2)
sns.barplot(x=avg_genre['inflation_adjusted_gross'], y=avg_genre.index)
# Sum of every numeric column per genre.
# numeric_only=True keeps the aggregation away from the datetime column
# (summing it raises a TypeError on pandas >= 2.0) and from string columns
# (which would otherwise be concatenated). Only the two gross columns are
# used below.
sum_genre = disney_data.groupby('genre').sum(numeric_only=True)

# Two stacked horizontal bar charts, one bar per genre:
# total_gross on top, inflation_adjusted_gross below.
plt.figure(figsize=(12, 8))
plt.subplot(2, 1, 1).set_title('Gross per genre')
sns.barplot(x=sum_genre['total_gross'], y=sum_genre.index)
plt.subplot(2, 1, 2)
sns.barplot(x=sum_genre['inflation_adjusted_gross'], y=sum_genre.index)
# Number of movies in each genre (rendered only as a notebook cell result).
disney_data['genre'].value_counts()

# One strip plot per gross column: every movie is a point on its genre's row.
for gross_col in ('total_gross', 'inflation_adjusted_gross'):
    plt.figure(figsize=(12, 8))
    sns.stripplot(y='genre', x=gross_col, data=disney_data)

# Box plot of inflation-adjusted gross per genre, with slanted genre labels.
box_fig, box_ax = plt.subplots(figsize=(12, 6))
plt.xticks(rotation=30)
box_ax.set_ylabel('inflation adjusted gross')
sns.boxplot(x='genre', y='inflation_adjusted_gross', data=disney_data, ax=box_ax)
# Mean of every numeric column per MPAA rating.
# numeric_only=True is required: the frame also carries non-numeric columns
# (release_date is datetime, year is a string), and pandas >= 2.0 raises a
# TypeError on those instead of silently dropping them as older versions did.
avg_rating = disney_data.groupby('mpaa_rating').mean(numeric_only=True)

# Two stacked horizontal bar charts, one bar per rating:
# total_gross on top, inflation_adjusted_gross below.
plt.figure(figsize=(12, 8))
plt.subplot(2, 1, 1).set_title('average gross per rating')
sns.barplot(x=avg_rating['total_gross'], y=avg_rating.index)
plt.subplot(2, 1, 2)
sns.barplot(x=avg_rating['inflation_adjusted_gross'], y=avg_rating.index)
# Sum of every numeric column per MPAA rating.
# numeric_only=True keeps the aggregation away from the datetime column
# (summing it raises a TypeError on pandas >= 2.0) and from string columns
# (which would otherwise be concatenated).
sum_rating = disney_data.groupby('mpaa_rating').sum(numeric_only=True)
# The script previously stored these totals under the name `avg_rating`;
# keep that binding so any later code reading it still sees the totals.
avg_rating = sum_rating

# Two stacked horizontal bar charts, one bar per rating:
# total_gross on top, inflation_adjusted_gross below.
plt.figure(figsize=(12, 8))
plt.subplot(2, 1, 1).set_title('gross per rating')
sns.barplot(x=sum_rating['total_gross'], y=sum_rating.index)
plt.subplot(2, 1, 2)
sns.barplot(x=sum_rating['inflation_adjusted_gross'], y=sum_rating.index)
# Sum of every numeric column per release year.
# numeric_only=True keeps the aggregation away from the datetime column
# (summing it raises a TypeError on pandas >= 2.0) and from string columns
# (which would otherwise be concatenated). `year` is a string column, so
# the resulting index holds year strings.
year_income = disney_data.groupby('year').sum(numeric_only=True)

# Yearly total of total_gross.
plt.figure(figsize=(12, 6))
plt.title('disney movies income')
plt.xlabel('year')
plt.ylabel('income')
plt.xticks(rotation=90)  # vertical tick labels
sns.lineplot(x=year_income.index, y=year_income['total_gross'])

# Yearly total of inflation_adjusted_gross.
plt.figure(figsize=(12, 6))
plt.title('disney movies income adjusted')
plt.xlabel('year')
plt.ylabel('inflation adjusted income')
plt.xticks(rotation=90)  # vertical tick labels
sns.lineplot(x=year_income.index, y=year_income['inflation_adjusted_gross'])