import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
%matplotlib inline
import plotly.express as px
# Read the Netflix titles dataset from the working directory.
df = pd.read_csv("netflix_titles.csv")

# Quick inspection of the raw table. These are bare expressions: they only
# render output when evaluated as the last statement of a notebook cell.
df.head(5)
df.shape
df.columns

# Count missing values per column, then replace every missing value with a
# placeholder string so the string operations further down never see NaN.
df.isna().sum()
df = df.fillna(value='Not specified')

# Number of distinct values in each column.
df.nunique()
# --- Top 5 directors by number of titles ---
# A title may list several directors in one comma-separated string. Split the
# column and stack the pieces into one row per name. split(',') keeps whatever
# whitespace follows each comma, so strip every piece; otherwise "X" and " X"
# would be grouped as two different directors and their counts split.
director_name = (
    df['director']
    .str.split(',', expand=True)
    .stack()
    .str.strip()
    .to_frame(name='Director')
)
# Drop empty pieces left behind by stray or trailing commas.
director_name = director_name[director_name['Director'] != '']

# Number of titles per director, ignoring the placeholder used for missing values.
directors = director_name.groupby(['Director']).size().reset_index(name='Total Content')
directors = directors[directors.Director != 'Not specified']
directors = directors.sort_values(by=['Total Content'], ascending=False)

# Keep the five most prolific directors, then re-sort ascending so the largest
# count comes last in plotting order for the horizontal bar chart.
directorsTop5 = directors.head(5)
directorsTop5 = directorsTop5.sort_values(by=['Total Content'])

fig1 = px.bar(
    directorsTop5,
    x='Total Content',
    y='Director',
    title='Top 5 Directors on Netflix',
)
fig1.show()
# --- Top 5 actors by number of titles ---
# The cast of a title is one comma-separated string. Split it and stack the
# pieces into one row per name. split(',') keeps whatever whitespace follows
# each comma, so strip every piece; otherwise "X" and " X" would be grouped as
# two different actors and their counts split.
cast_name = (
    df['cast']
    .str.split(',', expand=True)
    .stack()
    .str.strip()
    .to_frame(name='Actor')
)
# Drop empty pieces left behind by stray or trailing commas.
cast_name = cast_name[cast_name['Actor'] != '']

# Number of titles per actor, ignoring the placeholder used for missing values.
actors = cast_name.groupby(['Actor']).size().reset_index(name='Total Content')
actors = actors[actors.Actor != 'Not specified']
actors = actors.sort_values(by=['Total Content'], ascending=False)

# Keep the five most frequent actors, then re-sort ascending so the largest
# count comes last in plotting order for the horizontal bar chart.
actorsTop5 = actors.head(5)
actorsTop5 = actorsTop5.sort_values(by=['Total Content'])

fig2 = px.bar(
    actorsTop5,
    x='Total Content',
    y='Actor',
    title='Top 5 Actors on Netflix',
)
fig2.show()
# Share of titles per rating: one row per rating with its title count, drawn
# as a pie chart.
p = df.groupby(['rating']).size().rename('counts').reset_index()

piechart = px.pie(
    p,
    names='rating',
    values='counts',
    title='Ratings of different contents on netflix',
)
piechart.show()
# Titles per release year and content type, from 2010 onwards, plotted as one
# line per type.
df1 = df.loc[:, ['type', 'release_year']].rename(
    columns={"release_year": "Release Year"}
)

# Group size per (year, type) pair, flattened back into ordinary columns.
df2 = (
    df1.groupby(['Release Year', 'type'])
    .size()
    .reset_index(name='Total Content')
)
# Only keep the years 2010 and later.
df2 = df2.loc[df2['Release Year'] >= 2010]

fig3 = px.line(
    df2,
    x="Release Year",
    y="Total Content",
    color='type',
    title='Trend of content produced over the years on Netflix',
)
fig3.show()
from textblob import TextBlob
# --- Sentiment of title descriptions per release year ---
def _sentiment_label(text):
    """Map a description to 'Positive', 'Negative' or 'Neutral'.

    The label is the sign of TextBlob's polarity score for ``text``:
    above zero is positive, below zero is negative, exactly zero is neutral.
    """
    polarity = TextBlob(text).sentiment.polarity
    if polarity > 0:
        return 'Positive'
    if polarity < 0:
        return 'Negative'
    return 'Neutral'


dfx = df[['release_year', 'description']]
dfx = dfx.rename(columns={'release_year': 'Release Year'})

# Label every description exactly once. The previous row-by-row loop assigned
# through dfx.loc[[index, 2], ...], which also overwrote row label 2 on every
# iteration and left that row with the sentiment of the last row processed.
# It also reused the name `p`, clobbering the ratings table built earlier.
dfx['Sentiment'] = dfx['description'].map(_sentiment_label)

# Titles per (release year, sentiment) pair, from 2010 onwards.
dfx = dfx.groupby(['Release Year', 'Sentiment']).size().reset_index(name='Total Content')
dfx = dfx[dfx['Release Year'] >= 2010]

fig4 = px.bar(
    dfx,
    x="Release Year",
    y="Total Content",
    color="Sentiment",
    title="Sentiment of content on Netflix",
)
fig4.show()