Exploratory Data Analysis (EDA): TED Talks
import pandas as pd
import seaborn as sns
import matplotlib.pyplot as plt
# Load the TED Talks dataset and take a first look at its structure:
# sample rows, dimensions, dtypes, missing values and cardinality per column.
df = pd.read_csv('data.csv')
df.head()
df.shape
df.info()
df.isna().sum()
df.nunique()
# Date: talks ordered newest-first, then oldest-first
# NOTE(review): if 'date' is stored as text this ordering is lexicographic,
# not chronological -- confirm the dtype reported by df.info() above.
df[['title', 'date']].sort_values(by='date', ascending=False)
df[['title', 'date']].sort_values(by='date')
# Views: most viewed talks first
df[['title', 'views']].sort_values(by='views', ascending=False)
# Views: least viewed talks first
df[['title', 'views']].sort_values(by='views')
# Likes: most liked talks first
df[['title', 'likes']].sort_values(by='likes', ascending=False)
# Likes: least liked talks first
df[['title', 'likes']].sort_values(by='likes')
df.nunique()
# Per-author view: authors of the individual most-liked talks, then the
# average likes and the total views aggregated per author (highest first).
df[['author', 'likes']].sort_values(by='likes', ascending=False)
df.groupby('author')['likes'].agg('mean').sort_values(ascending=False)
df.groupby('author')['views'].agg('sum').sort_values(ascending=False)
The authors below are the most viewed in all TED Talks
# Number of talks per author, most frequent authors first.
df['author'].value_counts()
df.nunique()
# Histogram of the 'date' column; tick labels are rotated so they stay legible.
df['date'].hist()
plt.xticks(rotation=90)
plt.show()
# Summary statistics, then horizontal box plots to spot outliers.
df.describe()
df.plot(kind='box', vert=False)
plt.show()
# Correlate numeric columns only. The frame presumably also holds text columns
# (e.g. 'title', 'author'); on pandas >= 2.0 a plain df.corr() raises
# ValueError when it meets non-numeric data, so they are dropped explicitly.
df.select_dtypes(include='number').corr()
# Pairwise scatter plots / distributions of the variables.
sns.pairplot(df)
plt.show()