# Setup (run in a shell, not in Python): pip install --upgrade google-api-python-client
from googleapiclient.discovery import build
import os
import pandas as pd
import seaborn as sns
import matplotlib.pyplot as plt
# The developer key is read from the API_KEY environment variable
# (a KeyError is raised here if it is not set).
API_KEY = os.environ['API_KEY']

# ID of the channel whose uploads are analyzed below.
CHANNEL_ID = 'UCRgseC5dkTgIVzN1nb5iL-g'

# Client object for version 3 of the "youtube" service.
youtube = build(serviceName='youtube', version='v3', developerKey=API_KEY)
# Function to get the channel's stats.
# The result also contains the uploads playlist ID we can use to grab videos.
def get_channel_stats(youtube, channel_id):
    """Return the channel resources matching ``channel_id``.

    Args:
        youtube: API client exposing ``channels().list(...)``.
        channel_id: ID of the channel to look up.

    Returns:
        The ``items`` list of the response (each item carries the
        ``snippet``, ``contentDetails`` and ``statistics`` parts), or an
        empty list when the response has no ``items`` key.
    """
    request = youtube.channels().list(
        part="snippet,contentDetails,statistics",
        id=channel_id,
    )
    response = request.execute()
    # A lookup that matches nothing comes back without an "items" key, so
    # fall back to an empty list instead of raising KeyError.
    return response.get('items', [])
# This will get us a list of videos from a playlist.
# Note a page of results has a max value of 50 so we will
# need to loop over our results with a pageToken.
def get_video_list(youtube, upload_id):
    """Collect the unique video IDs of a playlist, in first-seen order.

    Args:
        youtube: API client exposing ``playlistItems().list(...)``.
        upload_id: ID of the playlist to walk.

    Returns:
        A list of video ID strings without duplicates, ordered as they
        were first encountered across the result pages.
    """
    video_list = []
    seen = set()  # O(1) duplicate check; video_list keeps the order
    page_token = None

    while True:
        params = dict(
            part="snippet,contentDetails",
            playlistId=upload_id,
            maxResults=50,
        )
        # Only the follow-up requests carry a page token.
        if page_token:
            params['pageToken'] = page_token
        response = youtube.playlistItems().list(**params).execute()

        for item in response.get('items', []):
            video_id = item['contentDetails']['videoId']
            if video_id not in seen:
                seen.add(video_id)
                video_list.append(video_id)

        # Do we have more pages?
        page_token = response.get('nextPageToken')
        if not page_token:
            break

    return video_list
# Once we have our video list we can pass it to this function to get details.
# Again we have a max of 50 at a time so the list is processed in batches.
def get_video_details(youtube, video_list):
    """Fetch title, description, publish date, tag count and statistics per video.

    Args:
        youtube: API client exposing ``videos().list(...)``.
        video_list: Sequence of video ID strings.

    Returns:
        A list of dicts, one per video in the API responses, with the keys
        ``title``, ``description``, ``published``, ``tag_count``,
        ``view_count``, ``like_count``, ``dislike_count`` and
        ``comment_count``. Statistics are passed through as returned by the
        API; a statistic missing from the response defaults to ``0``.
    """
    stats_list = []
    batch_size = 50  # Can only get 50 videos at a time.

    for start in range(0, len(video_list), batch_size):
        batch = video_list[start:start + batch_size]
        request = youtube.videos().list(
            part="snippet,contentDetails,statistics",
            # The id parameter is a comma-separated list of video IDs.
            id=",".join(batch),
        )
        data = request.execute()

        for video in data.get('items', []):
            snippet = video['snippet']
            statistics = video['statistics']
            stats_list.append(dict(
                title=snippet['title'],
                description=snippet['description'],
                published=snippet['publishedAt'],
                # "tags" is left out of the snippet for untagged videos.
                tag_count=len(snippet.get('tags', [])),
                view_count=statistics.get('viewCount', 0),
                like_count=statistics.get('likeCount', 0),
                dislike_count=statistics.get('dislikeCount', 0),
                comment_count=statistics.get('commentCount', 0),
            ))

    return stats_list
# Look up the channel, then pull the details of every video in its
# "uploads" playlist.
channel_stats = get_channel_stats(youtube, CHANNEL_ID)
print(channel_stats)
upload_id = channel_stats[0]['contentDetails']['relatedPlaylists']['uploads']
upload_id
video_list = get_video_list(youtube, upload_id)
video_data = get_video_details(youtube, video_list)

# One row per video.
df = pd.DataFrame(video_data)
df['title_length'] = df['title'].str.len()

# Make the count columns numeric so they can be summed and ranked.
count_columns = ["view_count", "like_count", "dislike_count", "comment_count"]
df[count_columns] = df[count_columns].apply(pd.to_numeric)

# reactions (used later) = likes + dislikes + comments, each counted once.
df["reactions"] = df["like_count"] + df["dislike_count"] + df["comment_count"]

df.to_csv("GMM-Data.csv")
df.head(20)
# NOTE: This chart is empty.
# The chart was probably not set up properly in the notebook.
# Histogram of tag count (bars only: no KDE curve, no rug marks).
chart = sns.distplot(df["tag_count"], hist=True, kde=False, rug=False)
chart.set_xlabel('Number of Tags Used')
chart.set_ylabel('Occurrences')

# Histogram of title length, drawn with the same settings.
chart = sns.distplot(df["title_length"], hist=True, kde=False, rug=False)
chart.set_xlabel('Title Length')
chart.set_ylabel('Occurrences')
# Ten most viewed videos, prepared for a horizontal bar chart.
df_highest_views = df.nlargest(10, 'view_count')
# Titles are cut to 40 characters to keep the axis labels short.
df_highest_views['title'] = df_highest_views['title'].str[:40]
# Views are divided by 1000, so the column and the axis label both say
# "thousands" (the column used to be called view_count_millions).
df_highest_views['view_count_thousands'] = df_highest_views['view_count'] / 1000
df_highest_views

sns.set(rc={'figure.figsize': (20, 5)})
plot = sns.barplot(x="view_count_thousands", y="title", data=df_highest_views, palette="bright")
plot.set(xlabel='Thousands of Views', ylabel='')
plot.set_title('Most Viewed Videos')
# NOTE(review): the x-range is hard-coded; bars that end below 20 (thousand
# views) will not be visible - confirm it fits this channel's data.
plt.xlim(20, 32)
# Most interactions (likes + dislikes + comments): top ten videos, with
# titles cut to 40 characters and reactions expressed in thousands.
df_highest_reactions = df.nlargest(n=10, columns='reactions').assign(
    title=lambda top: top['title'].str[:40],
    reactions=lambda top: top['reactions'] / 1000,
)
df_highest_reactions

sns.set(rc={'figure.figsize': (20, 5)})
plot = sns.barplot(
    x="reactions",
    y="title",
    data=df_highest_reactions,
    palette="bright",
)
plot.set_xlabel('Thousands of Reactions')
plot.set_ylabel('')
plot.set_title('Most Video Reactions')
plt.xticks(rotation=45)
# NOTE(review): fixed x-range; confirm it matches this channel's data.
plt.xlim(300, 700)
# Ten most thumbed-down videos, with titles cut to 40 characters and
# dislikes expressed in thousands.
df_highest_disliked = df.nlargest(n=10, columns='dislike_count').assign(
    title=lambda top: top['title'].str[:40],
    dislike_count=lambda top: top['dislike_count'] / 1000,
)
df_highest_disliked

sns.set(rc={'figure.figsize': (20, 5)})
plot = sns.barplot(
    x="dislike_count",
    y="title",
    data=df_highest_disliked,
    palette="bright",
)
plot.set_xlabel('Thousands of Thumbs Down')
plot.set_ylabel('')
plot.set_title('Most Thumbed Down Videos')
plt.xticks(rotation=45)
# NOTE(review): fixed x-range; confirm it matches this channel's data.
plt.xlim(10, 55)