This notebook accompanies the essay "‘Clicks for Charity’: Exploring the Relationship between Philanthropy and Branding in the Context of MrBeast's YouTube Channel".
Author: Minh-Huong Le, ID:13581759 @ University of Amsterdam
Honours Module: The Digital Presence - Global Practices
Prepare
!pip install wordcloud
from wordcloud import WordCloud
import matplotlib.pyplot as plt
!pip install emoji
import pandas as pd
import nltk
nltk.download('stopwords')
from nltk.corpus import stopwords
# Tokens excluded from the text analysis: the channel-name tokens below plus
# NLTK's stop-word lists for English, Spanish, Russian and French.
COMMON_WORDS = {'mr', 'beast', 'mrbeast', 'jimmy'}
for stopword_language in ('english', 'spanish', 'russian', 'french'):
    COMMON_WORDS.update(stopwords.words(stopword_language))
import emoji
def remove_emoji(string):
    """Return ``string`` with every emoji replaced by the empty string."""
    without_emoji = emoji.replace_emoji(string, replace='')
    return without_emoji
# Smoke check: run remove_emoji on a sample comment that contains emoji.
remove_emoji('devamı gelsin 🙏devamı gelsin 🙏🥹')
def remove_common_words(txt, common_words=None):
    """Drop every whitespace-separated token of ``txt`` that is a common word.

    A token is dropped when its lower-cased form is in ``common_words``,
    either as written or after stripping leading/trailing punctuation. The
    second check is what catches tokens such as ``"MrBeast!"`` or ``"the,"``,
    which a plain membership test lets through.

    Parameters
    ----------
    txt : str
        Text to filter. It is split on whitespace.
    common_words : container of str, optional
        Lower-cased words to remove. Defaults to the module-level
        ``COMMON_WORDS`` set.

    Returns
    -------
    str
        The surviving tokens, unchanged, joined by single spaces.
    """
    import string  # local import: only needed for the punctuation table

    if common_words is None:
        common_words = COMMON_WORDS

    # ASCII punctuation plus a few marks common in Spanish/French/Russian text.
    edge_punctuation = string.punctuation + '¡¿«»“”‘’…'

    def is_common(token):
        lowered = token.lower()
        # Check the raw token first so anything matched before still matches.
        return lowered in common_words or lowered.strip(edge_punctuation) in common_words

    return ' '.join(token for token in txt.split() if not is_common(token))
# Smoke check: run the filter on a sample sentence that mixes ordinary words with 'mr' and 'beast'.
remove_common_words('Xinhan is the most beautiful girlfriend mr aha beast que')
!pip install lingua-language-detector
Emotion Analysis
# Load the raw dataset. The cleaning stages below each build a new frame.
df = pd.read_json('dataset.json')
df.head()


def _clean_strings(frame, cleaner):
    """Apply ``cleaner`` to every string cell of ``frame``; other cells pass through unchanged."""
    return frame.applymap(lambda cell: cleaner(cell) if isinstance(cell, str) else cell)


# Stage 1: strip emoji from every string cell.
cleandf = _clean_strings(df, remove_emoji)
cleandf
# Stage 2: additionally remove the words listed in COMMON_WORDS.
supercleandf = _clean_strings(cleandf, remove_common_words)
supercleandf
from transformers import pipeline
# Disabled language-detection experiment (lingua), kept for reference:
# from lingua import Language, LanguageDetectorBuilder
# languages = [Language.ENGLISH, Language.FRENCH, Language.GERMAN, Language.SPANISH]
# detector = LanguageDetectorBuilder.from_languages(*languages).build()
# langs = df['text'].apply(detector.detect_language_of)
# langs

# Text-classification pipeline backed by the model named below.
EMOTION_MODEL = "j-hartmann/emotion-english-distilroberta-base"
classifier = pipeline("text-classification", model=EMOTION_MODEL)
# Smoke check on two trivial inputs.
classifier(['Hey', 'Hello'])
# Only the first 100 characters of each emoji-free comment are classified.
MAX_CLASSIFIED_CHARS = 100
snippets = [comment[:MAX_CLASSIFIED_CHARS] for comment in cleandf['text']]
emotions = classifier(snippets)
emotions
# Attach the classifier output as the two leading columns of the raw frame.
# Columns left over from a previous run of this cell are dropped first, so
# re-running it no longer fails because the column already exists.
df = df.drop(columns=['emotion', 'emotion_confidence_score'], errors='ignore')
df.insert(0, 'emotion', [x['label'] for x in emotions])
df.insert(0, 'emotion_confidence_score', [x['score'] for x in emotions])
df
# Persist the labelled comments. The row index is written too, as an
# unnamed first column.
df.to_csv('output.csv')
Visualize
import pandas as pd
# Reload the labelled comments written to output.csv.
df = pd.read_csv('output.csv')
df.head()
# Copy of the frame with emoji and COMMON_WORDS removed from every string cell.
supercleandf = df.applymap(
    lambda cell: remove_common_words(remove_emoji(cell)) if type(cell) == str else cell
)
supercleandf
# Discard rows labelled 'neutral' and rows whose confidence score is below the threshold.
MIN_EMOTION_CONFIDENCE = 0.7
is_neutral = df['emotion'] == 'neutral'
is_low_confidence = df['emotion_confidence_score'] < MIN_EMOTION_CONFIDENCE
clean_emotion_df = df.drop(df[is_neutral | is_low_confidence].index)
# One subset per value of the 'from' column.
well, blind, givemyfriend, givesubscriberisland, last2leave = (
    clean_emotion_df[clean_emotion_df['from'] == source]
    for source in ('well', 'blind', 'givemyfriend', 'givesubscriberisland', 'last2leave')
)
%matplotlib inline
text = ' '.join(supercleandf['text'])
# Generate a word cloud image
wordcloud = WordCloud(max_font_size=220, background_color="white",width=1920, height=1080).generate(text)
wordcloud.to_image().show()
# Distinct values of the 'from' column.
df['from'].unique()
clean_emotion_df
# Print every 'fear' comment that mentions "help". The isinstance guard skips
# NaN cells, on which the `in` test would raise TypeError.
for t in clean_emotion_df[clean_emotion_df['emotion'] == 'fear']['text']:
    if isinstance(t, str) and 'help' in t:
        print(t)
# clean_emotion_df[clean_emotion_df['emotion'] == 'fear']
# a = pd.DataFrame()
# a['text'] = clean_emotion_df[clean_emotion_df['emotion'] == 'fear']['text']
# a
%matplotlib inline
text = ' '.join(clean_emotion_df[clean_emotion_df['emotion'] == 'fear']['text'])
# Generate a word cloud image
wordcloud = WordCloud(max_font_size=220, background_color="white",width=1920, height=1080).generate(text)
wordcloud.to_image().show()
%matplotlib inline
text = ' '.join(clean_emotion_df[clean_emotion_df['emotion'] == 'joy']['text'])
# Generate a word cloud image
wordcloud = WordCloud(max_font_size=220, background_color="white",width=1920, height=1080).generate(text)
wordcloud.to_image().show()
%matplotlib inline
text = ' '.join(clean_emotion_df[clean_emotion_df['emotion'] == 'surprise']['text'])
# Generate a word cloud image
wordcloud = WordCloud(max_font_size=220, background_color="white",width=1920, height=1080).generate(text)
wordcloud.to_image().show()
%matplotlib inline
text = ' '.join(clean_emotion_df[clean_emotion_df['emotion'] == 'anger']['text'])
# Generate a word cloud image
wordcloud = WordCloud(max_font_size=220, background_color="white",width=1920, height=1080).generate(text)
wordcloud.to_image().show()
%matplotlib inline
text = ' '.join(clean_emotion_df[clean_emotion_df['emotion'] == 'disgust']['text'])
# Generate a word cloud image
wordcloud = WordCloud(max_font_size=220, background_color="white",width=1920, height=1080).generate(text)
wordcloud.to_image().show()