#COVIDVACCINE 💉😷🦠
Sentiments and Trends of Worldwide COVID-19 Tweets
import pandas as pd
# Load the tweet dataset from a CSV file expected in the working directory.
tweets = pd.read_csv('vaccination_tweets.csv')
# Preview the first rows, then list the column names that were loaded.
tweets.head()
tweets.columns
Tweets Data Exploration 🐦
# Count tweets per distinct value of the 'source' column and expose the
# result as a two-column frame: 'source' and 'count'.
source_counts = (
    tweets.groupby('source')
    .size()
    .reset_index(name='count')
)
# Rank by frequency (largest first) and keep the ten most common sources.
top_sources = source_counts.sort_values('count', ascending=False).iloc[:10]
top_sources
import seaborn as sns
import matplotlib.pyplot as plt
# Bar chart of the ten most frequent sources; x tick labels are drawn
# vertically so long labels do not overlap.
sns.barplot(x='source', y='count', data=top_sources);
plt.xticks(rotation='vertical');
# Count tweets per distinct 'user_location' value and keep the top three.
# NOTE(review): the variables are named "countries", but the grouping is on
# the raw user_location values - confirm these are actually country names.
country_counts = (
    tweets.groupby('user_location')
    .size()
    .reset_index(name='count')
)
top_countries = country_counts.sort_values('count', ascending=False).iloc[:3]
sns.barplot(x='user_location', y='count', data=top_countries);
plt.xticks(rotation='vertical');
Text Preparation for Analysis 🔡
# Remove rows whose text is missing BEFORE converting to str. Doing it the
# other way round (str() first, dropna() second) never drops anything:
# str(NaN) is the literal 'nan', which is not a missing value, and it would
# then be scored as if it were a real tweet.
tweets.dropna(subset=['text'], inplace=True)
# Normalise every tweet to a lower-case Python string.
tweets['text'] = tweets['text'].astype(str).str.lower()
import re
# Despite the name, this pattern matches every character that is not an
# ASCII letter or digit (punctuation, whitespace, emoji, accented letters).
punctuations = r'[^a-zA-Z0-9]'
# Replace each matched character with a single space so words stay separated.
tweets['text'] = tweets['text'].apply(lambda text: re.sub(punctuations, " ", text))
tweets['text'].head()
Sentiment Analysis 😀😟😐
from textblob import TextBlob
# Hand-written examples to get a feel for TextBlob's output: the first two
# read the polarity of a sentence, the last two read its subjectivity.
TextBlob('covid is terrible i lost my job and i am going crazy at home.').sentiment.polarity
TextBlob('i so happy and excited to get my vaccine').sentiment.polarity
TextBlob('the vaccine is being administered to my pharmacy').sentiment.subjectivity
TextBlob('i think the vaccine is scary the side effects are bad for me.').sentiment.subjectivity
# Score the polarity of every tweet; the raw array is assigned so the values
# are stored positionally, without index alignment.
polarity_values = tweets['text'].map(lambda body: TextBlob(body).sentiment.polarity)
tweets['polarity_score'] = polarity_values.to_numpy()
# Density histogram of the scores in bins of width 0.1.
sns.histplot(x='polarity_score', data=tweets, binwidth=0.1, stat='density');
plt.title('Distribution of Polarity Score');
# Text of the ten tweets with the highest polarity score.
tweets.sort_values('polarity_score', ascending=False)['text'].head(10)
# Scatter of favourites against polarity score.
sns.scatterplot(data=tweets, x='favorites', y='polarity_score');
plt.title('Favorites Vs. Polarity Score');
def score_to_description(polarity):
    """Map a numeric polarity score to a five-level sentiment label.

    The buckets are:

    * ``polarity <= -0.5``      -> ``'Very Negative'``
    * ``-0.5 < polarity < 0``   -> ``'Negative'``
    * ``polarity == 0``         -> ``'Neutral'``
    * ``0 < polarity < 0.5``    -> ``'Positive'``
    * ``polarity >= 0.5``       -> ``'Very Positive'``

    The boundary values -1.0, -0.5 and 0.5 are assigned to the "Very"
    buckets. Previously the strict inequalities let them fall through to
    ``'Neutral'``.

    Args:
        polarity: Numeric polarity score.

    Returns:
        One of ``'Very Negative'``, ``'Negative'``, ``'Neutral'``,
        ``'Positive'`` or ``'Very Positive'``. NaN compares false against
        every threshold and is therefore reported as ``'Neutral'``.
    """
    if polarity <= -0.5:
        return 'Very Negative'
    if polarity < 0:
        return 'Negative'
    if polarity >= 0.5:
        return 'Very Positive'
    if polarity > 0:
        return 'Positive'
    # Exactly zero, or NaN (every comparison above is False for NaN).
    return 'Neutral'
# Turn each numeric polarity score into its text label.
tweets['polarity_classification'] = tweets['polarity_score'].map(score_to_description)
# Share of tweets per label; vertical tick labels keep the names legible.
sns.histplot(x='polarity_classification', data=tweets, stat='density');
plt.title('Distribution of Polarity Classification');
plt.xticks(rotation='vertical');
# First five tweets labelled 'Very Negative'.
is_very_negative = tweets['polarity_classification'] == 'Very Negative'
tweets.loc[is_very_negative, 'text'].head(5)
# Score the subjectivity of every tweet; the raw array is assigned so the
# values are stored positionally, without index alignment.
subjectivity_values = tweets['text'].map(lambda body: TextBlob(body).sentiment.subjectivity)
tweets['subjectivity_score'] = subjectivity_values.to_numpy()
# Density histogram of the scores in bins of width 0.1.
sns.histplot(x='subjectivity_score', data=tweets, binwidth=0.1, stat='density');
plt.title('Distribution of Subjectivity');
# Text of the ten tweets with the highest subjectivity score.
tweets.sort_values('subjectivity_score', ascending=False)['text'].head(10)
Polarity Comparisons 🌎
# --- Comparison 1: polarity by user location -------------------------------
# Rows are selected by exact match on user_location, so values such as
# 'Mumbai, India' are not included.
india_tweets = tweets[tweets['user_location']=='India']
malay_tweets = tweets[tweets['user_location']=='Malaysia']
# Start a fresh figure so this comparison does not draw onto axes left over
# from an earlier plot when the code runs in one cell or as a script.
plt.figure();
# Overlay the two density-normalised histograms; alpha keeps both visible.
plt.hist(data=india_tweets, x='polarity_score', density=True, alpha=0.4, label='India');
plt.hist(data=malay_tweets, x='polarity_score', density=True, alpha=0.4, label='Malaysia');
plt.legend();
plt.title('Polarity Score Distribution: India vs. Malaysia');

# --- Comparison 2: polarity by vaccine mentioned ----------------------------
# Lower-case substring match on the tweet text; a tweet mentioning both
# names appears in both groups.
pfizer = tweets[tweets['text'].str.contains('pfizer')]
moderna = tweets[tweets['text'].str.contains('moderna')]
# Separate figure so the vaccine comparison is not drawn over the location one.
plt.figure();
plt.hist(data=pfizer, x='polarity_score', density=True, alpha=0.4, label='Pfizer');
plt.hist(data=moderna, x='polarity_score', density=True, alpha=0.4, label='Moderna');
plt.legend();
plt.title('Polarity Score Distribution: Pfizer vs. Moderna');