#Importing the libraries
import pandas as pd
import numpy as np
import seaborn as sns
import warnings
warnings.simplefilter("ignore")
# Reading the csv file for Trump related tweets
trump = pd.read_csv("Trumpall2.csv")
# A bare `.head()` expression shows nothing when run as a script, so print
# the preview explicitly (the rest of this file does the same).
print(trump.head())
# Reading the csv file for Biden related tweets
biden = pd.read_csv("Bidenall2.csv")
print(biden.head())
#Importing textblob library
from textblob import TextBlob
# Spot-check: TextBlob sentiment of a single Trump tweet (row 10)
trump_texts = trump["text"]
tb1 = TextBlob(trump_texts[10])
print("Donald Trump :", tb1.sentiment)
# Spot-check: TextBlob sentiment of a single Biden tweet (row 500)
biden_texts = biden["text"]
tb2 = TextBlob(biden_texts[500])
print("Joe Biden :", tb2.sentiment)
# Defining the function to return the sentiment polarity
def findPoll(review):
    """Return the TextBlob sentiment polarity of a piece of text.

    Args:
        review: The text to score. Values that are not strings (for example
            the NaN pandas produces for an empty CSV cell) are treated as
            carrying no sentiment.

    Returns:
        The polarity reported by ``TextBlob(review).sentiment``, or ``0.0``
        when ``review`` is not a string.
    """
    if not isinstance(review, str):
        # TextBlob rejects non-string input; score it as neutral so a single
        # missing tweet does not abort the whole column computation.
        return 0.0
    return TextBlob(review).sentiment.polarity
# Add a "Sentiment Polarity" column to each dataframe (Trump first, then
# Biden) by scoring every tweet text with findPoll, and preview the result.
for frame in (trump, biden):
    frame["Sentiment Polarity"] = frame["text"].apply(findPoll)
    print(frame.head())
# Setting the opinion label for each tweet in a new column, for Trump and
# then for Biden: polarity > 0 -> "positive", polarity == 0 -> "Neutral",
# anything else -> "negative".
for frame in (trump, biden):
    polarity = frame["Sentiment Polarity"]
    # Each dataframe is labelled from its OWN polarity column, and the column
    # is assigned in one step instead of through chained indexing, which
    # pandas does not guarantee will write back to the dataframe.
    frame["Opinion Label"] = np.select(
        [polarity > 0, polarity == 0],
        ["positive", "Neutral"],
        default="negative",
    )
    print(frame.head())
# Dropping the neutral tweets (polarity exactly 0) from the Trump data.
# reviews1 keeps a copy of the rows that are removed.
cond1 = trump['Sentiment Polarity'] == 0
reviews1 = trump[cond1]
trump.drop(reviews1.index, inplace=True)
print(trump.shape)
# Dropping the neutral tweets from the Biden data. The mask is built from
# Biden's own rows (reviews2), so the result does not depend on whether the
# Trump data happened to contain any neutral tweets.
cond2 = biden['Sentiment Polarity'] == 0
reviews2 = biden[cond2]
biden.drop(reviews2.index, inplace=True)
print(biden.shape)
# Balancing both the datasets by discarding a fixed number of randomly
# chosen rows from each one.
def _drop_random_rows(frame, count, seed=10):
    """Return a copy of *frame* without *count* randomly selected rows.

    The global NumPy random generator is re-seeded with *seed* before
    sampling, so the same rows are selected on every run.
    """
    np.random.seed(seed)
    unlucky = np.random.choice(frame.index, count, replace=False)
    return frame.drop(unlucky)


# For Trump dataset
df_trump = _drop_random_rows(trump, 324)
print("Shape of trump Subset :", df_trump.shape)
# For Biden dataset
df_biden = _drop_random_rows(biden, 31)
print("Shape of Biden Subset :", df_biden.shape)
def _opinion_percentage(label_counts, label):
    """Return the share of *label* among all counted tweets, in percent.

    Args:
        label_counts: Series of tweet counts indexed by opinion label.
        label: The opinion label to look up, e.g. "positive".

    Returns:
        ``count(label) / total * 100``; 0.0 when there are no tweets at all
        or when *label* does not occur.
    """
    total = label_counts.sum()
    if total == 0:
        return 0.0
    # Look the count up by label rather than by position so a missing label
    # yields 0 instead of an IndexError or a silently shifted value.
    return label_counts.get(label, 0) / total * 100


# Analyzing the share of positive and negative sentiments in Trump data.
# Both shares use the same x100 scale and the real row total.
count_Trump = df_trump.groupby('Opinion Label').count()
negative_percentage1 = _opinion_percentage(count_Trump['Sentiment Polarity'], "negative")
positive_percentage1 = _opinion_percentage(count_Trump['Sentiment Polarity'], "positive")
# Analyzing the share of positive and negative sentiments in Biden data
count_Biden = df_biden.groupby('Opinion Label').count()
negative_percentage2 = _opinion_percentage(count_Biden['Sentiment Polarity'], "negative")
positive_percentage2 = _opinion_percentage(count_Biden['Sentiment Polarity'], "positive")
# Setting up the data for plotting.
# The labels are listed in the same order as the values below: the *1
# percentages come from the Trump data, the *2 percentages from the Biden data.
Politicians = ['Donald Trump', 'Joe Biden']
lis_pos = [positive_percentage1, positive_percentage2]
lis_neg = [negative_percentage1, negative_percentage2]
# Importing matplotlib library
import matplotlib.pyplot as plt
# Plotting a grouped bar graph for Trump vs Biden. The two series are placed
# side by side around each tick; drawing both at the same x position would
# paint the negative bars on top of the positive ones.
positions = np.arange(len(Politicians))
bar_width = 0.4
plt.bar(x=positions - bar_width / 2, height=lis_pos, width=bar_width, label="Positive")
plt.bar(x=positions + bar_width / 2, height=lis_neg, width=bar_width, label="Negative")
plt.xticks(positions, Politicians)
plt.legend()
plt.title("US ELECTION Trump vs Biden")
plt.show()