# NLP - Predict US Election

# Importing the libraries
import warnings

import numpy as np
import pandas as pd
import seaborn as sns

warnings.simplefilter("ignore")

# Load the Trump-related tweets from disk and preview the first rows.
trump = pd.read_csv("Trumpall2.csv")
trump.head()

# Load the Biden-related tweets from disk and preview the first rows.
biden = pd.read_csv("Bidenall2.csv")
biden.head()

# Importing the TextBlob class used for sentiment scoring
from textblob import TextBlob

# Sentiment analysis with TextBlob on a single Trump tweet (row label 10)
tb1 = TextBlob(trump["text"][10])
print("Donald Trump :", tb1.sentiment)

# Sentiment analysis with TextBlob on a single Biden tweet (row label 500)
tb2 = TextBlob(biden["text"][500])
print("Joe Biden :", tb2.sentiment)

# Defining the function to return the sentiment polarity
def findPoll(review: object) -> float:
    """Return the TextBlob sentiment polarity of ``review``.

    Args:
        review: Tweet text to score.

    Returns:
        ``TextBlob(review).sentiment.polarity`` when ``review`` is a string.
        ``0.0`` when it is not a string (for example the NaN that pandas
        produces for an empty CSV cell), so that a single missing tweet does
        not abort a whole ``Series.apply`` with a ``TypeError``.
    """
    # TextBlob only accepts strings; treat anything else as "no sentiment".
    if not isinstance(review, str):
        return 0.0
    return TextBlob(review).sentiment.polarity

# Add a "Sentiment Polarity" column to the Trump DataFrame by scoring
# every tweet text with findPoll, then show the first rows.
trump["Sentiment Polarity"] = trump["text"].apply(findPoll)
print(trump.head())

# Add a "Sentiment Polarity" column to the Biden DataFrame by scoring
# every tweet text with findPoll, then show the first rows.
biden["Sentiment Polarity"] = biden["text"].apply(findPoll)
print(biden.head())

# Label every Trump tweet from its polarity:
# > 0 -> "positive", == 0 -> "Neutral", anything else -> "negative".
trump["Opinion Label"] = np.where(
    trump["Sentiment Polarity"] > 0, "positive", "negative"
)
# Use a single .loc assignment: the chained form df[col][mask] = value may
# write to a temporary copy and leave the DataFrame unchanged.
trump.loc[trump["Sentiment Polarity"] == 0, "Opinion Label"] = "Neutral"
print(trump.head())

# Label every Biden tweet from its polarity:
# > 0 -> "positive", == 0 -> "Neutral", anything else -> "negative".
biden["Opinion Label"] = np.where(
    biden["Sentiment Polarity"] > 0, "positive", "negative"
)
# The neutral mask must be built from biden's own polarity column (not
# trump's), and assigned through .loc so the write reaches the DataFrame
# instead of a temporary copy.
biden.loc[biden["Sentiment Polarity"] == 0, "Opinion Label"] = "Neutral"
print(biden.head())

# Drop the neutral Trump tweets (polarity exactly 0) in place.
# reviews1 keeps a copy of the rows that are removed.
cond1 = trump["Sentiment Polarity"] == 0.0
reviews1 = trump[cond1]
trump.drop(index=reviews1.index, inplace=True)
print(trump.shape)

# Drop the neutral Biden tweets (polarity exactly 0) in place.
# reviews2 keeps a copy of the rows that are removed.
# The mask is derived from biden itself; building it from the Trump
# selection would silently drop nothing whenever Trump has no neutral rows.
cond2 = biden["Sentiment Polarity"] == 0.0
reviews2 = biden[cond2]
biden.drop(index=reviews2.index, inplace=True)
print(biden.shape)

# Balancing both the datasets by randomly discarding a fixed number of rows.
# NOTE(review): the row counts below are hard-coded for this particular
# data - presumably chosen so both subsets end up the same size; confirm.
# np.random.choice raises ValueError if a frame has fewer rows than `remove`.

# For Trump dataset
np.random.seed(10)
remove = 324
dropIndices = np.random.choice(trump.index, remove, replace=False)
df_trump = trump.drop(index=dropIndices)
print("Shape of trump Subset :", df_trump.shape)

# For Biden dataset (the generator is re-seeded before this draw as well)
np.random.seed(10)
remove = 31
dropIndices = np.random.choice(biden.index, remove, replace=False)
df_biden = biden.drop(index=dropIndices)
print("Shape of Biden Subset :", df_biden.shape)

# Analyzing the share of positive and negative sentiments in the Trump data
count_Trump = df_trump.groupby("Opinion Label").count()
trump_label_counts = count_Trump["Sentiment Polarity"]
# Divide by the real subset size rather than a hard-coded 1000, so the
# result stays a percentage if the balancing step changes.
trump_total = len(df_trump)
if trump_total:
    # Look the groups up by label: positional [0]/[1] depends on which
    # labels happen to be present and on their sort order.
    negative_percentage1 = trump_label_counts.get("negative", 0) / trump_total * 100
    positive_percentage1 = trump_label_counts.get("positive", 0) / trump_total * 100
else:
    negative_percentage1 = positive_percentage1 = 0.0

# Analyzing the share of positive and negative sentiments in the Biden data
count_Biden = df_biden.groupby("Opinion Label").count()
biden_label_counts = count_Biden["Sentiment Polarity"]
# Divide by the real subset size rather than a hard-coded 1000, so the
# result stays a percentage if the balancing step changes.
biden_total = len(df_biden)
if biden_total:
    # Look the groups up by label: positional [0]/[1] depends on which
    # labels happen to be present and on their sort order.
    negative_percentage2 = biden_label_counts.get("negative", 0) / biden_total * 100
    positive_percentage2 = biden_label_counts.get("positive", 0) / biden_total * 100
else:
    negative_percentage2 = positive_percentage2 = 0.0

# Setting up the data for plotting.
# The labels must follow the order of the value lists: the "...1"
# percentages are computed from the Trump data and the "...2" percentages
# from the Biden data, so Trump has to come first.
Politicians = ['Donald Trump', 'Joe Biden']
lis_pos = [positive_percentage1, positive_percentage2]
lis_neg = [negative_percentage1, negative_percentage2]

# Importing the matplotlib plotting interface
import matplotlib.pyplot as plt

# Plotting the bar graph using Matplotlib for Trump vs Biden.
# The two series are drawn side by side; plotting both at the same x
# position would paint the negative bars on top of the positive ones.
bar_width = 0.4
bar_positions = np.arange(len(Politicians))
plt.bar(bar_positions - bar_width / 2, lis_pos, width=bar_width, label="Positive")
plt.bar(bar_positions + bar_width / 2, lis_neg, width=bar_width, label="Negative")
# Put one politician name under each pair of bars.
plt.xticks(bar_positions, Politicians)
plt.legend()
plt.title("US ELECTION Trump vs Biden")
plt.show()