# Connecting Google Drive with Google Colab
from google.colab import drive
drive.mount('/content/drive/')
# Importing essential libraries
import numpy as np
import pandas as pd
# Loading the dataset
df = pd.read_csv('/content/drive/My Drive/Colab Notebooks/Datasets/Restaurant_Reviews.tsv', delimiter='\t', quoting=3)
df.shape
df.columns
df.head()
# Importing essential libraries for performing Natural Language Processing on 'Restaurant_Reviews.tsv' dataset
import nltk
import re
nltk.download('stopwords')
from nltk.corpus import stopwords
from nltk.stem.porter import PorterStemmer
# Cleaning the reviews
corpus = []
for i in range(0,1000):
# Cleaning special character from the reviews
review = re.sub(pattern='[^a-zA-Z]',repl=' ', string=df['Review'][i])
# Converting the entire review into lower case
review = review.lower()
# Tokenizing the review by words
review_words = review.split()
# Removing the stop words
review_words = [word for word in review_words if not word in set(stopwords.words('english'))]
# Stemming the words
ps = PorterStemmer()
review = [ps.stem(word) for word in review_words]
# Joining the stemmed words
review = ' '.join(review)
# Creating a corpus
corpus.append(review)
corpus[0:10]
# Creating the Bag of Words model
from sklearn.feature_extraction.text import CountVectorizer
cv = CountVectorizer(max_features=1500)
X = cv.fit_transform(corpus).toarray()
y = df.iloc[:, 1].values
from sklearn.model_selection import train_test_split
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size = 0.20, random_state = 0)
# Fitting Naive Bayes to the Training set
from sklearn.naive_bayes import MultinomialNB
classifier = MultinomialNB()
classifier.fit(X_train, y_train)
# Predicting the Test set results
y_pred = classifier.predict(X_test)
# Accuracy, Precision and Recall
from sklearn.metrics import accuracy_score
from sklearn.metrics import precision_score
from sklearn.metrics import recall_score
score1 = accuracy_score(y_test,y_pred)
score2 = precision_score(y_test,y_pred)
score3= recall_score(y_test,y_pred)
print("---- Scores ----")
print("Accuracy score is: {}%".format(round(score1*100,2)))
print("Precision score is: {}".format(round(score2,2)))
print("Recall score is: {}".format(round(score3,2)))
# Making the Confusion Matrix
from sklearn.metrics import confusion_matrix
cm = confusion_matrix(y_test, y_pred)
cm
# Plotting the confusion matrix
import matplotlib.pyplot as plt
import seaborn as sns
%matplotlib inline
plt.figure(figsize = (10,6))
sns.heatmap(cm, annot=True, cmap="YlGnBu", xticklabels=['Negative', 'Positive'], yticklabels=['Negative', 'Positive'])
plt.xlabel('Predicted values')
plt.ylabel('Actual values')
# Hyperparameter tuning the Naive Bayes Classifier
best_accuracy = 0.0
alpha_val = 0.0
for i in np.arange(0.1,1.1,0.1):
temp_classifier = MultinomialNB(alpha=i)
temp_classifier.fit(X_train, y_train)
temp_y_pred = temp_classifier.predict(X_test)
score = accuracy_score(y_test, temp_y_pred)
print("Accuracy score for alpha={} is: {}%".format(round(i,1), round(score*100,2)))
if score>best_accuracy:
best_accuracy = score
alpha_val = i
print('--------------------------------------------')
print('The best accuracy is {}% with alpha value as {}'.format(round(best_accuracy*100, 2), round(alpha_val,1)))
classifier = MultinomialNB(alpha=0.2)
classifier.fit(X_train, y_train)
def predict_sentiment(sample_review):
sample_review = re.sub(pattern='[^a-zA-Z]',repl=' ', string = sample_review)
sample_review = sample_review.lower()
sample_review_words = sample_review.split()
sample_review_words = [word for word in sample_review_words if not word in set(stopwords.words('english'))]
ps = PorterStemmer()
final_review = [ps.stem(word) for word in sample_review_words]
final_review = ' '.join(final_review)
temp = cv.transform([final_review]).toarray()
return classifier.predict(temp)
# Predicting values
sample_review = 'The food is really good here.'
if predict_sentiment(sample_review):
print('This is a POSITIVE review.')
else:
print('This is a NEGATIVE review!')
# Predicting values
sample_review = 'Food was pretty bad and the service was very slow.'
if predict_sentiment(sample_review):
print('This is a POSITIVE review.')
else:
print('This is a NEGATIVE review!')
# Predicting values
sample_review = 'The food was absolutely wonderful, from preparation to presentation, very pleasing.'
if predict_sentiment(sample_review):
print('This is a POSITIVE review.')
else:
print('This is a NEGATIVE review!')