import pandas as pd
import seaborn as sns
import matplotlib.pyplot as plt
# Preprocessing
from sklearn.preprocessing import LabelEncoder
from sklearn.model_selection import train_test_split
# Models
from xgboost import XGBClassifier
from sklearn.linear_model import LogisticRegression
from sklearn.svm import LinearSVC,SVC
from sklearn.neighbors import KNeighborsClassifier
from sklearn.ensemble import RandomForestClassifier
from sklearn.tree import DecisionTreeClassifier
from sklearn.naive_bayes import GaussianNB
# Evaluating
from sklearn.metrics import accuracy_score
import warnings
warnings.filterwarnings('ignore')
# Load the wine dataset; the path is resolved relative to the working directory.
df_wines = pd.read_csv("../data/wine_data_model.csv")
df_wines.columns  # notebook-style inspection; has no effect when run as a script

# Encode the 'quality_label' classes as integers so every classifier can use them.
label_quality = LabelEncoder()
df_wines['quality_label'] = label_quality.fit_transform(df_wines['quality_label'])

# Build the feature matrix. 'quality_label' is the prediction target, so it
# must be removed from the features as well -- leaving it in leaks the answer
# into the inputs and makes the reported accuracies meaningless.
# NOTE(review): 'quality' is presumably the raw score the label is derived
# from, which is why it is excluded too -- confirm against the data prep step.
features = df_wines.drop(['quality', 'type', 'quality_label'], axis=1)
target = df_wines['quality_label']

# Hold out 20% of the rows for evaluation; the fixed seed keeps the split
# reproducible between runs.
x_train, x_test, y_train, y_test = train_test_split(
    features,
    target,
    test_size=0.20,
    random_state=42,
)
# Single table of (display label, estimator) pairs. Keeping the label next to
# its estimator prevents the two derived lists below from drifting apart.
# NOTE(review): the 'GradientBoostingClassifier' label is attached to
# XGBClassifier, not to an sklearn GradientBoostingClassifier -- confirm the
# label is intended.
_labelled_estimators = (
    ('LogisticRegression', LogisticRegression()),
    ('LinearSVM', LinearSVC()),
    ('rbfSVM', SVC(kernel='rbf')),
    ('KNearestNeighbors', KNeighborsClassifier()),
    ('RandomForestClassifier', RandomForestClassifier()),
    ('DecisionTree', DecisionTreeClassifier()),
    ('GradientBoostingClassifier', XGBClassifier()),
    ('GaussianNB', GaussianNB()),
)

# Estimators to compare, all constructed with library defaults except the
# explicit RBF kernel on SVC.
models = [estimator for _, estimator in _labelled_estimators]

# Display labels, index-aligned with `models`.
model_names = [label for label, _ in _labelled_estimators]
# Fit every candidate model on the training split and record its accuracy on
# the held-out test split. `acc` ends up index-aligned with `models` (and
# therefore with `model_names`).
acc = []
for model in models:
    model.fit(x_train, y_train)
    prediction = model.predict(x_test)
    # accuracy_score is documented as (y_true, y_pred); pass them in that order.
    acc.append(accuracy_score(y_test, prediction))

# Column-oriented mapping consumed when building the results DataFrame. Built
# once, after the loop, when every accuracy has been collected.
eval_acc = {'Modelling Algorithm': model_names, 'Accuracy': acc}
# Tabulate the per-model accuracies, best-performing model first.
df_model_accuracy = (
    pd.DataFrame.from_dict(eval_acc)
    .sort_values(by='Accuracy', ascending=False)
)
df_model_accuracy  # notebook-style display; has no effect when run as a script

# Horizontal bar chart: one bar per algorithm, bar length is the accuracy.
accuracy_axes = sns.barplot(
    y='Modelling Algorithm',
    x='Accuracy',
    data=df_model_accuracy,
)
accuracy_axes.set(title="Model accuracy")
plt.show()