import pandas as pd
import numpy as np
import seaborn as sns
import matplotlib.pyplot as plt
from pandas.plotting import scatter_matrix
from sklearn.model_selection import train_test_split
from sklearn.model_selection import KFold
from sklearn.model_selection import cross_val_score
from sklearn.metrics import classification_report
from sklearn.metrics import confusion_matrix
from sklearn.metrics import accuracy_score
from sklearn.linear_model import LogisticRegression
# Load the iris table; the file is looked up relative to the working directory.
df = pd.read_csv("iris.data")

# Quick inspection. These bare expressions only display a value in an
# interactive session (e.g. a notebook cell); run as a script they are
# evaluated and discarded.
df.shape
df.head(10)
df.tail()
df.describe()

# Scatter-plot matrix of the frame, shown in a blocking window.
scatter_matrix(df)
plt.show()

# Columns 0-3 are used as the features, column 4 as the target label.
array = df.to_numpy()
X = array[:, :4]
y = array[:, 4]

# Hold out 20% of the rows for validation; the fixed seed makes the split
# reproducible between runs.
X_train, X_validation, y_train, y_validation = train_test_split(
    X,
    y,
    test_size=0.20,
    random_state=7,
)

# Fit a logistic-regression classifier on the training rows and predict the
# held-out rows.
model = LogisticRegression()
model.fit(X_train, y_train)
predictions = model.predict(X_validation)

# Print accuracy, confusion matrix and per-class report, in that order.
for report in (accuracy_score, confusion_matrix, classification_report):
    print(report(y_validation, predictions))
0.9666666666666667
[[ 7 0 0]
[ 0 11 1]
[ 0 0 11]]
precision recall f1-score support
Iris-setosa 1.00 1.00 1.00 7
Iris-versicolor 1.00 0.92 0.96 12
Iris-virginica 0.92 1.00 0.96 11
accuracy 0.97 30
macro avg 0.97 0.97 0.97 30
weighted avg 0.97 0.97 0.97 30
# One unseen sample with four feature values, shaped as a single-row 2-D
# array (1 sample x 4 features).
X_new = np.array([5, 2.9, 1, 0.2]).reshape(1, -1)
print(f"X_new.shape: {X_new.shape}")
X_new.shape: (1, 4)
# Classify the single new sample with the fitted model. `predict` returns an
# array of labels (one per input row), so the printed value is a one-element
# array rather than a bare string.
new_prediction = model.predict(X_new)
print(f"Prediction: {new_prediction}.")
Prediction: ['Iris-setosa'].
# Predict every held-out validation row and report the share predicted
# correctly.
y_predict = model.predict(X_validation)
print(f"Test set predictions:\n {y_predict}")
# np.mean over the element-wise match array is a fraction in [0, 1]. The `%`
# presentation type multiplies by 100 and appends the percent sign, so 0.9667
# prints as "96.67%" (formatting it with `{:.2f}%` would print "0.97%").
print("Test set score (np.mean): {:.2%}".format(np.mean(y_predict == y_validation)))
Test set predictions:
['Iris-virginica' 'Iris-versicolor' 'Iris-setosa' 'Iris-versicolor'
'Iris-virginica' 'Iris-setosa' 'Iris-versicolor' 'Iris-versicolor'
'Iris-setosa' 'Iris-versicolor' 'Iris-versicolor' 'Iris-versicolor'
'Iris-setosa' 'Iris-virginica' 'Iris-setosa' 'Iris-versicolor'
'Iris-virginica' 'Iris-virginica' 'Iris-setosa' 'Iris-setosa'
'Iris-virginica' 'Iris-virginica' 'Iris-versicolor' 'Iris-virginica'
'Iris-virginica' 'Iris-virginica' 'Iris-versicolor' 'Iris-versicolor'
'Iris-virginica' 'Iris-virginica']
Test set score (np.mean): 0.97%