# import useful libraries
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
import seaborn as sns
# Load the sonar dataset: each row is one sonar return pattern, labelled as
# either rock ('R') or mine ('M').
df = pd.read_csv('sonar.all-data.csv')
# There are 208 patterns in total (bare expression: displays only in a notebook).
df.shape
# Summary statistics, transposed so each row describes one frequency column.
# Note that each column value only ranges from 0 to 1.
df.describe().T
# --- notebook output transcript (df.describe().T), preserved as comments ---
# countfloat64
# 208.0 - 208.0
# meanfloat64
# 0.006507211538461538 - 0.7021548076923078
# Freq_51
# 208
# 0.01606875
# Freq_52
# 208
# 0.01342019231
# Freq_53
# 208
# 0.01070913462
# Freq_54
# 208
# 0.01094086538
# Freq_55
# 208
# 0.009290384615
# Freq_56
# 208
# 0.008221634615
# Freq_57
# 208
# 0.007820192308
# Freq_58
# 208
# 0.007949038462
# Freq_59
# 208
# 0.007941346154
# Freq_60
# 208
# 0.006507211538
df.head()
# --- notebook output transcript (df.head(), first two columns), preserved as comments ---
# Freq_1float64
# Freq_2float64
# 0
# 0.02
# 0.0371
# 1
# 0.0453
# 0.0523
# 2
# 0.0262
# 0.0582
# 3
# 0.01
# 0.0171
# 4
# 0.0762
# 0.0666
# In order to see the correlation, we first map the label to numerical values:
# 'R' (rock) -> 0, 'M' (mine) -> 1.
df['Label_num'] = df['Label'].map({'R':0,'M':1})
# Correlations must be computed on numeric columns only: calling df.corr()
# while the string 'Label' column is still present raises a TypeError on
# pandas >= 2.0 (numeric_only now defaults to False). Compute the matrix
# once here instead of three separate df.corr() calls.
corr_matrix = df.drop('Label', axis=1).corr()
# Heatmap of the full feature/label correlation matrix.
plt.figure(figsize=(10,8))
sns.heatmap(corr_matrix, cmap='mako');
# 5 features with the highest correlation with our label.
# We are interested in their absolute correlation, so we use np.abs();
# slice [1:6] skips Label_num's perfect self-correlation at position 0.
np.abs(corr_matrix['Label_num']).sort_values(ascending=False)[1:6]
# Barplot of correlations between each feature and the label
# ([1:] again drops the self-correlation).
corr_label = corr_matrix['Label_num'].sort_values(ascending=False)[1:]
plt.figure(figsize=(10,8))
sns.barplot(x=corr_label.index, y=corr_label)
plt.xticks(rotation=90);
from sklearn.model_selection import train_test_split
# We need to drop both Label and Label_num columns to separate out features;
# the string Label column is kept as the target so the classifier predicts
# 'R'/'M' directly.
X = df.drop(['Label_num','Label'],axis=1)
y = df['Label']
# 10% hold-out (21 of 208 rows); random_state pins the split for reproducibility.
# NOTE(review): no stratify=y here, so the R/M balance of the small test set
# may drift from the full data — consider stratifying; confirm intent.
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.1, random_state=42)
from sklearn.preprocessing import StandardScaler
from sklearn.neighbors import KNeighborsClassifier
from sklearn.pipeline import Pipeline
from sklearn.model_selection import GridSearchCV

# Candidate neighbourhood sizes to try: k = 1 .. 39.
k_vals = list(range(1,40))
param_grid = {'knn__n_neighbors':k_vals}

# Scale features, then classify — built as a single pipeline so the scaler
# is re-fit on each CV training fold (no leakage into the validation fold).
pipe = Pipeline([
    ('scaler', StandardScaler()),
    ('knn', KNeighborsClassifier()),
])

# Exhaustive cross-validated search over k, scored by accuracy.
grid = GridSearchCV(pipe, param_grid=param_grid, scoring='accuracy')
grid.fit(X_train,y_train)

# Inspect the parameters of the best performing estimator
# (bare expression: displays only in a notebook).
grid.best_estimator_.get_params()
# grid.cv_results_ is a dictionary; 'mean_test_score' holds one mean CV
# accuracy per candidate k, in the same order as k_vals.
score_per_k = grid.cv_results_['mean_test_score']
# Visualization of the CV accuracy as a function of k.
plt.figure(figsize=(8,6))
plt.plot(k_vals,score_per_k)
plt.scatter(k_vals,score_per_k)
# Mark the k actually selected by the grid search rather than a hard-coded
# x=1, and span the observed score range rather than fixed y-limits, so the
# marker stays correct if the data or the grid changes.
best_k = grid.best_params_['knn__n_neighbors']
plt.vlines(x=best_k, ymin=score_per_k.min(), ymax=score_per_k.max(), colors='red')
from sklearn.metrics import classification_report, ConfusionMatrixDisplay
# plot_confusion_matrix was deprecated in scikit-learn 1.0 and removed in 1.2;
# ConfusionMatrixDisplay.from_estimator is the supported replacement and
# produces the same plot directly from the fitted estimator and test data.
ConfusionMatrixDisplay.from_estimator(grid, X_test, y_test);
# --- notebook output transcript (sklearn deprecation warning), preserved as comments ---
# /shared-libs/python3.7/py/lib/python3.7/site-packages/sklearn/utils/deprecation.py:87: FutureWarning: Function plot_confusion_matrix is deprecated; Function `plot_confusion_matrix` is deprecated in 1.0 and will be removed in 1.2. Use one of the class methods: ConfusionMatrixDisplay.from_predictions or ConfusionMatrixDisplay.from_estimator.
#   warnings.warn(msg, category=FutureWarning)
# Predict labels for the held-out test set with the best estimator found by
# the grid search (GridSearchCV.predict delegates to best_estimator_).
grid_pred = grid.predict(X_test)
# Per-class precision/recall/F1 plus overall accuracy on the test set.
print(classification_report(y_test,grid_pred))
# --- notebook output transcript (classification_report), preserved as comments ---
#               precision    recall  f1-score   support
#            M       0.92      0.92      0.92        13
#            R       0.88      0.88      0.88         8
#     accuracy                           0.90        21
#    macro avg       0.90      0.90      0.90        21
# weighted avg       0.90      0.90      0.90        21
from sklearn.metrics import ConfusionMatrixDisplay,confusion_matrix
# Build the raw confusion matrix from the held-out predictions, then render
# it with the class names learned by the grid-search estimator.
conf_mat = confusion_matrix(y_test, grid_pred)
display = ConfusionMatrixDisplay(confusion_matrix=conf_mat,
                                 display_labels=grid.classes_)
display.plot();