Exo 1: Classifieur multi-dimensionnel
import numpy as np
# Load the training features, training labels and test features.
# ndmin=2 makes np.loadtxt return the feature arrays with at least two
# dimensions.
X_train = np.loadtxt('ressources/SY32_P20_TD02_data_X_train.csv', ndmin=2)
Y_train = np.loadtxt('ressources/SY32_P20_TD02_data_y_train.csv')
X_test = np.loadtxt('ressources/SY32_P20_TD02_data_X_test.csv', ndmin=2)

from ressources.catClassifier import CatClassifierMultiDim

# Fit the multi-dimensional classifier on the whole training set.
clf = CatClassifierMultiDim()
clf.fit(X_train, Y_train)

# Show the fitted parameters, one per line, in the order h_hat, d_hat, z_hat.
# NOTE(review): their exact meaning is defined in CatClassifierMultiDim,
# which is not visible here.
for fitted_attr in ('h_hat', 'd_hat', 'z_hat'):
    print(getattr(clf, fitted_attr))

from ressources.catClassifierPlot import plotCatClassifier

plotCatClassifier(clf)
46.679
1
-1
Exo 2: Adaboost
Sans validation croisée
from ressources.catClassifier import CatClassifierBoost
# Fit one boosted classifier per candidate k, plot it, and record its error
# rate measured on the training data itself (no held-out set here).
list_K = [3, 10, 20, 30, 40]
list_err = []
for k in list_K:
    clf = CatClassifierBoost()
    clf.fit(X_train, Y_train, k)
    plotCatClassifier(clf)
    # Fraction of training samples whose predicted label differs from truth.
    train_pred = clf.predict(X_train)
    train_err = np.mean(train_pred != Y_train)
    list_err.append(train_err)
    print(train_err)
0.04
0.02666666666666667
0.03
0.023333333333333334
0.016666666666666666
Validation croisée
from sklearn.model_selection import StratifiedKFold
# Estimate the error of each candidate k with 10-fold stratified
# cross-validation: fit on the training folds, score on the held-out fold,
# then average the per-fold error rates.
skf = StratifiedKFold(n_splits=10)
list_K = [3, 10, 20, 30, 40]
list_err = []
for k in list_K:
    list_err_k = []
    for train, val in skf.split(X_train, Y_train):
        clf = CatClassifierBoost()
        clf.fit(X_train[train], Y_train[train], k)
        val_pred = clf.predict(X_train[val])
        list_err_k.append(np.mean(val_pred != Y_train[val]))
    cv_err = np.mean(list_err_k)
    list_err.append(cv_err)
    print(cv_err)
0.056666666666666664
0.05
0.05
0.05
0.05
Recherche du k optimal
# Pick the candidate k with the lowest mean cross-validation error, then
# predict the test set with a classifier refitted using that k.
best_k = 1
best_err = np.inf  # np.Inf was removed in NumPy 2.0; np.inf works everywhere
list_err = []
for k in list_K:
    # Per-fold validation error rates for this k.
    list_err_k = []
    for train, val in skf.split(X_train, Y_train):
        clf = CatClassifierBoost()
        clf.fit(X_train[train], Y_train[train], k)
        list_err_k.append(np.mean(clf.predict(X_train[val]) != Y_train[val]))
    list_err.append(np.mean(list_err_k))
    # Strict '<' keeps the first (smallest) k when several k tie.
    if list_err[-1] < best_err:
        best_err = list_err[-1]
        best_k = k

# Report the selected value, not the loop variable (which is always the
# last candidate once the loop has finished).
print('Meilleur k: {}'.format(best_k))

# The classifier left over from the loop was trained on a single fold split
# with the last k, so refit on the full training set with the selected k
# before predicting the test labels.
clf = CatClassifierBoost()
clf.fit(X_train, Y_train, best_k)
y_test = clf.predict(X_test)
np.savetxt('y_test.txt', y_test, fmt='%d')
Meilleur k: 40