## Importing libraries and modules
# Computations & dataframes
import pandas as pd
import numpy as np
from math import pi
# PCA and K-means
from sklearn.decomposition import PCA
from sklearn.preprocessing import StandardScaler  # to centre and scale (standardize) the data
from sklearn.cluster import KMeans
from sklearn.metrics import silhouette_score
# Logistic regression
from sklearn.linear_model import LogisticRegression
from sklearn.metrics import classification_report, confusion_matrix, roc_curve, roc_auc_score, RocCurveDisplay  # RocCurveDisplay replaces plot_roc_curve, removed in scikit-learn 1.2
from sklearn.model_selection import train_test_split
from scipy.stats import norm
# Plotting libraries
import matplotlib.pyplot as plt
%matplotlib inline
from matplotlib import cm
from matplotlib.colors import ListedColormap, LinearSegmentedColormap
import seaborn as sns
import plotly.express as px
from plotly.subplots import make_subplots
import plotly.graph_objects as go
# Colour palette keyed on the boolean is_genuine label
pal = {True: "limegreen",
       False: "crimson"}
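For reference, a minimal usage sketch of this palette with seaborn, assuming a dataframe `df` of banknote measurements with a boolean `is_genuine` column loaded elsewhere in the notebook (the dataframe name is an assumption):

# Hypothetical usage: colour each banknote by its is_genuine label
sns.scatterplot(data=df, x="length", y="margin_low", hue="is_genuine", palette=pal)
plt.show()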
# Labels of the principal components
list_pc = ['PC1', 'PC2', 'PC3', 'PC4', 'PC5', 'PC6']
# Dimensional measurements of the banknotes
variables = ["diagonal",
             "height_left",
             "height_right",
             "margin_low",
             "margin_up",
             "length"]
# Desired number of clusters
n_clust = 2
Number of iterations to reach convergence: 5
Inertia: 627.2183861833662
Centroid positions:
[[-0.06412378 0.64653217 0.72076527 0.81585838 0.64246416 -0.87782484]
[ 0.05309173 -0.53530083 -0.59676264 -0.67549565 -0.53193269 0.72680122]]
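These figures are typically read from the attributes of a fitted KMeans object; a sketch of how they could be obtained, assuming the standardized matrix X_scaled from the PCA sketch above and an arbitrary random_state (both assumptions):

# Hypothetical reconstruction of the K-means fit behind the output above
kmeans = KMeans(n_clusters=n_clust, random_state=42)
labels = kmeans.fit_predict(X_scaled)

print("Number of iterations to reach convergence:", kmeans.n_iter_)
print("Inertia:", kmeans.inertia_)
print("Centroid positions:")
print(kmeans.cluster_centers_)

# Silhouette score as a sanity check on the choice of two clusters
print("Silhouette score:", silhouette_score(X_scaled, labels))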
variables = ["margin_low",
"margin_up",
"length",
#"diagonal", # non significatif p > 0.05
#"height_left", # non significatif p > 0.05
#"height_right", # non significatif p > 0.05
]
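A sketch of the logistic regression fit that produces the two reports below, assuming the target is the boolean is_genuine column of a dataframe `df` and an 80/20 train/test split (consistent with the support counts of 136 and 34; `df`, the split ratio and random_state are assumptions):

# Hypothetical reconstruction of the model behind the evaluation reports
X = df[variables]
y = df["is_genuine"]

X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=42)

clf = LogisticRegression()
clf.fit(X_train, y_train)

print("Model evaluation report (training data):")
print(classification_report(y_train, clf.predict(X_train)))
print("Model evaluation report (test data):")
print(classification_report(y_test, clf.predict(X_test)))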
Model evaluation report (training data):

              precision    recall  f1-score   support

       False       0.98      1.00      0.99        58
        True       1.00      0.99      0.99        78

    accuracy                           0.99       136
   macro avg       0.99      0.99      0.99       136
weighted avg       0.99      0.99      0.99       136
Model evaluation report (test data):

              precision    recall  f1-score   support

       False       1.00      1.00      1.00        12
        True       1.00      1.00      1.00        22

    accuracy                           1.00        34
   macro avg       1.00      1.00      1.00        34
weighted avg       1.00      1.00      1.00        34
p-values < 0.05 indicate the significant variables.
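scikit-learn's LogisticRegression does not report p-values directly; one way to obtain them, which would explain the scipy.stats.norm import, is a Wald test on the fitted coefficients. A sketch under that assumption, reusing clf and X_train from the sketch above and ignoring the bias introduced by the default L2 regularisation:

# Hypothetical Wald test: two-sided p-value for each coefficient of the fitted model
X_design = np.hstack([np.ones((X_train.shape[0], 1)), X_train])   # add an intercept column
coefs = np.concatenate([clf.intercept_, clf.coef_.ravel()])

p_hat = clf.predict_proba(X_train)[:, 1]                          # fitted probabilities
W = np.diag(p_hat * (1 - p_hat))                                  # IRLS weights
cov = np.linalg.inv(X_design.T @ W @ X_design)                    # asymptotic covariance matrix
std_err = np.sqrt(np.diag(cov))

z_scores = coefs / std_err
p_values = 2 * (1 - norm.cdf(np.abs(z_scores)))

for name, p in zip(["intercept"] + variables, p_values):
    print(f"{name}: p = {p:.4f}")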
False negative:

           is_genuine  proba_true
id
billet_69        True    0.470943
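A sketch of how such a case can be isolated, assuming clf, X_train and y_train from the sketches above and the convention that column 1 of predict_proba is P(is_genuine = True):

# Hypothetical: genuine banknotes (True) that the model classifies as counterfeit (False)
proba_true = clf.predict_proba(X_train)[:, 1]
pred = clf.predict(X_train)

results = pd.DataFrame({"is_genuine": y_train, "proba_true": proba_true},
                       index=X_train.index)
print("False negative:")
print(results[y_train & ~pred])   # actually genuine, predicted counterfeit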
AUC score: 1.0
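The AUC and the corresponding ROC curve can be obtained as follows, assuming the fitted clf and the test split from the sketches above:

# Hypothetical: AUC on the test set and the ROC curve of the fitted classifier
proba_test = clf.predict_proba(X_test)[:, 1]
print("AUC score:", roc_auc_score(y_test, proba_test))

RocCurveDisplay.from_estimator(clf, X_test, y_test)   # replacement for plot_roc_curve
plt.show()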
detecteur_acp("dataset/exemple_oc.csv")
    id     diagnostic  proba_true_%
0  A_1  predict_false      2.503616
1  A_2  predict_false      0.262961
2  A_3  predict_false      1.351174
3  A_4   predict_true     96.295009
4  A_5   predict_true     99.735291
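The body of detecteur_acp is not shown in this section. A minimal sketch consistent with the output above, assuming the fitted clf, the selected variables, a CSV containing an id column plus those variables, and a 50% decision threshold (whether the function also projects the data onto the PCA axes, as its name suggests, is not visible here):

def detecteur_acp(path):
    """Hypothetical detector: reads a CSV of banknotes and flags each one as genuine or counterfeit."""
    new_notes = pd.read_csv(path)

    proba_true = clf.predict_proba(new_notes[variables])[:, 1]   # P(is_genuine = True) per banknote

    return pd.DataFrame({
        "id": new_notes["id"],
        "diagnostic": np.where(proba_true >= 0.5, "predict_true", "predict_false"),
        "proba_true_%": proba_true * 100,
    })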