L'analyse en Composantes Principales
Définition
Avantages & Inconvénients
Démonstration
import numpy as np
import matplotlib.pyplot as plt
from sklearn.decomposition import PCA
from sklearn.datasets import load_digits
# Load the digits dataset: X holds one flattened sample per row, y the
# matching target values.
digits = load_digits()
X, y = digits.data, digits.target

# Preview the first two samples side by side; reshape(8, 8) turns each
# flattened row back into a 2-D image.
f, axarr = plt.subplots(nrows=1, ncols=2)
for panel, sample in zip(axarr, X[:2]):
    panel.imshow(sample.reshape(8, 8))
plt.show()
# Project the data onto two principal components and scatter-plot the
# result, colouring every point by its target value.
model = PCA(n_components=2)
X_2D = model.fit_transform(X)
first_axis, second_axis = X_2D.T  # one array per retained component
plt.scatter(first_axis, second_axis, c=y)
plt.colorbar()
plt.show()
# Fit a PCA with 64 components (one per pixel of the 8x8 images) and plot
# how the explained-variance ratio accumulates as components are added.
model = PCA(n_components=64)
X_reduced = model.fit_transform(X)
cumulative_ratio = model.explained_variance_ratio_.cumsum()
plt.plot(cumulative_ratio)
plt.show()
# Choose the smallest number of components whose cumulative explained
# variance exceeds 97 %.
# np.argmax on a boolean array returns the 0-based *index* of the first True,
# while PCA expects a *count* of components, hence the + 1. Without it the
# model keeps one component too few and stays at or below the 97 % target.
cumulative_variance = np.cumsum(model.explained_variance_ratio_)
n_comp = int(np.argmax(cumulative_variance > 0.97)) + 1

# Compress the data to n_comp components, then map it back to pixel space
# to see what the compression preserved.
model = PCA(n_components=n_comp)
X_opti = model.fit_transform(X)
X_opti_img = model.inverse_transform(X_opti)

# Left: first sample reconstructed from the compressed representation.
# Right: the same sample from the original data.
f, axarr = plt.subplots(1, 2)
axarr[0].imshow(X_opti_img[0].reshape(8, 8))
axarr[1].imshow(X[0].reshape(8, 8))
plt.show()