Project: PCA applied to images
Database description
Downloading the database
!pip install imageio==2.23.0
import numpy as np
import imageio
import matplotlib.pyplot as plt
import pandas as pd
# Images Dataset
import sklearn.datasets
# Load the Olivetti faces dataset through scikit-learn's dataset helper.
data = sklearn.datasets.fetch_olivetti_faces()
# Fields available in the returned object.
data.keys()
# The stack of face images.
data["images"]
# Largest and smallest pixel values across the whole stack.
data["images"].max()
data["images"].min()
# Shape of a single image (every axis after the leading image axis).
data["images"].shape[1:]
Plotting the images
# Show two sample faces side by side, with axis ticks hidden.
fig, ax = plt.subplots(
    nrows=1,
    ncols=2,
    figsize=(8, 8),
    subplot_kw=dict(xticks=[], yticks=[]),
)
left_face, right_face = data["images"][22], data["images"][23]
ax[0].imshow(left_face, cmap="gray")
ax[1].imshow(right_face, cmap="gray")
# Flatten every image into a single row so the faces form a 2-D table:
# one row per image (index 0..n-1) and one column per pixel.
# The table is built in one step instead of appending row by row:
# DataFrame.append was removed in pandas 2.0, and growing a frame inside a
# loop copies all previously stored rows on every iteration.
caras = pd.DataFrame(data["images"].reshape(len(data["images"]), -1))
# Preview the first 50 faces on a 5x10 grid with no ticks and almost no gaps.
fig, axes = plt.subplots(
    nrows=5,
    ncols=10,
    figsize=(15, 8),
    subplot_kw=dict(xticks=[], yticks=[]),
    gridspec_kw={"hspace": 0.01, "wspace": 0.01},
)
for i, ax in enumerate(axes.flat):
    # Each row of `caras` is a flattened image; restore its 64x64 layout.
    face = caras.iloc[i].values.reshape(64, 64)
    ax.imshow(face, cmap="gray")
Reducing dimensions of image dataset using PCA
from sklearn.decomposition import PCA
50% of variance
# n_components=0.5 asks PCA to keep only as many leading eigenvectors as are
# needed for their cumulative explained variance to reach 50% of the
# variation in the dataset.
caras_pca = PCA(n_components=0.5)
caras_pca.fit(caras)

# Lay the retained components out on a grid, one panel per component.
rows = 1
columns = caras_pca.n_components_ // rows
fig, axes = plt.subplots(
    rows,
    columns,
    figsize=(12, 6),
    # squeeze=False always returns a 2-D array of Axes; without it a 1x1 grid
    # yields a bare Axes object and the `.flat` iteration below would fail.
    squeeze=False,
    subplot_kw={"xticks": [], "yticks": []},
    gridspec_kw=dict(hspace=0.01, wspace=0.01),
)
# Every component is reshaped to 64x64 so it can be viewed as an image.
for i, ax in enumerate(axes.flat):
    ax.imshow(caras_pca.components_[i].reshape(64, 64), cmap="gray")
print(caras_pca.n_components_)
# Project the faces onto the retained components, then map the result back
# to pixel space to see how much detail survives.
components = caras_pca.transform(caras)
proyection = caras_pca.inverse_transform(components)

# Show the first 50 reconstructed faces on a 5x10 grid.
fig, axes = plt.subplots(
    nrows=5,
    ncols=10,
    figsize=(15, 8),
    subplot_kw=dict(xticks=[], yticks=[]),
    gridspec_kw={"hspace": 0.01, "wspace": 0.01},
)
for i, ax in enumerate(axes.flat):
    rebuilt_face = proyection[i].reshape(64, 64)
    ax.imshow(rebuilt_face, cmap="gray")
80% of variance
# Keep as many leading principal components as are needed to explain 80% of
# the variance in the faces.
caras_pca = PCA(n_components=0.8)
caras_pca.fit(caras)

# A single row of panels, one per retained component.
rows = 1
columns = caras_pca.n_components_ // rows
fig, axes = plt.subplots(
    nrows=rows,
    ncols=columns,
    figsize=(12, 6),
    subplot_kw=dict(xticks=[], yticks=[]),
    gridspec_kw={"hspace": 0.01, "wspace": 0.01},
)
for i, ax in enumerate(axes.flat):
    # Reshape the component to 64x64 so it can be drawn as an image.
    component_image = caras_pca.components_[i].reshape(64, 64)
    ax.imshow(component_image, cmap="gray")
print(caras_pca.n_components_)
# Encode the faces with the fitted components and decode them back to pixels.
components = caras_pca.transform(caras)
proyection = caras_pca.inverse_transform(components)

# Draw the first 50 reconstructions on a tight 5x10 grid without ticks.
fig, axes = plt.subplots(
    nrows=5,
    ncols=10,
    figsize=(15, 8),
    subplot_kw=dict(xticks=[], yticks=[]),
    gridspec_kw={"hspace": 0.01, "wspace": 0.01},
)
for i, ax in enumerate(axes.flat):
    rebuilt_face = proyection[i].reshape(64, 64)
    ax.imshow(rebuilt_face, cmap="gray")
99.9% of variance
# Keep as many leading principal components as are needed to explain 99.9%
# of the variance in the faces.
caras_pca = PCA(n_components=0.999)
caras_pca.fit(caras)

# A threshold this close to 1 can retain a very large number of components,
# and a single row of that many panels is unreadable. Wrap the panels onto a
# grid of at most `max_columns` columns instead.
max_columns = 20
n_shown = caras_pca.n_components_
columns = min(n_shown, max_columns)
rows = -(-n_shown // columns)  # ceiling division: enough rows for every component
fig, axes = plt.subplots(
    rows,
    columns,
    # Scale the height with the grid shape so each panel stays roughly square.
    figsize=(12, 12 * rows / columns),
    # Always get a 2-D array of Axes, even for a 1x1 grid, so `.flat` works.
    squeeze=False,
    subplot_kw={"xticks": [], "yticks": []},
    gridspec_kw=dict(hspace=0.01, wspace=0.01),
)
for i, ax in enumerate(axes.flat):
    if i < n_shown:
        # Reshape the component to 64x64 so it can be drawn as an image.
        ax.imshow(caras_pca.components_[i].reshape(64, 64), cmap="gray")
    else:
        # The last grid row may have spare panels; hide them.
        ax.set_visible(False)
print(caras_pca.n_components_)
# Transform the faces into component space and reconstruct them in pixel space.
components = caras_pca.transform(caras)
proyection = caras_pca.inverse_transform(components)

# Display the first 50 reconstructed faces (5 rows by 10 columns).
fig, axes = plt.subplots(
    nrows=5,
    ncols=10,
    figsize=(15, 8),
    subplot_kw=dict(xticks=[], yticks=[]),
    gridspec_kw={"hspace": 0.01, "wspace": 0.01},
)
for i, ax in enumerate(axes.flat):
    rebuilt_face = proyection[i].reshape(64, 64)
    ax.imshow(rebuilt_face, cmap="gray")