import tensorflow as tf
import sklearn
import numpy as np
import pandas as pd
import os
import matplotlib as mpl
import matplotlib.pyplot as plt
import PIL
from PIL import Image
# Open the two source images (paths are relative to the working directory).
plane = Image.open('3096_colorPlane.jpg')
bird = Image.open('42049_colorBird.jpg')

# Normalize RGB values: divide every channel value by 255
# (yields values in [0, 1] assuming 8-bit channels).
plane_data, bird_data = (np.asarray(img) / 255 for img in (plane, bird))

# Print the array shape of each image, one per line.
print(plane_data.shape, bird_data.shape, sep='\n')
def _build_datacube(image_data):
    """Return one 5-feature row per pixel of *image_data*.

    Each row is ``(x, y, c0, c1, c2)`` where ``x`` is the column index divided
    by the image width, ``y`` is ``1 - row index / image height`` (so the top
    row has y == 1), and ``c0..c2`` are the first three channel values of the
    pixel, taken unchanged from *image_data*.  Rows are emitted in row-major
    order (all columns of row 0, then row 1, ...).

    Raises:
        ValueError: if *image_data* is not a 3-D array with at least three
            channels on its last axis.
    """
    if image_data.ndim != 3 or image_data.shape[2] < 3:
        raise ValueError(
            "expected an image array of shape (height, width, >=3 channels), "
            f"got shape {image_data.shape}"
        )
    height, width = image_data.shape[:2]
    # Integer row / column index of every pixel, each of shape (height, width).
    row_idx, col_idx = np.indices((height, width))
    x = col_idx / width
    y = 1 - row_idx / height
    # Stack position and colour along the last axis -> (height, width, 5),
    # then flatten the pixel grid into a (height * width, 5) table.
    return np.dstack((x, y, image_data[..., :3])).reshape(-1, 5)


# Normalized pixel positions plus channel values: one feature row per pixel.
plane_datacube = _build_datacube(plane_data)
bird_datacube = _build_datacube(bird_data)

# Down-sampled copies keeping every 100th pixel row of the table.
plane_datacube_ds = plane_datacube[0::100].copy()
bird_datacube_ds = bird_datacube[0::100].copy()
from sklearn.decomposition import PCA

# A single 5-component PCA estimator is fitted to the plane pixels and then
# re-fitted to the bird pixels; the results of each fit are read off right
# after that fit, before the estimator is reused.
pca = PCA(n_components=5)

pca.fit(plane_datacube)
plane_values, plane_explained_variance = (
    pca.singular_values_,
    pca.explained_variance_ratio_,
)
# 1-based component numbers (1 .. n_components_).
PC_values = np.arange(1, pca.n_components_ + 1)

pca.fit(bird_datacube)
bird_values, bird_explained_variance = (
    pca.singular_values_,
    pca.explained_variance_ratio_,
)

# Singular values of each fit.
print(plane_values)
print(bird_values)
# Scree plots: explained-variance ratio against principal-component number,
# plane in the top panel and bird in the bottom panel.
fig, axs = plt.subplots(2, 1, figsize=(5,5))
scree_panels = (
    (axs[0], plane_explained_variance, 'Scree Plot Plane'),
    (axs[1], bird_explained_variance, 'Scree Plot Bird'),
)
for panel, variance_ratio, panel_title in scree_panels:
    panel.plot(PC_values, variance_ratio, 'o-', linewidth=2, color='blue')
    panel.set_title(panel_title)
    panel.set_xlabel('Principal Component')
    panel.set_ylabel('Explained Variance')
fig.tight_layout()
from scipy.linalg import eigh

plane_datacube = np.asarray(plane_datacube)

# Manual PCA of the down-sampled plane pixels onto two components.
# A covariance matrix is defined on mean-centred data, so subtract the
# per-feature mean before forming X^T X / n (and before projecting).
plane_centered = plane_datacube_ds - plane_datacube_ds.mean(axis=0)
plane_cov = np.matmul(plane_centered.T, plane_centered) / len(plane_centered)

# eigh returns eigenvalues in ascending order, so indices 3 and 4 select the
# two largest of the five (one per feature column).  `subset_by_index`
# replaces the `eigvals=` keyword, which has been deprecated since SciPy 1.5
# and is rejected by recent releases.
plane_eigval, plane_eigvec = eigh(plane_cov, subset_by_index=[3, 4])

# Shape (2, n_samples): row 0 is the second-largest component, row 1 the largest.
plane_pca_points = np.matmul(plane_eigvec.T, plane_centered.T)

# Draw on a fresh figure; a bare plt.scatter would otherwise paint into
# whichever axes happens to be current from an earlier plot.
plt.figure()
plt.scatter(plane_pca_points[0], plane_pca_points[1], s=0.1)
from scipy.linalg import eigh
# NOTE(review): presumably imported for its side effect of registering the
# '3d' projection on older Matplotlib versions -- confirm before removing.
from mpl_toolkits import mplot3d

bird_datacube = np.asarray(bird_datacube)

# Manual PCA of the down-sampled bird pixels onto three components.
# A covariance matrix is defined on mean-centred data, so subtract the
# per-feature mean before forming X^T X / n (and before projecting).
bird_centered = bird_datacube_ds - bird_datacube_ds.mean(axis=0)
bird_cov = np.matmul(bird_centered.T, bird_centered) / len(bird_centered)

# eigh returns eigenvalues in ascending order, so indices 2..4 select the
# three largest of the five (one per feature column).  `subset_by_index`
# replaces the `eigvals=` keyword, which has been deprecated since SciPy 1.5
# and is rejected by recent releases.
bird_eigval, bird_eigvec = eigh(bird_cov, subset_by_index=[2, 4])

# Shape (3, n_samples): rows are ordered from the third-largest component
# (row 0) to the largest (row 2).
bird_pca_points = np.matmul(bird_eigvec.T, bird_centered.T)

# Use a fresh figure so the 3-D axes is not stacked on top of an earlier plot.
plt.figure()
ax = plt.axes(projection='3d')
ax.scatter(bird_pca_points[0], bird_pca_points[1], bird_pca_points[2], s=0.1)
from sklearn.manifold import TSNE

# 2x3 grid: top row = plane embeddings, bottom row = bird embeddings,
# one column per perplexity value.
fig, axs = plt.subplots(2,3, figsize=(10, 10))
fig.subplots_adjust(hspace = .4, wspace=.1)
axs = axs.ravel()

# One single-element list per (image, perplexity) pair; `tsnes` holds them in
# the same order as the flattened axes (plane 15/30/40, then bird 15/30/40).
plane_perp15, plane_perp30, plane_perp40 = [], [], []
bird_perp15, bird_perp30, bird_perp40 = [], [], []
tsnes = [plane_perp15,plane_perp30,plane_perp40,bird_perp15,bird_perp30,bird_perp40]
perplexities = [15,30,40]

for i, p in enumerate(perplexities):
    # Embed the plane samples first, then the bird samples, with identical settings.
    plane_embedded, bird_embedded = [
        TSNE(n_components=2, perplexity=p, learning_rate='auto', init='random').fit_transform(samples)
        for samples in (plane_datacube_ds, bird_datacube_ds)
    ]
    tsnes[i].append(plane_embedded)
    tsnes[i+3].append(bird_embedded)

    plane_panel, bird_panel = axs[i], axs[i+3]
    plane_panel.scatter(plane_embedded[:,0], plane_embedded[:,1], s=1)
    plane_panel.set_title('Plane w/ Perplexity '+str(p))
    bird_panel.scatter(bird_embedded[:,0], bird_embedded[:,1], s=1)
    bird_panel.set_title('Bird w/ Perplexity '+str(p))
from sklearn.cluster import KMeans
from sklearn import metrics

# K-means on the full plane pixel table for K = 2..5, one panel per K.
# No separate K=5 fit is needed up front: the last loop iteration leaves
# `plane_kmeans` as the K=5, random_state=0 model.
fig, axs = plt.subplots(2,2, figsize=(10, 10))
fig.subplots_adjust(hspace = .4, wspace=.1)
axs = axs.ravel()
for i in range(2,6):
    plane_kmeans = KMeans(n_clusters=i, random_state=0).fit(plane_datacube)
    # Keep every 100th label so the labels line up with the down-sampled rows.
    plane_labels = plane_kmeans.labels_[0::100]
    panel = axs[i-2]
    for j in range(i):
        # Rows of the down-sampled table assigned to cluster j, plotted by
        # their first two feature columns.
        members = plane_datacube_ds[plane_labels==j]
        panel.scatter(members[:,0], members[:,1])
    panel.set_title('K = '+str(i))
    ch_index = metrics.calinski_harabasz_score(plane_datacube_ds, plane_labels)
    panel.text(0.5,-0.2, "CH index = " +str(ch_index), size=12, ha="center")
# K-means on the full bird pixel table for K = 2..5, one panel per K.
fig, axs = plt.subplots(2,2, figsize=(10, 10))
fig.subplots_adjust(hspace = .4, wspace=.1)
axs = axs.ravel()
for panel, i in zip(axs, range(2,6)):
    bird_kmeans = KMeans(n_clusters=i, random_state=0).fit(bird_datacube)
    # Keep every 100th label so the labels line up with the down-sampled rows.
    bird_labels = bird_kmeans.labels_[0::100]
    for j in range(i):
        # Rows of the down-sampled table assigned to cluster j, plotted by
        # their first two feature columns.
        in_cluster = bird_datacube_ds[bird_labels==j]
        panel.scatter(in_cluster[:,0], in_cluster[:,1])
    panel.set_title('K = '+str(i))
    score = metrics.calinski_harabasz_score(bird_datacube_ds, bird_labels)
    panel.text(0.5,-0.2, "CH index = " +str(score), size=12, ha="center")
# Colour the PCA and t-SNE embeddings by K-means cluster (K=5 for the plane,
# K=2 for the bird).  The fits are deterministic (random_state=0), so each
# model is fitted once and its down-sampled labels reused for every plot.
plane_kmeans_colors = KMeans(n_clusters=5, random_state=0).fit(plane_datacube).labels_[0::100]
bird_kmeans_colors = KMeans(n_clusters=2, random_state=0).fit(bird_datacube).labels_[0::100]

# Each plot gets its own figure; a bare plt.scatter / plt.axes would otherwise
# draw into whichever figure happens to be current from an earlier plot.
plt.figure()
plt.scatter(plane_pca_points[0],plane_pca_points[1], s=1, c=plane_kmeans_colors)

plt.figure()
ax = plt.axes(projection='3d')
ax.scatter(bird_pca_points[0],bird_pca_points[1], bird_pca_points[2], s=0.1, c=bird_kmeans_colors)

# NOTE(review): stacking into one array requires the plane and bird
# embeddings to have the same number of rows -- confirm for other images.
tsnes = np.asarray(tsnes)
fig, axs = plt.subplots(2,3, figsize=(10, 10))
fig.subplots_adjust(hspace = .4, wspace=.1)
axs = axs.ravel()
for i in range(3):
    # tsnes[i] holds the plane embedding for perplexities[i], tsnes[i+3] the bird one.
    axs[i].scatter(tsnes[i][0][:,0], tsnes[i][0][:,1], s=1, c=plane_kmeans_colors)
    axs[i].set_title('Plane w/ Perplexity '+str(perplexities[i]))
    axs[i+3].scatter(tsnes[i+3][0][:,0], tsnes[i+3][0][:,1], s=1, c=bird_kmeans_colors)
    axs[i+3].set_title('Bird w/ Perplexity '+str(perplexities[i]))
from sklearn.mixture import GaussianMixture

# Fit full-covariance Gaussian mixtures with 1..14 components to each full
# pixel table, then compare them by BIC and AIC.
n_components = np.arange(1, 15)
plane_GM_models = [GaussianMixture(n, covariance_type='full', random_state=0).fit(plane_datacube) for n in n_components]
bird_GM_models = [GaussianMixture(n, covariance_type='full', random_state=0).fit(bird_datacube) for n in n_components]

# Start a fresh figure; a bare plt.plot would otherwise draw onto whichever
# axes happens to be current from an earlier plot.
plt.figure()
plt.plot(n_components, [m.bic(plane_datacube) for m in plane_GM_models], label='pBIC')
plt.plot(n_components, [m.aic(plane_datacube) for m in plane_GM_models], label='pAIC')
plt.plot(n_components, [b.bic(bird_datacube) for b in bird_GM_models], label='bBIC')
plt.plot(n_components, [b.aic(bird_datacube) for b in bird_GM_models], label='bAIC')
plt.legend(loc='best')
plt.xlabel('n_components')
# Fit an 8-component Gaussian mixture to each full pixel table and label the
# down-sampled rows.  Each image gets its own estimator: re-fitting a shared
# one would leave `plane_gmm` and `bird_gmm` pointing at the same
# (bird-fitted) model.  random_state=0 matches the model-selection fits and
# makes the labels reproducible.
plane_gmm = GaussianMixture(n_components=8, random_state=0).fit(plane_datacube)
plane_gmm_labels = plane_gmm.predict(plane_datacube_ds)

bird_gmm = GaussianMixture(n_components=8, random_state=0).fit(bird_datacube)
bird_gmm_labels = bird_gmm.predict(bird_datacube_ds)

# `gmm` stays bound to the bird model for any later code that reads it.
gmm = bird_gmm

# Side-by-side scatter of the first two feature columns, coloured by mixture
# component: plane on the left, bird on the right.
fig, axs = plt.subplots(1,2, figsize=(15,5))
fig.subplots_adjust(hspace = .4, wspace=.1)
axs = axs.ravel()
axs[0].scatter(plane_datacube_ds[:, 0], plane_datacube_ds[:, 1], s=80, c=plane_gmm_labels, cmap='viridis')
axs[1].scatter(bird_datacube_ds[:, 0], bird_datacube_ds[:, 1], s=80, c=bird_gmm_labels, cmap='viridis')
# Colour the PCA and t-SNE embeddings by Gaussian-mixture component.
# Each plot gets its own figure; a bare plt.scatter / plt.axes would otherwise
# draw into whichever figure happens to be current from an earlier plot.
plt.figure()
plt.scatter(plane_pca_points[0],plane_pca_points[1], s=1, c=plane_gmm_labels)

plt.figure()
ax = plt.axes(projection='3d')
ax.scatter(bird_pca_points[0],bird_pca_points[1], bird_pca_points[2], s=0.1, c=bird_gmm_labels)

fig, axs = plt.subplots(2,3, figsize=(10, 10))
fig.subplots_adjust(hspace = .4, wspace=.1)
axs = axs.ravel()
for i in range(3):
    # tsnes[i] holds the plane embedding for perplexities[i], tsnes[i+3] the bird one.
    axs[i].scatter(tsnes[i][0][:,0], tsnes[i][0][:,1], s=1, c=plane_gmm_labels)
    axs[i].set_title('Plane w/ Perplexity '+str(perplexities[i]))
    axs[i+3].scatter(tsnes[i+3][0][:,0], tsnes[i+3][0][:,1], s=1, c=bird_gmm_labels)
    axs[i+3].set_title('Bird w/ Perplexity '+str(perplexities[i]))
from sklearn.cluster import AgglomerativeClustering

# Ward-linkage agglomerative clustering of the down-sampled plane rows for
# C = 2..5 clusters, one panel per C.  No separate fit is needed up front:
# `plane_hc` is (re)assigned on every loop iteration before it is read.
fig, axs = plt.subplots(2,2, figsize=(10, 10))
fig.subplots_adjust(hspace = .4, wspace=.1)
axs = axs.ravel()
for i in range(2,6):
    plane_hc = AgglomerativeClustering(n_clusters=i, linkage='ward').fit(plane_datacube_ds)
    plane_labels = plane_hc.labels_
    panel = axs[i-2]
    for j in range(i):
        # Rows assigned to cluster j, plotted by their first two feature columns.
        members = plane_datacube_ds[plane_labels==j]
        panel.scatter(members[:,0], members[:,1])
    panel.set_title('C = '+str(i))
# Ward-linkage agglomerative clustering of the down-sampled bird rows for
# C = 2..5 clusters, one panel per C.
fig, axs = plt.subplots(2,2, figsize=(10, 10))
fig.subplots_adjust(hspace = .4, wspace=.1)
axs = axs.ravel()
for panel, i in zip(axs, range(2,6)):
    bird_hc = AgglomerativeClustering(n_clusters=i, linkage='ward').fit(bird_datacube_ds)
    bird_labels = bird_hc.labels_
    for j in range(i):
        # Rows assigned to cluster j, plotted by their first two feature columns.
        in_cluster = bird_datacube_ds[bird_labels==j]
        panel.scatter(in_cluster[:,0], in_cluster[:,1])
    panel.set_title('C = '+str(i))
# Colour the PCA and t-SNE embeddings by ward-linkage cluster (5 clusters for
# the plane, 2 for the bird).  Each clustering is fitted once and its labels
# reused for every plot instead of being recomputed per scatter call.
plane_hc_colors = AgglomerativeClustering(n_clusters=5, linkage='ward').fit(plane_datacube_ds).labels_
bird_hc_colors = AgglomerativeClustering(n_clusters=2, linkage='ward').fit(bird_datacube_ds).labels_

# Each plot gets its own figure; a bare plt.scatter / plt.axes would otherwise
# draw into whichever figure happens to be current from an earlier plot.
plt.figure()
plt.scatter(plane_pca_points[0],plane_pca_points[1], s=1, c=plane_hc_colors)

plt.figure()
ax = plt.axes(projection='3d')
ax.scatter(bird_pca_points[0],bird_pca_points[1], bird_pca_points[2], s=0.1, c=bird_hc_colors)

fig, axs = plt.subplots(2,3, figsize=(10, 10))
fig.subplots_adjust(hspace = .4, wspace=.1)
axs = axs.ravel()
for i in range(3):
    # tsnes[i] holds the plane embedding for perplexities[i], tsnes[i+3] the bird one.
    axs[i].scatter(tsnes[i][0][:,0], tsnes[i][0][:,1], s=1, c=plane_hc_colors)
    axs[i].set_title('Plane w/ Perplexity '+str(perplexities[i]))
    axs[i+3].scatter(tsnes[i+3][0][:,0], tsnes[i+3][0][:,1], s=1, c=bird_hc_colors)
    axs[i+3].set_title('Bird w/ Perplexity '+str(perplexities[i]))