# setup
import numpy as np
import sklearn
import os
import tensorflow as tf
#tensorflow setup
from tensorflow import keras
from tensorflow.keras import layers
from tensorflow.keras.preprocessing.image import ImageDataGenerator
#extras
#from tensorflow import keras
#import cv2 #dont need
from tensorflow.keras.preprocessing import image
import matplotlib.pyplot as plt
#since I edited the requirements txt I need to make sure the notebook is referencing it
#pip install -r requirements.txt
#ended up doing this later in the preprocessing of modelt
#train = ImageDataGenerator(rescale=1/255)
#test = ImageDataGenerator(rescale=1/255)
# ---- training split ----
# Load images straight off disk; classes are inferred from the sub-folders
# and 80% of the files land in this split.
train_dataset = tf.keras.preprocessing.image_dataset_from_directory(
    '/work/doesimagehavefish/BLUENET DATASET',
    labels='inferred',
    label_mode="binary",
    class_names=['FISH', 'NONFISH'],  # must match the folder names exactly
    color_mode='rgb',
    batch_size=32,
    image_size=(150, 150),
    shuffle=True,
    seed=42,  # same seed as the validation call so the two splits don't overlap
    validation_split=0.20,
    subset="training",
)
# ---- validation split ----
# Same directory, same seed and split fraction; the remaining 20% of files.
test_dataset = tf.keras.preprocessing.image_dataset_from_directory(
    '/work/doesimagehavefish/BLUENET DATASET',
    labels='inferred',  # labels come from the folder names
    label_mode="binary",
    class_names=['FISH', 'NONFISH'],
    color_mode='rgb',
    batch_size=32,
    image_size=(150, 150),
    shuffle=True,
    seed=42,
    validation_split=0.20,
    subset="validation",
)
# Constants referenced throughout the rest of the script.
batch_size = 32
img_width = 150
img_height = 150

# Class order matches the class_names list passed above.
class_names = train_dataset.class_names
print(class_names)
import matplotlib.pyplot as plt

# Show a 3x3 grid of sample training images with their class labels.
# label_mode="binary" yields float labels of shape (batch, 1), so cast the
# single element to int before indexing class_names — this replaces the
# broken commented-out title line the author could not get working.
plt.figure(figsize=(10, 10))
for images, labels in train_dataset.take(1):
    for i in range(9):
        plt.subplot(3, 3, i + 1)
        plt.imshow(images[i].numpy().astype("uint8"))
        plt.title(class_names[int(labels[i][0])])
        plt.axis("off")
# NOTE: BatchDataset has no class_indices attribute (that belonged to the
# old ImageDataGenerator API) — class names come from class_names instead.

# AUTOTUNE lets tf.data choose the prefetch/cache buffer sizes at runtime.
AUTOTUNE = tf.data.AUTOTUNE
train_ds = train_dataset.cache().shuffle(1000).prefetch(buffer_size=AUTOTUNE)
val_ds = test_dataset.cache().prefetch(buffer_size=AUTOTUNE)

# keras.layers has rescaling built in: map pixels from [0, 255] to [0, 1].
normalization_layer = layers.Rescaling(1./255)
rescaled_train = train_ds.map(lambda x, y: (normalization_layer(x), y))

# Sanity check: the first image of the first batch should now lie in [0, 1].
sample_batch, _ = next(iter(rescaled_train))
print(np.min(sample_batch[0]), np.max(sample_batch[0]))
# Baseline CNN adapted from the TensorFlow image-classification tutorial.
num_classes = len(class_names)

modelt = keras.Sequential()
# Normalize inside the model so raw uint8 batches can be fed directly.
modelt.add(layers.Rescaling(1./255, input_shape=(img_height, img_width, 3)))
# Three conv/pool stages with a widening filter count.
modelt.add(layers.Conv2D(16, 3, padding='same', activation='relu'))
modelt.add(layers.MaxPooling2D())
modelt.add(layers.Conv2D(32, 3, padding='same', activation='relu'))
modelt.add(layers.MaxPooling2D())
modelt.add(layers.Conv2D(64, 3, padding='same', activation='relu'))
modelt.add(layers.MaxPooling2D())
modelt.add(layers.Flatten())
modelt.add(layers.Dense(128, activation='relu'))
modelt.add(layers.Dense(num_classes))  # raw logits, one per class

# from_logits=True because the final Dense layer has no softmax.
modelt.compile(
    optimizer='adam',
    loss=tf.keras.losses.SparseCategoricalCrossentropy(from_logits=True),
    metrics=['accuracy'],
)
modelt.summary()
# ~29 ms/step; the whole run finishes in under 2 minutes.
epochs = 10
historyt = modelt.fit(train_ds, validation_data=val_ds, epochs=epochs)

# Pull the four curves out of the fit history.
acc = historyt.history['accuracy']
val_acc = historyt.history['val_accuracy']
loss = historyt.history['loss']
val_loss = historyt.history['val_loss']
epochs_range = range(epochs)

# Accuracy panel on the left, loss panel on the right.
plt.figure(figsize=(8, 8))
for pos, train_curve, val_curve, metric, corner in [
    (1, acc, val_acc, 'Accuracy', 'lower right'),
    (2, loss, val_loss, 'Loss', 'upper right'),
]:
    plt.subplot(1, 2, pos)
    plt.plot(epochs_range, train_curve, label='Training ' + metric)
    plt.plot(epochs_range, val_curve, label='Validation ' + metric)
    plt.legend(loc=corner)
    plt.title('Training and Validation ' + metric)
plt.show()
# The model was overfitting badly before normalization, so add light random
# augmentation (horizontal flip, small rotation, small zoom) to stretch the
# effective dataset size.
data_augmentation = keras.Sequential([
    layers.RandomFlip("horizontal", input_shape=(img_height, img_width, 3)),
    layers.RandomRotation(0.1),
    layers.RandomZoom(0.1),
])
# Preview: run the augmentation pipeline 9 times over one batch and show
# how the first image comes out each time (a fresh random variant per cell).
plt.figure(figsize=(10, 10))
for batch, _ in train_ds.take(1):
    for cell in range(9):
        variant = data_augmentation(batch)[0]
        plt.subplot(3, 3, cell + 1)
        plt.imshow(variant.numpy().astype("uint8"))
        plt.axis("off")
# Augmented version of the tutorial CNN: the random flip/rotate/zoom block
# feeds the same conv stack, with dropout before the dense head to curb
# overfitting.
modelt = keras.Sequential()
modelt.add(data_augmentation)
modelt.add(layers.Rescaling(1./255))
modelt.add(layers.Conv2D(16, 3, padding='same', activation='relu'))
modelt.add(layers.MaxPooling2D())
modelt.add(layers.Conv2D(32, 3, padding='same', activation='relu'))
modelt.add(layers.MaxPooling2D())
modelt.add(layers.Conv2D(64, 3, padding='same', activation='relu'))
modelt.add(layers.MaxPooling2D())
modelt.add(layers.Dropout(0.2))
modelt.add(layers.Flatten())
modelt.add(layers.Dense(128, activation='relu'))
modelt.add(layers.Dense(num_classes))  # raw logits

# from_logits=True because the final Dense layer has no softmax.
modelt.compile(
    optimizer='adam',
    loss=tf.keras.losses.SparseCategoricalCrossentropy(from_logits=True),
    metrics=['accuracy'],
)
modelt.summary()
# ~37 ms/step; roughly 3 minutes for the full run.
epochs = 15
historyt = modelt.fit(train_ds, validation_data=val_ds, epochs=epochs)

acc = historyt.history['accuracy']
val_acc = historyt.history['val_accuracy']
loss = historyt.history['loss']
val_loss = historyt.history['val_loss']
epochs_range = range(epochs)

# Same two-panel layout as before: accuracy left, loss right.
plt.figure(figsize=(8, 8))
for pos, train_curve, val_curve, metric, corner in [
    (1, acc, val_acc, 'Accuracy', 'lower right'),
    (2, loss, val_loss, 'Loss', 'upper right'),
]:
    plt.subplot(1, 2, pos)
    plt.plot(epochs_range, train_curve, label='Training ' + metric)
    plt.plot(epochs_range, val_curve, label='Validation ' + metric)
    plt.legend(loc=corner)
    plt.title('Training and Validation ' + metric)
plt.show()
# Looks better! Accuracy-only view of the same history, zoomed to [0.5, 1].
curves = historyt.history
plt.plot(curves['accuracy'], label='accuracy')
plt.plot(curves['val_accuracy'], label='val_accuracy')
plt.xlabel('Epoch')
plt.ylabel('Accuracy')
plt.ylim([0.5, 1])
plt.legend(loc='lower right')
# Single-image sanity check with the tutorial model (modelt).
img_path = 'reeffish.jpeg'
img = tf.keras.utils.load_img(img_path, target_size=(img_height, img_width))

# The model expects a batch, so add a leading axis.
img_array = tf.keras.utils.img_to_array(img)
img_array = tf.expand_dims(img_array, 0)

predictions = modelt.predict(img_array)
# modelt emits raw logits over the two classes; softmax converts them to
# probabilities.
score = tf.nn.softmax(predictions[0])

plt.imshow(img)
print(
    "This image most likely belongs to {} with a {:.2f} percent confidence."
    .format(class_names[np.argmax(score)], 100 * np.max(score))
)
# Second model: a plain binary CNN classifier with a single sigmoid output.
model = keras.Sequential([
    # Four conv/maxpool stages; filter count doubles twice then holds at 128.
    keras.layers.Conv2D(32, (3, 3), activation='relu', input_shape=(150, 150, 3)),
    keras.layers.MaxPool2D(2, 2),
    keras.layers.Conv2D(64, (3, 3), activation='relu'),
    keras.layers.MaxPool2D(2, 2),
    keras.layers.Conv2D(128, (3, 3), activation='relu'),
    keras.layers.MaxPool2D(2, 2),
    keras.layers.Conv2D(128, (3, 3), activation='relu'),
    keras.layers.MaxPool2D(2, 2),
    # Flatten the feature maps to 1-D, then a 512-unit ReLU hidden layer.
    keras.layers.Flatten(),
    keras.layers.Dense(512, activation='relu'),
    # Single sigmoid unit: output in [0, 1], 0 = fish class, 1 = non-fish class.
    keras.layers.Dense(1, activation='sigmoid'),
])
model.compile(optimizer='adam', loss='binary_crossentropy', metrics=['accuracy'])
# History of earlier attempts: 250 steps/epoch took far too long and the
# dataset ran out; 90 steps/epoch reached high accuracy faster but still
# exhausted the data. 10 steps per epoch fits the dataset size.
model.fit(
    train_dataset,
    steps_per_epoch=10,
    epochs=10,
    validation_data=test_dataset,
)
# Same 10 steps per epoch, but half the epochs (10 -> 5); this run took
# roughly 8-9 minutes.
model.fit(
    train_dataset,
    steps_per_epoch=10,
    epochs=5,
    validation_data=test_dataset,
)
# With a GPU this finishes in under 10 seconds: few epochs, few steps.
epochs = 5
history = model.fit(
    train_dataset,
    steps_per_epoch=5,  # lower than ideal, but very quick
    epochs=epochs,
    validation_data=test_dataset,
)
# Longer run: 50 steps per epoch over the same 5 epochs.
epochs = 5
history = model.fit(
    train_dataset,
    validation_data=test_dataset,
    epochs=epochs,
    steps_per_epoch=50,
)
# Full passes over the data (no steps_per_epoch cap). Without a GPU the
# earlier capped run was ~2 s/step (~9 min for 5 epochs of 5 steps).
epochs = 10
history = model.fit(
    train_dataset,
    validation_data=test_dataset,
    epochs=epochs,
)
# 321 steps per epoch over 10 epochs. Same two-panel accuracy/loss plot,
# this time for the sigmoid model's history.
acc = history.history['accuracy']
val_acc = history.history['val_accuracy']
loss = history.history['loss']
val_loss = history.history['val_loss']
epochs_range = range(epochs)

plt.figure(figsize=(8, 8))
for pos, train_curve, val_curve, metric, corner in [
    (1, acc, val_acc, 'Accuracy', 'lower right'),
    (2, loss, val_loss, 'Loss', 'upper right'),
]:
    plt.subplot(1, 2, pos)
    plt.plot(epochs_range, train_curve, label='Training ' + metric)
    plt.plot(epochs_range, val_curve, label='Validation ' + metric)
    plt.legend(loc=corner)
    plt.title('Training and Validation ' + metric)
plt.show()
# Re-declare the image size and the augmentation pipeline (same values and
# layers as the earlier definition).
img_height = 150
img_width = 150

data_augmentation = keras.Sequential([
    layers.RandomFlip("horizontal", input_shape=(img_height, img_width, 3)),
    layers.RandomRotation(0.1),
    layers.RandomZoom(0.1),
])
#321 steps - 53 ms/step (w/ GPU)
#3 min to run - 50% accuracy
# NOTE(review): data_augmentation is defined above but never attached to
# `model`, so this fit call retrains the same un-augmented network — which
# would explain the ~50% accuracy noted here. Confirm whether the
# augmentation layers were meant to be added to `model` before this run.
epochs= 10
history_aug = model.fit(
train_dataset,
validation_data = test_dataset,
epochs=epochs
)
# Visualize a few augmented examples by applying the augmentation pipeline
# to the same batch several times and plotting the first image of each pass.
plt.figure(figsize=(10, 10))
for batch, _ in train_ds.take(1):
    for cell in range(9):
        variant = data_augmentation(batch)[0]
        plt.subplot(3, 3, cell + 1)
        plt.imshow(variant.numpy().astype("uint8"))
        plt.axis("off")
# Single-image check with the sigmoid model. BUG FIX: the original applied
# tf.nn.softmax to the 1-element sigmoid output, which always yields [1.0],
# so argmax was always 0 and every image was reported as class_names[0] at
# 100% confidence. A single sigmoid unit already outputs the probability of
# class 1 ('NONFISH' — see the output-layer comment on the model), so use it
# (or its complement) directly.
img_path = 'fish121.jpeg'
img = tf.keras.utils.load_img(img_path, target_size=(img_height, img_width))
img_array = tf.keras.utils.img_to_array(img)
img_array = tf.expand_dims(img_array, 0)  # create a batch of one

predictions = model.predict(img_array)
p_nonfish = float(predictions[0][0])  # sigmoid output = P(class 1)
pred_index = 1 if p_nonfish >= 0.5 else 0
confidence = p_nonfish if pred_index == 1 else 1.0 - p_nonfish

plt.imshow(img)
print(
    "This image most likely belongs to {} with a {:.2f} percent confidence."
    .format(class_names[pred_index], 100 * confidence)
)
def predictImage(filename):
    """Load an image, run the sigmoid `model` on it, and label the plot.

    BUG FIX: the model's single sigmoid unit outputs P(label 1) = P(non fish)
    (the dataset maps FISH -> 0, NONFISH -> 1). The original thresholds
    (`val >= 1` -> "fish", `val >= 0` -> "non fish") almost always fell
    through to "non fish", and a saturated output of 1 — which means
    non-fish — was labelled "fish". The correct decision is a 0.5 threshold
    with the labels the right way round.
    """
    img1 = image.load_img(filename, target_size=(150, 150))
    plt.imshow(img1)
    arr = image.img_to_array(img1)
    batch = np.expand_dims(arr, axis=0)  # model expects a batch dimension
    val = model.predict(batch)
    print(val)
    if val[0][0] >= 0.5:
        plt.xlabel("non fish", fontsize=30)
    else:
        plt.xlabel("fish ", fontsize=30)


predictImage('coralreef.jpg')
# Final evaluation of both models on the validation split.
# NOTE(review): OneVsRestClassifier and roc_auc_score are imported here but
# not used in this file as shown — presumably for ROC analysis later.
from sklearn.multiclass import OneVsRestClassifier
from sklearn.metrics import roc_auc_score
# BUG FIX: `from scipy import interp` was a deprecated alias removed in
# modern SciPy releases; numpy.interp is the drop-in replacement.
from numpy import interp

eval_modelt = modelt.evaluate(val_ds)
eval_model = model.evaluate(val_ds)