# setup
import numpy as np
import sklearn
import os
import tensorflow as tf
#tensorflow setup
from tensorflow import keras
from tensorflow.keras import layers
from tensorflow.keras.preprocessing.image import ImageDataGenerator
#extras
#from tensorflow import keras
#import cv2 #dont need
from tensorflow.keras.preprocessing import image
import matplotlib.pyplot as plt
#since I edited the requirements txt I need to make sure the notebook is referencing it
#pip install -r requirements.txt
#ended up doing this later in the preprocessing of modelt
#train = ImageDataGenerator(rescale=1/255)
#test = ImageDataGenerator(rescale=1/255)
# ---- training split ----
# Load images straight off disk; classes are inferred from the sub-folders
# and 80% of the files land in this split.
train_dataset = tf.keras.preprocessing.image_dataset_from_directory(
    '/work/doesimagehavefish/BLUENET DATASET',
    labels='inferred',
    label_mode="binary",
    class_names=['FISH', 'NONFISH'],  # must match the folder names exactly
    color_mode='rgb',
    batch_size=32,
    image_size=(150, 150),
    shuffle=True,
    seed=42,  # same seed as the validation call so the two splits don't overlap
    validation_split=0.20,
    subset="training",
)
# ---- validation split ----
# Same directory, same seed and split fraction; the remaining 20% of files.
test_dataset = tf.keras.preprocessing.image_dataset_from_directory(
    '/work/doesimagehavefish/BLUENET DATASET',
    labels='inferred',  # labels come from the folder names
    label_mode="binary",
    class_names=['FISH', 'NONFISH'],
    color_mode='rgb',
    batch_size=32,
    image_size=(150, 150),
    shuffle=True,
    seed=42,
    validation_split=0.20,
    subset="validation",
)
# Constants referenced throughout the rest of the script.
batch_size = 32
img_width = 150
img_height = 150

# Class order matches the class_names list passed above.
class_names = train_dataset.class_names
print(class_names)
import matplotlib.pyplot as plt

# Show a 3x3 grid of sample training images with their class labels.
# label_mode="binary" yields float labels of shape (batch, 1), so cast the
# single element to int before indexing class_names — this replaces the
# broken commented-out title line the author could not get working.
plt.figure(figsize=(10, 10))
for images, labels in train_dataset.take(1):
    for i in range(9):
        plt.subplot(3, 3, i + 1)
        plt.imshow(images[i].numpy().astype("uint8"))
        plt.title(class_names[int(labels[i][0])])
        plt.axis("off")
# NOTE: BatchDataset has no class_indices attribute (that belonged to the
# old ImageDataGenerator API) — class names come from class_names instead.

# AUTOTUNE lets tf.data choose the prefetch/cache buffer sizes at runtime.
AUTOTUNE = tf.data.AUTOTUNE
train_ds = train_dataset.cache().shuffle(1000).prefetch(buffer_size=AUTOTUNE)
val_ds = test_dataset.cache().prefetch(buffer_size=AUTOTUNE)

# keras.layers has rescaling built in: map pixels from [0, 255] to [0, 1].
normalization_layer = layers.Rescaling(1./255)
rescaled_train = train_ds.map(lambda x, y: (normalization_layer(x), y))

# Sanity check: the first image of the first batch should now lie in [0, 1].
sample_batch, _ = next(iter(rescaled_train))
print(np.min(sample_batch[0]), np.max(sample_batch[0]))
# Baseline CNN adapted from the TensorFlow image-classification tutorial.
num_classes = len(class_names)

modelt = keras.Sequential()
# Normalize inside the model so raw uint8 batches can be fed directly.
modelt.add(layers.Rescaling(1./255, input_shape=(img_height, img_width, 3)))
# Three conv/pool stages with a widening filter count.
modelt.add(layers.Conv2D(16, 3, padding='same', activation='relu'))
modelt.add(layers.MaxPooling2D())
modelt.add(layers.Conv2D(32, 3, padding='same', activation='relu'))
modelt.add(layers.MaxPooling2D())
modelt.add(layers.Conv2D(64, 3, padding='same', activation='relu'))
modelt.add(layers.MaxPooling2D())
modelt.add(layers.Flatten())
modelt.add(layers.Dense(128, activation='relu'))
modelt.add(layers.Dense(num_classes))  # raw logits, one per class

# from_logits=True because the final Dense layer has no softmax.
modelt.compile(
    optimizer='adam',
    loss=tf.keras.losses.SparseCategoricalCrossentropy(from_logits=True),
    metrics=['accuracy'],
)
modelt.summary()
# ~29 ms/step; the whole run finishes in under 2 minutes.
epochs = 10
historyt = modelt.fit(train_ds, validation_data=val_ds, epochs=epochs)

# Pull the four curves out of the fit history.
acc = historyt.history['accuracy']
val_acc = historyt.history['val_accuracy']
loss = historyt.history['loss']
val_loss = historyt.history['val_loss']
epochs_range = range(epochs)

# Accuracy panel on the left, loss panel on the right.
plt.figure(figsize=(8, 8))
for pos, train_curve, val_curve, metric, corner in [
    (1, acc, val_acc, 'Accuracy', 'lower right'),
    (2, loss, val_loss, 'Loss', 'upper right'),
]:
    plt.subplot(1, 2, pos)
    plt.plot(epochs_range, train_curve, label='Training ' + metric)
    plt.plot(epochs_range, val_curve, label='Validation ' + metric)
    plt.legend(loc=corner)
    plt.title('Training and Validation ' + metric)
plt.show()
# The model was overfitting badly before normalization, so add light random
# augmentation (horizontal flip, small rotation, small zoom) to stretch the
# effective dataset size.
data_augmentation = keras.Sequential([
    layers.RandomFlip("horizontal", input_shape=(img_height, img_width, 3)),
    layers.RandomRotation(0.1),
    layers.RandomZoom(0.1),
])
# Preview: run the augmentation pipeline 9 times over one batch and show
# how the first image comes out each time (a fresh random variant per cell).
plt.figure(figsize=(10, 10))
for batch, _ in train_ds.take(1):
    for cell in range(9):
        variant = data_augmentation(batch)[0]
        plt.subplot(3, 3, cell + 1)
        plt.imshow(variant.numpy().astype("uint8"))
        plt.axis("off")
# Augmented version of the tutorial CNN: the random flip/rotate/zoom block
# feeds the same conv stack, with dropout before the dense head to curb
# overfitting.
modelt = keras.Sequential()
modelt.add(data_augmentation)
modelt.add(layers.Rescaling(1./255))
modelt.add(layers.Conv2D(16, 3, padding='same', activation='relu'))
modelt.add(layers.MaxPooling2D())
modelt.add(layers.Conv2D(32, 3, padding='same', activation='relu'))
modelt.add(layers.MaxPooling2D())
modelt.add(layers.Conv2D(64, 3, padding='same', activation='relu'))
modelt.add(layers.MaxPooling2D())
modelt.add(layers.Dropout(0.2))
modelt.add(layers.Flatten())
modelt.add(layers.Dense(128, activation='relu'))
modelt.add(layers.Dense(num_classes))  # raw logits

# from_logits=True because the final Dense layer has no softmax.
modelt.compile(
    optimizer='adam',
    loss=tf.keras.losses.SparseCategoricalCrossentropy(from_logits=True),
    metrics=['accuracy'],
)
modelt.summary()
# ~37 ms/step; roughly 3 minutes for the full run.
epochs = 15
historyt = modelt.fit(train_ds, validation_data=val_ds, epochs=epochs)

acc = historyt.history['accuracy']
val_acc = historyt.history['val_accuracy']
loss = historyt.history['loss']
val_loss = historyt.history['val_loss']
epochs_range = range(epochs)

# Same two-panel layout as before: accuracy left, loss right.
plt.figure(figsize=(8, 8))
for pos, train_curve, val_curve, metric, corner in [
    (1, acc, val_acc, 'Accuracy', 'lower right'),
    (2, loss, val_loss, 'Loss', 'upper right'),
]:
    plt.subplot(1, 2, pos)
    plt.plot(epochs_range, train_curve, label='Training ' + metric)
    plt.plot(epochs_range, val_curve, label='Validation ' + metric)
    plt.legend(loc=corner)
    plt.title('Training and Validation ' + metric)
plt.show()
# Looks better! Accuracy-only view of the same history, zoomed to [0.5, 1].
curves = historyt.history
plt.plot(curves['accuracy'], label='accuracy')
plt.plot(curves['val_accuracy'], label='val_accuracy')
plt.xlabel('Epoch')
plt.ylabel('Accuracy')
plt.ylim([0.5, 1])
plt.legend(loc='lower right')
# Single-image sanity check with the tutorial model (modelt).
img_path = 'reeffish.jpeg'
img = tf.keras.utils.load_img(img_path, target_size=(img_height, img_width))

# The model expects a batch, so add a leading axis.
img_array = tf.keras.utils.img_to_array(img)
img_array = tf.expand_dims(img_array, 0)

predictions = modelt.predict(img_array)
# modelt emits raw logits over the two classes; softmax converts them to
# probabilities.
score = tf.nn.softmax(predictions[0])

plt.imshow(img)
print(
    "This image most likely belongs to {} with a {:.2f} percent confidence."
    .format(class_names[np.argmax(score)], 100 * np.max(score))
)
# Second model: a plain binary CNN classifier with a single sigmoid output.
model = keras.Sequential([
    # Four conv/maxpool stages; filter count doubles twice then holds at 128.
    keras.layers.Conv2D(32, (3, 3), activation='relu', input_shape=(150, 150, 3)),
    keras.layers.MaxPool2D(2, 2),
    keras.layers.Conv2D(64, (3, 3), activation='relu'),
    keras.layers.MaxPool2D(2, 2),
    keras.layers.Conv2D(128, (3, 3), activation='relu'),
    keras.layers.MaxPool2D(2, 2),
    keras.layers.Conv2D(128, (3, 3), activation='relu'),
    keras.layers.MaxPool2D(2, 2),
    # Flatten the feature maps to 1-D, then a 512-unit ReLU hidden layer.
    keras.layers.Flatten(),
    keras.layers.Dense(512, activation='relu'),
    # Single sigmoid unit: output in [0, 1], 0 = fish class, 1 = non-fish class.
    keras.layers.Dense(1, activation='sigmoid'),
])
model.compile(optimizer='adam', loss='binary_crossentropy', metrics=['accuracy'])
# History of earlier attempts: 250 steps/epoch took far too long and the
# dataset ran out; 90 steps/epoch reached high accuracy faster but still
# exhausted the data. 10 steps per epoch fits the dataset size.
model.fit(
    train_dataset,
    steps_per_epoch=10,
    epochs=10,
    validation_data=test_dataset,
)
# Same 10 steps per epoch, but half the epochs (10 -> 5); this run took
# roughly 8-9 minutes.
model.fit(
    train_dataset,
    steps_per_epoch=10,
    epochs=5,
    validation_data=test_dataset,
)
# With a GPU this finishes in under 10 seconds: few epochs, few steps.
epochs = 5
history = model.fit(
    train_dataset,
    steps_per_epoch=5,  # lower than ideal, but very quick
    epochs=epochs,
    validation_data=test_dataset,
)
# Longer run: 50 steps per epoch over the same 5 epochs.
epochs = 5
history = model.fit(
    train_dataset,
    validation_data=test_dataset,
    epochs=epochs,
    steps_per_epoch=50,
)
# Full passes over the data (no steps_per_epoch cap). Without a GPU the
# earlier capped run was ~2 s/step (~9 min for 5 epochs of 5 steps).
epochs = 10
history = model.fit(
    train_dataset,
    validation_data=test_dataset,
    epochs=epochs,
)
# 321 steps per epoch over 10 epochs. Same two-panel accuracy/loss plot,
# this time for the sigmoid model's history.
acc = history.history['accuracy']
val_acc = history.history['val_accuracy']
loss = history.history['loss']
val_loss = history.history['val_loss']
epochs_range = range(epochs)

plt.figure(figsize=(8, 8))
for pos, train_curve, val_curve, metric, corner in [
    (1, acc, val_acc, 'Accuracy', 'lower right'),
    (2, loss, val_loss, 'Loss', 'upper right'),
]:
    plt.subplot(1, 2, pos)
    plt.plot(epochs_range, train_curve, label='Training ' + metric)
    plt.plot(epochs_range, val_curve, label='Validation ' + metric)
    plt.legend(loc=corner)
    plt.title('Training and Validation ' + metric)
plt.show()
# Re-declare the image size and the augmentation pipeline (same values and
# layers as the earlier definition).
img_height = 150
img_width = 150

data_augmentation = keras.Sequential([
    layers.RandomFlip("horizontal", input_shape=(img_height, img_width, 3)),
    layers.RandomRotation(0.1),
    layers.RandomZoom(0.1),
])
#321 steps - 53 ms/step (w/ GPU)
#3 min to run - 50% accuracy
# NOTE(review): data_augmentation is defined above but never attached to
# `model`, so this fit call retrains the same un-augmented network — which
# would explain the ~50% accuracy noted here. Confirm whether the
# augmentation layers were meant to be added to `model` before this run.
epochs= 10
history_aug = model.fit(
train_dataset,
validation_data = test_dataset,
epochs=epochs
)
# Visualize a few augmented examples by applying the augmentation pipeline
# to the same batch several times and plotting the first image of each pass.
plt.figure(figsize=(10, 10))
for batch, _ in train_ds.take(1):
    for cell in range(9):
        variant = data_augmentation(batch)[0]
        plt.subplot(3, 3, cell + 1)
        plt.imshow(variant.numpy().astype("uint8"))
        plt.axis("off")
# Single-image check with the sigmoid model. BUG FIX: the original applied
# tf.nn.softmax to the 1-element sigmoid output, which always yields [1.0],
# so argmax was always 0 and every image was reported as class_names[0] at
# 100% confidence. A single sigmoid unit already outputs the probability of
# class 1 ('NONFISH' — see the output-layer comment on the model), so use it
# (or its complement) directly.
img_path = 'fish121.jpeg'
img = tf.keras.utils.load_img(img_path, target_size=(img_height, img_width))
img_array = tf.keras.utils.img_to_array(img)
img_array = tf.expand_dims(img_array, 0)  # create a batch of one

predictions = model.predict(img_array)
p_nonfish = float(predictions[0][0])  # sigmoid output = P(class 1)
pred_index = 1 if p_nonfish >= 0.5 else 0
confidence = p_nonfish if pred_index == 1 else 1.0 - p_nonfish

plt.imshow(img)
print(
    "This image most likely belongs to {} with a {:.2f} percent confidence."
    .format(class_names[pred_index], 100 * confidence)
)
def predictImage(filename):
    """Load an image, run the sigmoid `model` on it, and label the plot.

    BUG FIX: the model's single sigmoid unit outputs P(label 1) = P(non fish)
    (the dataset maps FISH -> 0, NONFISH -> 1). The original thresholds
    (`val >= 1` -> "fish", `val >= 0` -> "non fish") almost always fell
    through to "non fish", and a saturated output of 1 — which means
    non-fish — was labelled "fish". The correct decision is a 0.5 threshold
    with the labels the right way round.
    """
    img1 = image.load_img(filename, target_size=(150, 150))
    plt.imshow(img1)
    arr = image.img_to_array(img1)
    batch = np.expand_dims(arr, axis=0)  # model expects a batch dimension
    val = model.predict(batch)
    print(val)
    if val[0][0] >= 0.5:
        plt.xlabel("non fish", fontsize=30)
    else:
        plt.xlabel("fish ", fontsize=30)


predictImage('coralreef.jpg')
# Final evaluation of both models on the validation split.
# NOTE(review): OneVsRestClassifier and roc_auc_score are imported here but
# not used in this file as shown — presumably for ROC analysis later.
from sklearn.multiclass import OneVsRestClassifier
from sklearn.metrics import roc_auc_score
# BUG FIX: `from scipy import interp` was a deprecated alias removed in
# modern SciPy releases; numpy.interp is the drop-in replacement.
from numpy import interp

eval_modelt = modelt.evaluate(val_ds)
eval_model = model.evaluate(val_ds)