Cassava Leaf Disease Classification
Extract
import zipfile,os
# Unpack the raw dataset archive into a scratch directory under /tmp.
local_zip = 'raw_dataset.zip'
# The context manager closes the archive even if extraction raises.
with zipfile.ZipFile(local_zip, 'r') as zip_ref:
    zip_ref.extractall('/tmp/raw_dataset')
Dataset Chart
import matplotlib.pyplot as plt
import os
# Root of the extracted dataset; each entry of `dirs` is a sub-directory of it.
base_dir = '/tmp/raw_dataset'
# NOTE(review): 'green_might_damage' and 'read_mite_damage' look like
# misspellings, but they must match the folder names on disk -- confirm
# against the archive before renaming anything.
dirs = ['brown_leaf_spot', 'brown_streak_disease', 'green_might_damage', 'healthy', 'mosaic_disease', 'read_mite_damage']

# Number of directory entries per category. A comprehension keeps the loop
# variable out of the notebook namespace (the previous loop rebound the
# builtin `dir`).
counts = [len(os.listdir(os.path.join(base_dir, class_dir))) for class_dir in dirs]

# One bar colour per category.
colors = ['b', 'g', 'r', 'c', 'm', 'y']

plt.figure(figsize=(10, 5))
plt.bar(dirs, counts, color=colors)
plt.title('Number of images in each category')
plt.xlabel('Category')
plt.ylabel('Number of images')
# Tilt the tick labels so the long category names do not overlap.
plt.xticks(rotation=45)
plt.show()
Split Image
import splitfolders
import shutil
# Locations of the unsplit source data and of the split output.
old_tmp_folder = "/tmp/raw_dataset/"
new_tmp_folder = "/tmp/master_dataset"

# Split every class folder 80/20 with a fixed seed so the split is repeatable.
splitfolders.ratio(
    old_tmp_folder,
    output=new_tmp_folder,
    seed=42,
    ratio=(.8, .2),
)

# The unsplit copy is no longer needed once the split output exists.
if os.path.isdir(old_tmp_folder):
    shutil.rmtree(old_tmp_folder)

# From here on `base_dir` points at the split dataset; the code below reads
# its 'train' and 'val' sub-directories.
base_dir = '/tmp/master_dataset'
train_dir = os.path.join(base_dir, 'train')
validation_dir = os.path.join(base_dir, 'val')
Augmentation
from tensorflow.keras.preprocessing.image import ImageDataGenerator
# Training-time pipeline: normalise pixels and apply random augmentation.
# No `validation_split` here: validation data comes from its own directory,
# and the option has no effect unless `subset=` is passed to
# flow_from_directory.
train_datagen = ImageDataGenerator(
    rescale=1./255,        # Scale pixel values to [0, 1]
    rotation_range=30,     # Random rotation of up to 30 degrees
    shear_range=0.2,       # Random shear transformations
    zoom_range=0.2,        # Random zoom
    horizontal_flip=True,  # Random horizontal flips
    vertical_flip=True,    # Random vertical flips
    fill_mode='nearest',   # How pixels created by a transform are filled
)

# Evaluation-time pipeline: normalisation only, no augmentation.
test_datagen = ImageDataGenerator(
    rescale=1./255,  # Scale pixel values to [0, 1]
)

train_generator = train_datagen.flow_from_directory(
    train_dir,
    target_size=(300, 300),
    batch_size=16,
    class_mode='categorical',
)

# shuffle=False keeps batches in the same order as `.classes`, so predictions
# made from this generator can be compared with the true labels directly.
validation_generator = test_datagen.flow_from_directory(
    validation_dir,
    target_size=(300, 300),
    batch_size=16,
    class_mode='categorical',
    shuffle=False,
)
def plot_images(images_arr, title):
    """Show the given images side by side in a single row with a caption.

    Args:
        images_arr: Iterable of images accepted by ``Axes.imshow``.
        title: Caption drawn centred near the bottom of the figure.
    """
    images = list(images_arr)
    # One column per image instead of a fixed five, so no image is dropped
    # and no empty framed axes are left over. At least one column is kept so
    # an empty input still renders; squeeze=False keeps `axes` an array even
    # when there is only one column.
    n_cols = max(len(images), 1)
    fig, axes = plt.subplots(1, n_cols, figsize=(20, 20), squeeze=False)
    axes = axes.flatten()
    for img, ax in zip(images, axes):
        ax.imshow(img)
    for ax in axes:
        ax.axis('off')
    plt.tight_layout()
    plt.figtext(0.5, 0.1, title, fontsize=20, ha='center')  # Caption at the bottom centre
    plt.show()
# Batches from train_generator have already gone through train_datagen's
# random transforms, so they are not "original" images. Read one batch of the
# training files through test_datagen (rescale only) to get unaugmented
# pictures for the comparison.
preview_generator = test_datagen.flow_from_directory(
    train_dir,
    target_size=(300, 300),
    batch_size=16,
    class_mode='categorical',
)
images_batch, labels_batch = next(preview_generator)

# Display original images
plot_images(images_batch[:5], 'Original Images')

# Apply the training augmentation to the same batch and display it.
# random_transform performs the geometric augmentation only (rescaling is not
# re-applied), so the already-normalised pixels stay displayable.
augmented_images = [train_datagen.random_transform(img) for img in images_batch]
plot_images(augmented_images[:5], 'Augmented Images')
# Count the files on disk for each class directory under train_dir.
original_class_counts = {
    class_name: len(os.listdir(os.path.join(train_dir, class_name)))
    for class_name in train_generator.class_indices
}

# Display the number of images per class before augmentation
print("Number of images per class before augmentation:")
for class_name, count in original_class_counts.items():
    print(f"{class_name}: {count}")

# Augmentation happens on the fly, so there is no fixed "augmented dataset".
# One pass over the generator yields each source image once, each with a
# fresh random transform, so an epoch produces `samples` augmented images.
# The batch size only controls how they are grouped; it is not a multiplier.
augmented_images_per_epoch = train_generator.samples

# Display the calculated number of augmented images per epoch
print(f"Number of augmented images generated per epoch: {augmented_images_per_epoch}")
Class
# Display the generator's class-name -> integer-index mapping (shown as the
# notebook cell's output); these indices are the positions in the one-hot labels.
train_generator.class_indices
Layers
import tensorflow as tf
from tensorflow.keras.layers import Input
# Baseline CNN: four Conv2D(3x3, ReLU) + 2x2 max-pooling stages with 16, 32,
# 64 and 128 filters, then a 256-unit dense layer and a 6-way softmax.
# NOTE: `model` is rebound further down when the EfficientNet variant is built.
conv_stack = []
for n_filters in (16, 32, 64, 128):
    # Only the first layer declares the input shape.
    first_layer_kwargs = {} if conv_stack else {'input_shape': (300, 300, 3)}
    conv_stack.append(
        tf.keras.layers.Conv2D(n_filters, (3, 3), activation='relu', **first_layer_kwargs)
    )
    conv_stack.append(tf.keras.layers.MaxPooling2D(2, 2))

model = tf.keras.models.Sequential(
    conv_stack
    + [
        tf.keras.layers.Flatten(),
        tf.keras.layers.Dense(256, activation='relu'),
        tf.keras.layers.Dense(6, activation='softmax'),
    ]
)
import tensorflow as tf
from tensorflow.keras.applications import EfficientNetB0
from tensorflow.keras.layers import GlobalAveragePooling2D, Dense, Dropout
# Pre-trained EfficientNetB0 backbone without its ImageNet classifier head.
base_model = EfficientNetB0(include_top=False, input_shape=(300, 300, 3), weights='imagenet')
base_model.trainable = False  # Freeze the base model

model = tf.keras.models.Sequential([
    tf.keras.Input(shape=(300, 300, 3)),
    # The data generators feed pixels already scaled to [0, 1], but the Keras
    # EfficientNet models do their own input rescaling and expect raw
    # [0, 255] values. Undo the generator's scaling here so the backbone sees
    # the range its ImageNet weights were trained on, while the model as a
    # whole keeps accepting [0, 1] inputs.
    tf.keras.layers.Rescaling(255.0),
    base_model,
    GlobalAveragePooling2D(),
    # Classification head: two dense layers with dropout, then a 6-way softmax.
    Dense(512, activation='relu'),
    Dropout(0.5),
    Dense(256, activation='relu'),
    Dropout(0.5),
    Dense(6, activation='softmax'),
])
model.summary()

# Labels are one-hot (class_mode='categorical'), hence categorical cross-entropy.
model.compile(optimizer=tf.optimizers.Adam(),
              loss='categorical_crossentropy',
              metrics=['accuracy'])
Train
# Fit for 30 epochs on the augmented training stream, scoring on the
# validation generator after each epoch; verbose=2 logs one line per epoch.
history = model.fit(x=train_generator, epochs=30, validation_data=validation_generator, verbose=2)
Accuracy
import matplotlib.pyplot as plt
# Draw the training curve first, then the validation curve, so the legend
# entries below line up with them in that order.
for metric_name in ('accuracy', 'val_accuracy'):
    plt.plot(history.history[metric_name])

plt.title('Akurasi Model')
plt.xlabel('epoch')
plt.ylabel('accuracy')
plt.legend(['train', 'test'], loc='upper left')
plt.show()
from sklearn.metrics import confusion_matrix, ConfusionMatrixDisplay
import numpy as np
# flow_from_directory shuffles by default, and predictions taken from a
# shuffled generator do not line up with its `.classes` array (which is in
# directory order). Use a dedicated non-shuffling generator for evaluation so
# the predictions and the true labels share the same order.
eval_generator = test_datagen.flow_from_directory(
    validation_dir,
    target_size=(300, 300),
    batch_size=16,
    class_mode='categorical',
    shuffle=False,
)

# Predicted class probabilities for every validation image
Y_pred = model.predict(eval_generator)
# Reduce each probability vector to the index of its most likely class
Y_pred_classes = np.argmax(Y_pred, axis=1)
# True integer class index of every validation image, in generator order
Y_true = eval_generator.classes

class_names = list(eval_generator.class_indices.keys())
# Pin the label set so the matrix always has one row/column per class, even
# when a class never appears among the true or predicted labels.
confusion_mtx = confusion_matrix(Y_true, Y_pred_classes, labels=np.arange(len(class_names)))

# Plot the confusion matrix
disp = ConfusionMatrixDisplay(confusion_matrix=confusion_mtx, display_labels=class_names)
disp.plot(cmap=plt.cm.Blues)
plt.title('Confusion Matrix')
plt.show()
Save Model
# Persist the trained model to a single file; the .h5 suffix selects the HDF5 format.
model.save('caledi.h5')
# Notebook shell escape (IPython `!`, not plain Python): run the
# tensorflowjs_converter CLI on the saved Keras file, writing to ./model-30.
!tensorflowjs_converter --input_format=keras caledi.h5 model-30