import numpy as np
import pandas as pd
import random
import os
from pathlib import Path
import tensorflow as tf
# visualisation
import matplotlib.pyplot as plt
from matplotlib.pyplot import rcParams
import seaborn as sns
%matplotlib inline
# keras libraries
import tensorflow as tf
from tensorflow import keras
from keras.models import Sequential
from keras.layers import Conv2D
from keras.layers import MaxPooling2D
from keras.layers import Flatten
from keras.layers import Dense
from keras.preprocessing.image import ImageDataGenerator, load_img
# image processing
import cv2
import glob
from PIL import Image
# surpress warning
import warnings
warnings.filterwarnings("ignore", category=FutureWarning)
# install the Kaggle CLI (already present on Colab; harmless to re-install)
!pip install kaggle
# API token to be uploaded here
# (upload the kaggle.json downloaded from the Kaggle account page)
from google.colab import files
files.upload()
# place kaggle.json where the CLI looks for credentials, with owner-only
# permissions — the Kaggle CLI refuses world-readable token files
!mkdir -p ~/.kaggle
!cp kaggle.json ~/.kaggle/
!chmod 600 ~/.kaggle/kaggle.json
# download the chest X-ray pneumonia dataset archive into the working dir
!kaggle datasets download -d paultimothymooney/chest-xray-pneumonia
from zipfile import ZipFile

# Extract the downloaded Kaggle archive into the current working directory.
# (`archive` instead of the original `zip`, which shadowed the builtin.)
file_name = "chest-xray-pneumonia.zip"
with ZipFile(file_name, 'r') as archive:
    archive.extractall()
    print('Done')
Done
main_path = Path("/content/chest_xray")
# sub-directories of the extracted dataset
train_path = main_path / "train"
val_path = main_path / "val"
test_path = main_path / "test"

# per-class lists of image file paths (pathlib composes the patterns; the
# original concatenated strings by hand)
train_normal = glob.glob(str(train_path / "NORMAL" / "*.jpeg"))
train_pneumonia = glob.glob(str(train_path / "PNEUMONIA" / "*.jpeg"))
test_normal = glob.glob(str(test_path / "NORMAL" / "*.jpeg"))
test_pneumonia = glob.glob(str(test_path / "PNEUMONIA" / "*.jpeg"))

# training file paths with their class labels in a single dataframe;
# building both columns at once replaces the original's redundant
# `[x for x in ...]` list copies and np.concatenate over string lists
df_train = pd.DataFrame({
    'feature': train_normal + train_pneumonia,
    'label': ['Normal'] * len(train_normal) + ['Pneumonia'] * len(train_pneumonia),
})
df_train.head()
def _plot_samples(paths, title):
    """Show the first six images from `paths` in one row, resized to 220x220."""
    fig, axes = plt.subplots(nrows=1, ncols=6, figsize=(15, 10),
                             subplot_kw={'xticks': [], 'yticks': []})
    for i, ax in enumerate(axes.flat):
        img = cv2.imread(paths[i])
        img = cv2.resize(img, (220, 220))
        ax.imshow(img)
        ax.set_title(title)
    plt.show()

# visualising some of the images in the training set
# (indentation of the loop bodies was lost in the export; the duplicated
# plotting loops are factored into the helper above)
_plot_samples(train_pneumonia, "Pneumonia")
_plot_samples(train_normal, "Normal")
# test-set file paths with their class labels in a single dataframe;
# building both columns at once replaces the original's redundant
# `[x for x in ...]` list copies and np.concatenate over string lists
df_test = pd.DataFrame({
    'feature': test_normal + test_pneumonia,
    'label': ['Normal'] * len(test_normal) + ['Pneumonia'] * len(test_pneumonia),
})
df_test.head()
def _plot_samples(paths, title):
    """Show the first six images from `paths` in one row, resized to 220x220."""
    fig, axes = plt.subplots(nrows=1, ncols=6, figsize=(15, 10),
                             subplot_kw={'xticks': [], 'yticks': []})
    for i, ax in enumerate(axes.flat):
        img = cv2.imread(paths[i])
        img = cv2.resize(img, (220, 220))
        ax.imshow(img)
        ax.set_title(title)
    plt.show()

# visualising some of the images in the test set
# (indentation of the loop bodies was lost in the export; the duplicated
# plotting loops are factored into the helper above)
_plot_samples(test_pneumonia, "Pneumonia")
_plot_samples(test_normal, "Normal")
# class balance of the training set: pie chart (left) and counts (right)
f, ax = plt.subplots(1, 2, figsize=(15, 8))
df_train['label'].value_counts().plot.pie(explode=[0, 0.1], autopct='%1.1f%%',
                                          ax=ax[0], shadow=True,
                                          colors=sns.color_palette('PuBu'))
ax[0].set_title('NORMAL Vs. PNEUMONIA')
# pass ax explicitly — the original relied on ax[1] happening to be the
# "current axes", which silently breaks if anything else draws in between
sns.countplot(x='label', data=df_train, palette="PuBu", ax=ax[1])
ax[1].set_title('NORMAL Vs. PNEUMONIA')
# annotate each bar with its count (loop body indentation restored)
for p in ax[1].patches:
    ax[1].annotate((p.get_height()), (p.get_x() + 0.30, p.get_height() + 300), fontsize=13)
plt.show()
# class balance of the test set: pie chart (left) and counts (right)
f, ax = plt.subplots(1, 2, figsize=(15, 8))
df_test['label'].value_counts().plot.pie(explode=[0, 0.1], autopct='%1.1f%%',
                                         ax=ax[0], shadow=True,
                                         colors=sns.color_palette('Pastel2_r'))
ax[0].set_title('NORMAL Vs. PNEUMONIA')
# pass ax explicitly — the original relied on ax[1] happening to be the
# "current axes", which silently breaks if anything else draws in between
sns.countplot(x='label', data=df_test, palette="Pastel2_r", ax=ax[1])
ax[1].set_title('NORMAL Vs. PNEUMONIA')
# annotate each bar with its count (loop body indentation restored)
for p in ax[1].patches:
    ax[1].annotate((p.get_height()), (p.get_x() + 0.30, p.get_height() + 300), fontsize=13)
plt.show()
# Load the image folders directly from the directory tree: images are
# shuffled, grouped into batches and resized on the fly.
_BATCH_SIZE = 32
_IMG_SIZE = (150, 150)

training_set = tf.keras.preprocessing.image_dataset_from_directory(
    train_path, shuffle=True, batch_size=_BATCH_SIZE, image_size=_IMG_SIZE)
val_set = tf.keras.preprocessing.image_dataset_from_directory(
    val_path, shuffle=True, batch_size=_BATCH_SIZE, image_size=_IMG_SIZE)
testing_set = tf.keras.preprocessing.image_dataset_from_directory(
    test_path, shuffle=True, batch_size=_BATCH_SIZE, image_size=_IMG_SIZE)
Found 5216 files belonging to 2 classes.
Found 16 files belonging to 2 classes.
Found 624 files belonging to 2 classes.
# lightweight on-the-fly augmentation: random horizontal flip + small rotation
data_augmentation = keras.Sequential([
    keras.layers.experimental.preprocessing.RandomFlip("horizontal"),
    keras.layers.experimental.preprocessing.RandomRotation(0.1),
])

# visualising the result of the image augmentation: 12 random augmentations
# of the SAME (first) image from one training batch
class_names = training_set.class_names
plt.figure(figsize=(12, 12))
for images, labels in training_set.take(1):
    first_image = images[0]
    for i in range(12):
        ax = plt.subplot(3, 4, i + 1)
        augmented_image = data_augmentation(tf.expand_dims(first_image, 0))
        plt.imshow(augmented_image[0].numpy().astype("int32"))
        # every panel shows an augmentation of images[0], so title with that
        # image's own label; the original indexed labels[i], captioning each
        # panel with the label of an unrelated image in the batch
        plt.title(class_names[labels[0]])
        plt.axis("off")
# Xception backbone pre-trained on ImageNet, without its classification head
# (include_top=False); input shape matches the 150 x 150 RGB images above.
base_model = keras.applications.Xception(
    include_top=False,
    weights='imagenet',
    input_shape=(150, 150, 3),
)
Downloading data from https://storage.googleapis.com/tensorflow/keras-applications/xception/xception_weights_tf_dim_ordering_tf_kernels_notop.h5
83689472/83683744 [==============================] - 1s 0us/step
83697664/83683744 [==============================] - 1s 0us/step
# Freeze the backbone so its weights are not updated during the first
# (feature-extraction) training phase.
base_model.trainable = False

# Assemble the full model: augmentation -> Xception preprocessing ->
# frozen backbone -> global pooling -> dropout -> single-logit output.
inputs = keras.Input(shape=(150, 150, 3))
features = data_augmentation(inputs)
features = tf.keras.applications.xception.preprocess_input(features)
# training=False keeps the batch-normalisation layers in inference mode,
# including during the later fine-tuning stage
features = base_model(features, training=False)
features = keras.layers.GlobalAveragePooling2D()(features)
features = keras.layers.Dropout(0.2)(features)
outputs = keras.layers.Dense(1)(features)  # final output layer (one logit)
model = keras.Model(inputs, outputs)
# Compile with binary cross-entropy on logits (two classes, one output unit)
# and train the new classification head for 20 epochs.
base_learning_rate = 0.0001
model.compile(
    optimizer=tf.keras.optimizers.Adam(learning_rate=base_learning_rate),
    loss=tf.keras.losses.BinaryCrossentropy(from_logits=True),
    metrics=keras.metrics.BinaryAccuracy(),
)
model.fit(training_set, epochs=20, validation_data=val_set)
Epoch 1/20
163/163 [==============================] - 48s 203ms/step - loss: 0.5890 - binary_accuracy: 0.6609 - val_loss: 0.5760 - val_binary_accuracy: 0.6875
Epoch 2/20
163/163 [==============================] - 33s 192ms/step - loss: 0.3958 - binary_accuracy: 0.8393 - val_loss: 0.4635 - val_binary_accuracy: 0.7500
Epoch 3/20
163/163 [==============================] - 32s 190ms/step - loss: 0.3167 - binary_accuracy: 0.8725 - val_loss: 0.4306 - val_binary_accuracy: 0.7500
Epoch 4/20
163/163 [==============================] - 32s 190ms/step - loss: 0.2862 - binary_accuracy: 0.8844 - val_loss: 0.4182 - val_binary_accuracy: 0.7500
Epoch 5/20
163/163 [==============================] - 32s 190ms/step - loss: 0.2588 - binary_accuracy: 0.8936 - val_loss: 0.4083 - val_binary_accuracy: 0.8125
Epoch 6/20
163/163 [==============================] - 32s 189ms/step - loss: 0.2490 - binary_accuracy: 0.8976 - val_loss: 0.4055 - val_binary_accuracy: 0.8125
Epoch 7/20
163/163 [==============================] - 33s 192ms/step - loss: 0.2315 - binary_accuracy: 0.9070 - val_loss: 0.3966 - val_binary_accuracy: 0.8125
Epoch 8/20
163/163 [==============================] - 33s 193ms/step - loss: 0.2183 - binary_accuracy: 0.9139 - val_loss: 0.4018 - val_binary_accuracy: 0.8125
Epoch 9/20
163/163 [==============================] - 33s 191ms/step - loss: 0.2187 - binary_accuracy: 0.9093 - val_loss: 0.3985 - val_binary_accuracy: 0.8125
Epoch 10/20
163/163 [==============================] - 32s 189ms/step - loss: 0.2044 - binary_accuracy: 0.9199 - val_loss: 0.4061 - val_binary_accuracy: 0.8125
Epoch 11/20
163/163 [==============================] - 32s 189ms/step - loss: 0.2060 - binary_accuracy: 0.9210 - val_loss: 0.4064 - val_binary_accuracy: 0.8125
Epoch 12/20
163/163 [==============================] - 32s 189ms/step - loss: 0.1958 - binary_accuracy: 0.9212 - val_loss: 0.3946 - val_binary_accuracy: 0.8125
Epoch 13/20
163/163 [==============================] - 32s 190ms/step - loss: 0.1850 - binary_accuracy: 0.9250 - val_loss: 0.4032 - val_binary_accuracy: 0.7500
Epoch 14/20
163/163 [==============================] - 32s 191ms/step - loss: 0.1874 - binary_accuracy: 0.9264 - val_loss: 0.4130 - val_binary_accuracy: 0.7500
Epoch 15/20
163/163 [==============================] - 32s 190ms/step - loss: 0.1863 - binary_accuracy: 0.9256 - val_loss: 0.4106 - val_binary_accuracy: 0.7500
Epoch 16/20
163/163 [==============================] - 32s 189ms/step - loss: 0.1848 - binary_accuracy: 0.9264 - val_loss: 0.4047 - val_binary_accuracy: 0.7500
Epoch 17/20
163/163 [==============================] - 32s 189ms/step - loss: 0.1766 - binary_accuracy: 0.9285 - val_loss: 0.4109 - val_binary_accuracy: 0.7500
Epoch 18/20
163/163 [==============================] - 32s 189ms/step - loss: 0.1792 - binary_accuracy: 0.9260 - val_loss: 0.4179 - val_binary_accuracy: 0.7500
Epoch 19/20
163/163 [==============================] - 32s 190ms/step - loss: 0.1735 - binary_accuracy: 0.9289 - val_loss: 0.4242 - val_binary_accuracy: 0.7500
Epoch 20/20
163/163 [==============================] - 32s 190ms/step - loss: 0.1721 - binary_accuracy: 0.9350 - val_loss: 0.4198 - val_binary_accuracy: 0.7500
# Unfreeze the backbone for fine-tuning and recompile to apply the change;
# a 10x lower learning rate limits how far the pre-trained weights move.
base_model.trainable = True
model.compile(
    # `lr` is deprecated (the original run emitted a UserWarning for it);
    # `learning_rate` is the supported keyword
    optimizer=tf.keras.optimizers.RMSprop(learning_rate=base_learning_rate / 10),
    loss=keras.losses.BinaryCrossentropy(from_logits=True),
    metrics=keras.metrics.BinaryAccuracy(),
)
/usr/local/lib/python3.7/dist-packages/keras/optimizer_v2/rmsprop.py:130: UserWarning: The `lr` argument is deprecated, use `learning_rate` instead.
super(RMSprop, self).__init__(name, **kwargs)
# Early stopping with patience 5: Keras stops fine-tuning once the monitored
# validation loss has not improved for five consecutive epochs, and restores
# the best weights seen so far.
early_stop = tf.keras.callbacks.EarlyStopping(patience=5, restore_best_weights=True)
hist = model.fit(training_set, epochs=15, validation_data=val_set, callbacks=[early_stop])
Epoch 1/15
163/163 [==============================] - 123s 667ms/step - loss: 0.1168 - binary_accuracy: 0.9544 - val_loss: 0.2805 - val_binary_accuracy: 0.9375
Epoch 2/15
163/163 [==============================] - 109s 660ms/step - loss: 0.0813 - binary_accuracy: 0.9663 - val_loss: 0.5464 - val_binary_accuracy: 0.8125
Epoch 3/15
163/163 [==============================] - 109s 660ms/step - loss: 0.0698 - binary_accuracy: 0.9730 - val_loss: 0.1582 - val_binary_accuracy: 0.9375
Epoch 4/15
163/163 [==============================] - 109s 661ms/step - loss: 0.0569 - binary_accuracy: 0.9789 - val_loss: 0.3983 - val_binary_accuracy: 0.8125
Epoch 5/15
163/163 [==============================] - 109s 662ms/step - loss: 0.0482 - binary_accuracy: 0.9808 - val_loss: 0.1151 - val_binary_accuracy: 0.9375
Epoch 6/15
163/163 [==============================] - 109s 660ms/step - loss: 0.0481 - binary_accuracy: 0.9816 - val_loss: 0.1905 - val_binary_accuracy: 0.8750
Epoch 7/15
163/163 [==============================] - 109s 659ms/step - loss: 0.0425 - binary_accuracy: 0.9826 - val_loss: 0.2488 - val_binary_accuracy: 0.8750
Epoch 8/15
163/163 [==============================] - 109s 658ms/step - loss: 0.0351 - binary_accuracy: 0.9875 - val_loss: 0.0283 - val_binary_accuracy: 1.0000
Epoch 9/15
163/163 [==============================] - 109s 660ms/step - loss: 0.0324 - binary_accuracy: 0.9875 - val_loss: 0.0229 - val_binary_accuracy: 1.0000
Epoch 10/15
163/163 [==============================] - 109s 659ms/step - loss: 0.0326 - binary_accuracy: 0.9873 - val_loss: 0.0309 - val_binary_accuracy: 1.0000
Epoch 11/15
163/163 [==============================] - 109s 658ms/step - loss: 0.0296 - binary_accuracy: 0.9891 - val_loss: 0.2568 - val_binary_accuracy: 0.8750
Epoch 12/15
163/163 [==============================] - 109s 659ms/step - loss: 0.0249 - binary_accuracy: 0.9902 - val_loss: 0.0208 - val_binary_accuracy: 1.0000
Epoch 13/15
163/163 [==============================] - 108s 655ms/step - loss: 0.0207 - binary_accuracy: 0.9929 - val_loss: 0.0225 - val_binary_accuracy: 1.0000
Epoch 14/15
163/163 [==============================] - 108s 655ms/step - loss: 0.0213 - binary_accuracy: 0.9914 - val_loss: 0.0428 - val_binary_accuracy: 1.0000
Epoch 15/15
163/163 [==============================] - 108s 655ms/step - loss: 0.0211 - binary_accuracy: 0.9916 - val_loss: 0.0320 - val_binary_accuracy: 1.0000
# visualising the fine-tuning results: one figure for accuracy, one for loss
for fig_title, train_key, val_key in (
        ('Accuracy scores', 'binary_accuracy', 'val_binary_accuracy'),
        ('Loss value', 'loss', 'val_loss')):
    plt.figure(figsize=(8, 6))
    plt.title(fig_title)
    plt.plot(hist.history[train_key], 'go-')   # training curve (green)
    plt.plot(hist.history[val_key], 'ro-')     # validation curve (red)
    plt.legend([train_key, val_key])
    plt.show()
# final accuracy over the full training set (evaluate returns [loss, accuracy])
train_accuracy = model.evaluate(training_set)
print(f'The accuracy on training set : {train_accuracy[1] * 100} %')
163/163 [==============================] - 32s 186ms/step - loss: 0.0218 - binary_accuracy: 0.9904
The accuracy on training set : 99.04140830039978 %
# final accuracy over the held-out test set (evaluate returns [loss, accuracy])
test_accuracy = model.evaluate(testing_set)
print(f'The accuracy on test set : {test_accuracy[1] * 100} %')
20/20 [==============================] - 5s 168ms/step - loss: 0.8348 - binary_accuracy: 0.8574
The accuracy on test set : 85.73718070983887 %
# Retrieve a batch of images from the test set and predict on it
image_batch, label_batch = testing_set.as_numpy_iterator().next()
predictions = model.predict_on_batch(image_batch).flatten()
# Apply a sigmoid since the model returns logits, then threshold at 0.5
predictions = tf.nn.sigmoid(predictions)
predictions = tf.where(predictions < 0.5, 0, 1)
print('Predictions:\n', predictions.numpy())
print('Labels:\n', label_batch)

# show the first nine images with their predicted class
# (loop body indentation was lost in the export and is restored here)
plt.figure(figsize=(10, 10))
for i in range(9):
    ax = plt.subplot(3, 3, i + 1)
    plt.imshow(image_batch[i].astype("uint8"))
    plt.title(class_names[predictions[i]])
    plt.axis("off")
Predictions:
[1 0 1 1 1 1 1 0 1 1 0 0 1 1 1 1 1 1 1 1 1 1 1 1 1 1 0 1 1 1 1 0]
Labels:
[1 0 1 1 1 1 1 0 0 1 0 0 1 1 1 1 0 1 1 1 1 1 1 1 0 0 0 0 1 1 1 0]