import numpy as np
import pandas as pd
import random
import os
from pathlib import Path
import tensorflow as tf
# visualisation
import matplotlib.pyplot as plt
from matplotlib.pyplot import rcParams
import seaborn as sns
%matplotlib inline
# keras libraries
import tensorflow as tf
from tensorflow import keras
from keras.models import Sequential
from keras.layers import Conv2D
from keras.layers import MaxPooling2D
from keras.layers import Flatten
from keras.layers import Dense
from keras.preprocessing.image import ImageDataGenerator, load_img
# image processing
import cv2
import glob
from PIL import Image
# suppress warnings
import warnings
warnings.filterwarnings("ignore", category=FutureWarning)
# Install the Kaggle command-line client (notebook shell command).
!pip install kaggle
# API token to be uploaded here
# Opens the Colab file-upload prompt; the shell commands below expect a file
# named kaggle.json to be present in the current working directory afterwards.
from google.colab import files
files.upload()
# Copy the token into ~/.kaggle (presumably where the kaggle CLI reads its
# credentials - confirm) and restrict it to owner read/write only.
!mkdir -p ~/.kaggle
!cp kaggle.json ~/.kaggle/
!chmod 600 ~/.kaggle/kaggle.json
# Download the chest X-ray pneumonia dataset; the next step expects the
# archive chest-xray-pneumonia.zip in the working directory.
!kaggle datasets download -d paultimothymooney/chest-xray-pneumonia
# Unpack the downloaded dataset archive into the current working directory.
from zipfile import ZipFile

file_name = "chest-xray-pneumonia.zip"
# The archive handle is deliberately not called `zip`: at notebook/module
# level that name would shadow the built-in zip() for all later code.
with ZipFile(file_name, 'r') as archive:
    archive.extractall()
print('Done')
# Dataset root and the three split folders beneath it
main_path = Path("/content/chest_xray")
train_path, val_path, test_path = (
    main_path / split for split in ("train", "val", "test")
)

# JPEG file paths (as strings) for each class of the train and test splits
train_normal = glob.glob(f"{train_path}/NORMAL/*.jpeg")
train_pneumonia = glob.glob(f"{train_path}/PNEUMONIA/*.jpeg")
test_normal = glob.glob(f"{test_path}/NORMAL/*.jpeg")
test_pneumonia = glob.glob(f"{test_path}/PNEUMONIA/*.jpeg")
# Collect the training image paths into a dataframe: column 'feature' holds
# the file path and column 'label' holds its class.
# Normal paths are listed first and pneumonia paths second, and the label
# column is built in the same order so rows and labels line up.
train_list = list(train_normal)
train_list += train_pneumonia
df_train = pd.DataFrame(train_list, columns=['feature'])
train_normal_labels = ['Normal'] * len(train_normal)
train_pneumonia_labels = ['Pneumonia'] * len(train_pneumonia)
df_train['label'] = np.concatenate([train_normal_labels, train_pneumonia_labels])
df_train.head()
# Preview the first six training images of each class, one row per class
# (pneumonia first, then normal); each image is resized to 220 x 220.
for title, paths in (("Pneumonia", train_pneumonia), ("Normal", train_normal)):
    fig, axes = plt.subplots(
        nrows=1,
        ncols=6,
        figsize=(15, 10),
        subplot_kw={'xticks': [], 'yticks': []},
    )
    for idx, axis in enumerate(axes.flat):
        thumbnail = cv2.resize(cv2.imread(paths[idx]), (220, 220))
        axis.imshow(thumbnail)
        axis.set_title(title)
    plt.show()
# Collect the test image paths into a dataframe: column 'feature' holds the
# file path and column 'label' holds its class.
# Normal paths are listed first and pneumonia paths second, and the label
# column is built in the same order so rows and labels line up.
test_list = list(test_normal)
test_list += test_pneumonia
df_test = pd.DataFrame(test_list, columns=['feature'])
test_normal_labels = ['Normal'] * len(test_normal)
test_pneumonia_labels = ['Pneumonia'] * len(test_pneumonia)
df_test['label'] = np.concatenate([test_normal_labels, test_pneumonia_labels])
df_test.head()
# Preview the first six test images of each class, one row per class
# (pneumonia first, then normal); each image is resized to 220 x 220.
for title, paths in (("Pneumonia", test_pneumonia), ("Normal", test_normal)):
    fig, axes = plt.subplots(
        nrows=1,
        ncols=6,
        figsize=(15, 10),
        subplot_kw={'xticks': [], 'yticks': []},
    )
    for idx, axis in enumerate(axes.flat):
        thumbnail = cv2.resize(cv2.imread(paths[idx]), (220, 220))
        axis.imshow(thumbnail)
        axis.set_title(title)
    plt.show()
# Class balance of the training set: pie chart on the left axes, count plot
# with per-bar counts on the right axes.
f, ax = plt.subplots(1, 2, figsize=(15, 8))
df_train['label'].value_counts().plot.pie(
    explode=[0, 0.1],
    autopct='%1.1f%%',
    ax=ax[0],
    shadow=True,
    colors=sns.color_palette('PuBu'),
)
ax[0].set_title('NORMAL Vs. PNEUMONIA')
# Target the right-hand axes explicitly; without `ax` seaborn draws on the
# implicit "current" axes, which is not guaranteed to be ax[1].
sns.countplot(x='label', data=df_train, palette="PuBu", ax=ax[1])
ax[1].set_title('NORMAL Vs. PNEUMONIA')
for p in ax[1].patches:
    # Anchor the count at the top centre of the bar and lift the text by a
    # fixed 5 points, so placement does not depend on how tall the bars are.
    ax[1].annotate(
        f"{p.get_height():.0f}",
        (p.get_x() + p.get_width() / 2, p.get_height()),
        xytext=(0, 5),
        textcoords='offset points',
        ha='center',
        va='bottom',
        fontsize=13,
    )
plt.show()
# Class balance of the test set: pie chart on the left axes, count plot with
# per-bar counts on the right axes.
f, ax = plt.subplots(1, 2, figsize=(15, 8))
df_test['label'].value_counts().plot.pie(
    explode=[0, 0.1],
    autopct='%1.1f%%',
    ax=ax[0],
    shadow=True,
    colors=sns.color_palette('Pastel2_r'),
)
ax[0].set_title('NORMAL Vs. PNEUMONIA')
# Target the right-hand axes explicitly; without `ax` seaborn draws on the
# implicit "current" axes, which is not guaranteed to be ax[1].
sns.countplot(x='label', data=df_test, palette="Pastel2_r", ax=ax[1])
ax[1].set_title('NORMAL Vs. PNEUMONIA')
for p in ax[1].patches:
    # The previous fixed +300 data-unit offset placed the anchor point above
    # the visible y-range when bars are only a few hundred tall, and
    # matplotlib does not draw annotations whose data-coordinate anchor lies
    # outside the axes. Anchor at the bar top and offset the text in points.
    ax[1].annotate(
        f"{p.get_height():.0f}",
        (p.get_x() + p.get_width() / 2, p.get_height()),
        xytext=(0, 5),
        textcoords='offset points',
        ha='center',
        va='bottom',
        fontsize=13,
    )
plt.show()
# Build the three datasets straight from their directories; every split is
# shuffled, batched in groups of 32 and resized to 150 x 150.
loader_options = dict(shuffle=True, batch_size=32, image_size=(150, 150))
load_split = tf.keras.preprocessing.image_dataset_from_directory
training_set = load_split(train_path, **loader_options)
val_set = load_split(val_path, **loader_options)
testing_set = load_split(test_path, **loader_options)

# Augmentation pipeline: random horizontal flip, then random rotation (factor 0.1)
augmentation_layers = keras.layers.experimental.preprocessing
data_augmentation = keras.Sequential([
    augmentation_layers.RandomFlip("horizontal"),
    augmentation_layers.RandomRotation(0.1),
])
# Visualise the augmentation: twelve independently augmented copies of the
# first image of one training batch, shown in a 3 x 4 grid.
class_names = training_set.class_names
plt.figure(figsize=(12, 12))
for images, labels in training_set.take(1):
    first_image = images[0]
    # Every panel shows the same source image, so every title must carry that
    # image's label (index 0) rather than the label of batch element i.
    first_label = class_names[labels[0]]
    for i in range(12):
        ax = plt.subplot(3, 4, i + 1)
        # Ask for training mode explicitly: the random preprocessing layers
        # are meant to transform inputs only while training.
        augmented_image = data_augmentation(
            tf.expand_dims(first_image, 0), training=True)
        plt.imshow(augmented_image[0].numpy().astype("int32"))
        plt.title(first_label)
        plt.axis("off")
# Xception backbone initialised with ImageNet weights and without its
# classification head; the input shape matches the 150 x 150 resized images
base_model = keras.applications.Xception(
    include_top=False,
    weights='imagenet',
    input_shape=(150, 150, 3),
)
# freeze the backbone so its weights are not updated while the new head trains
base_model.trainable = False

# assemble the full model: augmentation -> Xception preprocessing -> backbone
# -> global average pooling -> dropout -> single-unit output
inputs = keras.Input(shape=(150, 150, 3))
augmented = data_augmentation(inputs)
scaled = tf.keras.applications.xception.preprocess_input(augmented)
# training=False: the backbone is always called in inference mode, so its
# batch normalisation layers are not updated at the fine tuning stage
features = base_model(scaled, training=False)
pooled = keras.layers.GlobalAveragePooling2D()(features)
regularised = keras.layers.Dropout(0.2)(pooled)
outputs = keras.layers.Dense(1)(regularised)  # final output layer, one unit with no activation
model = keras.Model(inputs, outputs)
# Compile for two-class classification and train for 20 epochs.
# The final Dense layer has no activation, so the model outputs logits and the
# loss is configured with from_logits=True.
base_learning_rate = 0.0001
model.compile(
    optimizer=tf.keras.optimizers.Adam(learning_rate=base_learning_rate),
    loss=tf.keras.losses.BinaryCrossentropy(from_logits=True),
    # On logits the decision boundary is 0 (sigmoid(0) == 0.5). The metric's
    # default threshold of 0.5 would be compared against raw logits and
    # misreport accuracy. The name is pinned so the history keys stay
    # 'binary_accuracy' / 'val_binary_accuracy'.
    metrics=[keras.metrics.BinaryAccuracy(name='binary_accuracy', threshold=0.0)],
)
model.fit(training_set, epochs=20, validation_data=val_set)
# Fine-tuning stage: unfreeze the whole base model, then compile again so the
# change in trainability takes effect.
# The learning rate is a tenth of the first stage's rate to keep the updates
# to the pre-trained weights small and limit overfitting.
base_model.trainable = True
model.compile(
    # `learning_rate` is the supported keyword (`lr` is a deprecated alias)
    # and matches the Adam call used for the first training stage.
    optimizer=tf.keras.optimizers.RMSprop(learning_rate=base_learning_rate / 10),
    loss=keras.losses.BinaryCrossentropy(from_logits=True),
    # The model outputs logits, so accuracy must be thresholded at 0
    # (sigmoid(0) == 0.5) rather than the metric's default of 0.5. The name is
    # pinned because hist.history is read with the 'binary_accuracy' keys.
    metrics=[keras.metrics.BinaryAccuracy(name='binary_accuracy', threshold=0.0)],
)
# Early stopping with patience 5: training stops once the monitored quantity
# (the callback's default) has not improved for five consecutive epochs, and
# the weights from the best epoch are restored.
cb = tf.keras.callbacks.EarlyStopping(patience=5, restore_best_weights=True)
hist = model.fit(training_set, epochs=15, validation_data=val_set, callbacks=[cb])
# Plot the fine-tuning history, one figure per quantity: the training series
# in green and the validation series in red, legend entries named after the
# history keys.
for title, train_key, val_key in (
    ('Accuracy scores', 'binary_accuracy', 'val_binary_accuracy'),
    ('Loss value', 'loss', 'val_loss'),
):
    plt.figure(figsize=(8, 6))
    plt.title(title)
    plt.plot(hist.history[train_key], 'go-')
    plt.plot(hist.history[val_key], 'ro-')
    plt.legend([train_key, val_key])
    plt.show()
# Evaluate the model on the training and test datasets and report the value
# at index 1 of each result (the compiled accuracy metric) as a percentage.
train_accuracy = model.evaluate(training_set)
train_percent = train_accuracy[1] * 100
print('The accuracy on training set :', train_percent, '%')

test_accuracy = model.evaluate(testing_set)
test_percent = test_accuracy[1] * 100
print('The accuracy on test set :', test_percent, '%')
# Take one batch from the test set and predict on it
image_batch, label_batch = next(testing_set.as_numpy_iterator())
logits = model.predict_on_batch(image_batch).flatten()

# The model returns logits: squash them with a sigmoid, then map values
# below 0.5 to class 0 and everything else to class 1
probabilities = tf.nn.sigmoid(logits)
predictions = tf.where(probabilities < 0.5, 0, 1)

print('Predictions:\n', predictions.numpy())
print('Labels:\n', label_batch)

# Show the first nine images of the batch, each titled with its predicted class
plt.figure(figsize=(10, 10))
for position in range(9):
    ax = plt.subplot(3, 3, position + 1)
    picture = image_batch[position].astype("uint8")
    plt.imshow(picture)
    predicted_class = class_names[predictions[position]]
    plt.title(predicted_class)
    plt.axis("off")