CNN

import numpy as np import pandas as pd import random import os from pathlib import Path import tensorflow as tf # visualisation import matplotlib.pyplot as plt from matplotlib.pyplot import rcParams import seaborn as sns %matplotlib inline # keras libraries import tensorflow as tf from tensorflow import keras from keras.models import Sequential from keras.layers import Conv2D from keras.layers import MaxPooling2D from keras.layers import Flatten from keras.layers import Dense from keras.preprocessing.image import ImageDataGenerator, load_img # image processing import cv2 import glob from PIL import Image # surpress warning import warnings warnings.filterwarnings("ignore", category=FutureWarning)

# Install the Kaggle command-line client used by the download cell below.
!pip install kaggle

# Upload the Kaggle API token through the Colab file picker.
# A later cell copies the uploaded kaggle.json into ~/.kaggle.
from google.colab import files

files.upload()

# Create the Kaggle configuration directory if it does not exist yet.
!mkdir -p ~/.kaggle
# Copy the uploaded API token into that directory.
!cp kaggle.json ~/.kaggle/
# Restrict the token file to owner read/write (mode 600).
!chmod 600 ~/.kaggle/kaggle.json

# Download the chest X-ray pneumonia dataset with the Kaggle CLI.
# The extraction cell below opens the result as chest-xray-pneumonia.zip.
!kaggle datasets download -d paultimothymooney/chest-xray-pneumonia

from zipfile import ZipFile

# Unpack the downloaded dataset archive into the current working directory.
file_name = "chest-xray-pneumonia.zip"

# The handle is called `archive` rather than `zip`: binding `zip` here would
# replace the built-in zip() for every cell that runs afterwards.
with ZipFile(file_name, 'r') as archive:
    archive.extractall()

print('Done')

# Root directory of the extracted dataset (parent directory of the splits).
main_path = Path("/content/chest_xray")

# Each split lives in its own sub-folder of the parent directory.
train_path = main_path / "train"
val_path = main_path / "val"
test_path = main_path / "test"

# Collect the .jpeg file paths of each class folder in the training split ...
train_normal = glob.glob(str(train_path / "NORMAL" / "*.jpeg"))
train_pneumonia = glob.glob(str(train_path / "PNEUMONIA" / "*.jpeg"))

# ... and in the test split.
test_normal = glob.glob(str(test_path / "NORMAL" / "*.jpeg"))
test_pneumonia = glob.glob(str(test_path / "PNEUMONIA" / "*.jpeg"))

# Gather the file paths of the training images into a dataframe:
# 'feature' holds the path, 'label' says whether it is Normal or Pneumonia.
# Normal paths come first, followed by the Pneumonia paths.
train_list = train_normal + train_pneumonia
df_train = pd.DataFrame({'feature': train_list})
df_train['label'] = np.concatenate([
    ['Normal'] * len(train_normal),
    ['Pneumonia'] * len(train_pneumonia),
])

# Preview the first rows of the training dataframe.
df_train.head()

# Visualise some of the images in the training set: one row of six
# pneumonia images, then one row of six normal images.
for sample_paths, sample_title in ((train_pneumonia, "Pneumonia"), (train_normal, "Normal")):
    fig, axes = plt.subplots(
        nrows=1,
        ncols=6,
        figsize=(15, 10),
        subplot_kw={'xticks': [], 'yticks': []},  # hide the axis ticks
    )
    for i, ax in enumerate(axes.flat):
        # Read the i-th image of this class and resize it to 220 x 220 for display.
        img = cv2.resize(cv2.imread(sample_paths[i]), (220, 220))
        ax.imshow(img)
        ax.set_title(sample_title)
    plt.show()

# Gather the file paths of the test images into a dataframe:
# 'feature' holds the path, 'label' says whether it is Normal or Pneumonia.
# Normal paths come first, followed by the Pneumonia paths.
test_list = test_normal + test_pneumonia
df_test = pd.DataFrame({'feature': test_list})
df_test['label'] = np.concatenate([
    ['Normal'] * len(test_normal),
    ['Pneumonia'] * len(test_pneumonia),
])

# Preview the first rows of the test dataframe.
df_test.head()

# Visualise some of the images in the test set: one row of six
# pneumonia images, then one row of six normal images.
for sample_paths, sample_title in ((test_pneumonia, "Pneumonia"), (test_normal, "Normal")):
    fig, axes = plt.subplots(
        nrows=1,
        ncols=6,
        figsize=(15, 10),
        subplot_kw={'xticks': [], 'yticks': []},  # hide the axis ticks
    )
    for i, ax in enumerate(axes.flat):
        # Read the i-th image of this class and resize it to 220 x 220 for display.
        img = cv2.resize(cv2.imread(sample_paths[i]), (220, 220))
        ax.imshow(img)
        ax.set_title(sample_title)
    plt.show()

# Class balance of the training set: pie chart of the label shares on the
# left, bar chart of the absolute label counts on the right.
f, ax = plt.subplots(1, 2, figsize=(15, 8))

df_train['label'].value_counts().plot.pie(
    explode=[0, 0.1],
    autopct='%1.1f%%',
    ax=ax[0],
    shadow=True,
    colors=sns.color_palette('PuBu'),
)
ax[0].set_title('NORMAL Vs. PNEUMONIA')

# Draw on ax[1] explicitly instead of relying on whichever axes happens to be
# "current"; the title and annotations below all address ax[1].
sns.countplot(x='label', data=df_train, palette="PuBu", ax=ax[1])
ax[1].set_title('NORMAL Vs. PNEUMONIA')

# Write each bar's height (the class count) just above the bar.
for p in ax[1].patches:
    ax[1].annotate(
        (p.get_height()),
        (p.get_x() + 0.30, p.get_height() + 300),
        fontsize=13,
    )
plt.show()

# Class balance of the test set: pie chart of the label shares on the left,
# bar chart of the absolute label counts on the right.
f, ax = plt.subplots(1, 2, figsize=(15, 8))

df_test['label'].value_counts().plot.pie(
    explode=[0, 0.1],
    autopct='%1.1f%%',
    ax=ax[0],
    shadow=True,
    colors=sns.color_palette('Pastel2_r'),
)
ax[0].set_title('NORMAL Vs. PNEUMONIA')

# Draw on ax[1] explicitly instead of relying on whichever axes happens to be
# "current"; the title and annotations below all address ax[1].
sns.countplot(x='label', data=df_test, palette="Pastel2_r", ax=ax[1])
ax[1].set_title('NORMAL Vs. PNEUMONIA')

# Write each bar's height (the class count) just above the bar.
# The gap is proportional to the tallest bar rather than the fixed +300 used
# for the training plot, so labels stay next to the bars whatever the counts.
# NOTE(review): confirm the label placement looks right on the rendered plot.
label_gap = 0.02 * max((p.get_height() for p in ax[1].patches), default=0)
for p in ax[1].patches:
    ax[1].annotate(
        (p.get_height()),
        (p.get_x() + 0.30, p.get_height() + label_gap),
        fontsize=13,
    )
plt.show()

# Load the image folders directly from their directories.
# Every split is shuffled, batched in groups of 32 and resized to 150 x 150.
loader_options = dict(shuffle=True, batch_size=32, image_size=(150, 150))

training_set = tf.keras.preprocessing.image_dataset_from_directory(train_path, **loader_options)
val_set = tf.keras.preprocessing.image_dataset_from_directory(val_path, **loader_options)
testing_set = tf.keras.preprocessing.image_dataset_from_directory(test_path, **loader_options)

# Augmentation pipeline applied in front of the network: a random horizontal
# flip followed by a random rotation (factor 0.1).
preprocessing_layers = keras.layers.experimental.preprocessing
data_augmentation = keras.Sequential(
    [
        preprocessing_layers.RandomFlip("horizontal"),
        preprocessing_layers.RandomRotation(0.1),
    ]
)

# Visualise the result of the image augmentation: twelve independently
# augmented versions of the first image of one training batch.
class_names = training_set.class_names

plt.figure(figsize=(12, 12))
for images, labels in training_set.take(1):
    first_image = images[0]
    # Every tile shows the same source image, so every tile carries that
    # image's label (labels[0]), not the label of the i-th batch element.
    first_label = class_names[labels[0]]
    for i in range(12):
        ax = plt.subplot(3, 4, i + 1)
        # The augmentation model expects a batch, hence the added leading axis.
        augmented_image = data_augmentation(tf.expand_dims(first_image, 0))
        plt.imshow(augmented_image[0].numpy().astype("int32"))
        plt.title(first_label)
        plt.axis("off")

# Xception backbone loaded with weights trained on ImageNet and without its
# classification top. The images were resized to 150 x 150, so the input
# shape follows suit.
base_model = keras.applications.Xception(
    include_top=False,
    weights='imagenet',
    input_shape=(150, 150, 3),
)

# Freeze the base model so its layers are not updated during the first
# training phase.
base_model.trainable = False

# Fix the input size, then apply the data augmentation and the Xception
# input preprocessing.
inputs = keras.Input(shape=(150, 150, 3))
x = data_augmentation(inputs)
x = tf.keras.applications.xception.preprocess_input(x)

# The base model is called with training=False so that its batch
# normalisation layers are not updated at the fine-tuning stage.
x = base_model(x, training=False)

# Classification head: global average pooling, then dropout (rate 0.2).
for head_layer in (keras.layers.GlobalAveragePooling2D(), keras.layers.Dropout(0.2)):
    x = head_layer(x)

# Final output layer: a single unit with no activation.
outputs = keras.layers.Dense(1)(x)
model = keras.Model(inputs, outputs)

# First training phase (base model frozen).
# There are two classes and the model outputs a single raw logit, so the loss
# is binary cross-entropy with from_logits=True.
base_learning_rate = 0.0001
model.compile(
    optimizer=tf.keras.optimizers.Adam(learning_rate=base_learning_rate),
    loss=tf.keras.losses.BinaryCrossentropy(from_logits=True),
    # The accuracy metric sees the same raw logits as the loss. A logit of 0
    # corresponds to a sigmoid probability of 0.5, so the decision threshold
    # must be 0.0 here; the default of 0.5 would only count a prediction as
    # positive above a probability of about 0.62. This matches the prediction
    # cell, which applies a sigmoid and then cuts at 0.5.
    metrics=[keras.metrics.BinaryAccuracy(threshold=0.0)],
)

# Train the new head for 20 epochs, validating after every epoch.
model.fit(training_set, epochs=20, validation_data=val_set)

# Fine-tuning setup: unfreeze the base model (this makes the whole base model
# trainable, not only its top layers), then compile again so the change
# takes effect.
base_model.trainable = True

# The model is trained at a low learning rate (a tenth of the initial one)
# to prevent overfitting.
model.compile(
    # `learning_rate` is the supported keyword; `lr` is a deprecated alias.
    optimizer=tf.keras.optimizers.RMSprop(learning_rate=base_learning_rate / 10),
    loss=keras.losses.BinaryCrossentropy(from_logits=True),
    # The model outputs raw logits, so accuracy is thresholded at 0.0
    # (sigmoid probability 0.5) rather than at the metric's default of 0.5.
    metrics=[keras.metrics.BinaryAccuracy(threshold=0.0)],
)

# Early stopping with a patience of 5: Keras stops training when the model
# does not improve for five consecutive epochs, and the best weights seen
# are restored afterwards.
cb = tf.keras.callbacks.EarlyStopping(
    patience=5,
    restore_best_weights=True,
)

# Fine-tune for at most 15 epochs; `hist` keeps the per-epoch metrics.
hist = model.fit(
    training_set,
    epochs=15,
    validation_data=val_set,
    callbacks=[cb],
)

# Visualise the fine-tuning results: one figure for accuracy and one for
# loss, training curve in green and validation curve in red.
for plot_title, series_keys in (
    ('Accuracy scores', ['binary_accuracy', 'val_binary_accuracy']),
    ('Loss value', ['loss', 'val_loss']),
):
    plt.figure(figsize=(8, 6))
    plt.title(plot_title)
    plt.plot(hist.history[series_keys[0]], 'go-')
    plt.plot(hist.history[series_keys[1]], 'ro-')
    plt.legend(series_keys)
    plt.show()

# Evaluate on the training set; index 1 of the result is the accuracy metric
# (index 0 is the loss).
train_accuracy = model.evaluate(training_set)
train_accuracy_percent = train_accuracy[1] * 100
print('The accuracy on training set :', train_accuracy_percent, '%')

# Evaluate on the test set; index 1 of the result is the accuracy metric
# (index 0 is the loss).
test_accuracy = model.evaluate(testing_set)
test_accuracy_percent = test_accuracy[1] * 100
print('The accuracy on test set :', test_accuracy_percent, '%')

# Retrieve one batch of images (and their labels) from the test set.
image_batch, label_batch = next(testing_set.as_numpy_iterator())

# The model returns logits: apply a sigmoid, then map probabilities below
# 0.5 to class 0 and everything else to class 1.
logits = model.predict_on_batch(image_batch).flatten()
predictions = tf.where(tf.nn.sigmoid(logits) < 0.5, 0, 1)

print('Predictions:\n', predictions.numpy())
print('Labels:\n', label_batch)

# Show the first nine images of the batch, titled with the predicted class.
plt.figure(figsize=(10, 10))
for i in range(9):
    ax = plt.subplot(3, 3, i + 1)
    plt.imshow(image_batch[i].astype("uint8"))
    plt.title(class_names[predictions[i]])
    plt.axis("off")