Chest X-Ray Test
This notebook explores applying machine-learning techniques to the chest X-ray pneumonia dataset provided on Kaggle.
import numpy as np
import pandas as pd
import os
import tensorflow as tf
from tensorflow import keras
from tensorflow.keras.models import Sequential
from tensorflow.keras.layers import Activation, Dense, Flatten, BatchNormalization, Conv2D, MaxPool2D, Dropout
from tensorflow.keras.optimizers import Adam
from tensorflow.keras.metrics import categorical_crossentropy
from tensorflow.keras.preprocessing.image import ImageDataGenerator
from sklearn.metrics import confusion_matrix
import itertools
import shutil
import random
import glob
import matplotlib.pyplot as plt
import warnings
# Ignore FutureWarning messages so they do not clutter the notebook output.
warnings.simplefilter(action='ignore', category=FutureWarning)
# IPython magic: render matplotlib figures inline in the notebook.
%matplotlib inline
# IPython shell escape: list the contents of the dataset directory.
!ls /datasets/chestxray
# Directories of the training, test and validation image folders.
dir_path ='/datasets/chestxray/input/chest-xray-pneumonia/chest_xray/train'
test_path = '/datasets/chestxray/input/chest-xray-pneumonia/chest_xray/test'
valid_path = '/datasets/chestxray/input/chest-xray-pneumonia/chest_xray/val'
# Passed as `image_size` / `batch_size` to every dataset loader below.
IMAGE_SIZE= (224, 224)
BATCH_SIZE = 32
def _load_split(directory, shuffle=True):
    """Load one image split from ``directory`` as a batched tf.data.Dataset.

    Images are resized to IMAGE_SIZE, grouped into batches of BATCH_SIZE and
    paired with binary (0/1) labels inferred from the sub-directory names.

    Args:
        directory: Path to a folder containing one sub-folder per class.
        shuffle: Whether the files are shuffled (and reshuffled on every
            pass over the dataset).

    Returns:
        The dataset produced by ``image_dataset_from_directory``.
    """
    return tf.keras.preprocessing.image_dataset_from_directory(
        directory,
        image_size=IMAGE_SIZE,
        batch_size=BATCH_SIZE,
        label_mode='binary',
        shuffle=shuffle,
    )


train_dir = _load_split(dir_path)
val_dir = _load_split(valid_path)
# Keep the test split in a fixed order: a shuffled dataset is reshuffled on
# every pass, so labels collected in one pass would not line up with
# predictions produced in another (e.g. when building a confusion matrix).
test_dir = _load_split(test_path, shuffle=False)

# Class names are inferred from the sub-directory names of the training folder.
class_names = train_dir.class_names

# Pull a single batch and scale its first image to [0, 1].
# NOTE(review): this only rebinds `images` to one rescaled sample; the
# datasets themselves are left untouched and still yield raw pixel values.
for images, labels in train_dir.take(1):
    images = images[0] / 255.

# Overlap input preparation with training by prefetching batches.
train_ds = train_dir.prefetch(tf.data.AUTOTUNE)
val_ds = val_dir.prefetch(tf.data.AUTOTUNE)
test_ds = test_dir.prefetch(tf.data.AUTOTUNE)
train_ds
# Early-stopping callback: stop once validation accuracy has not improved for
# 3 consecutive epochs, and roll the model back to the best epoch's weights
# instead of keeping those of the last (non-improving) epoch.
early_stopping = tf.keras.callbacks.EarlyStopping(
    monitor='val_accuracy',
    patience=3,
    restore_best_weights=True,
)
# Baseline model: two conv -> max-pool -> dropout stages followed by a single
# sigmoid unit for the binary decision.
model_0 = Sequential([
    # image_dataset_from_directory yields pixel values in [0, 255]; scale them
    # to [0, 1] inside the model so training, evaluation and inference all see
    # identically normalised inputs. (Rescaling requires TF >= 2.6.)
    tf.keras.layers.Rescaling(1. / 255, input_shape=(*IMAGE_SIZE, 3)),
    Conv2D(16, 3, activation='relu'),
    MaxPool2D(),
    Dropout(0.5),
    Conv2D(16, 3, activation='relu'),
    MaxPool2D(),
    Dropout(0.5),
    Flatten(),
    Dense(1, activation='sigmoid'),
])

# Compile the model: binary cross-entropy matches the single sigmoid output.
model_0.compile(
    loss='binary_crossentropy',
    optimizer='adam',
    metrics=['accuracy'],
)

# Get the summary
model_0.summary()
# Train the baseline for at most 5 epochs, validating after each epoch and
# letting the early-stopping callback end the run sooner.
history_model_0 = model_0.fit(
    x=train_ds,
    validation_data=val_ds,
    epochs=5,
    callbacks=[early_stopping],
)
# Plot the accuracy and loss curves
def plot_loss_curves(history):
    """Plot training (and, when available, validation) accuracy and loss.

    Each metric is drawn in its own freshly created figure, so nothing is
    painted onto a figure that happened to be current before the call.

    Args:
        history: Object returned by ``model.fit()``. Its ``history`` dict
            must contain 'accuracy' and 'loss'; 'val_accuracy' and
            'val_loss' are plotted only if present.
    """
    metrics = history.history
    epochs = range(1, len(metrics['accuracy']) + 1)

    # (history key, legend suffix, figure title) for each figure to draw.
    panels = (
        ('accuracy', 'acc', 'Training and validation accuracy'),
        ('loss', 'loss', 'Training and validation loss'),
    )
    for key, short, title in panels:
        plt.figure()
        plt.plot(epochs, metrics[key], label=f'Training {short}')
        # Validation curves exist only when fit() received validation data.
        val_values = metrics.get(f'val_{key}')
        if val_values is not None:
            plt.plot(epochs, val_values, label=f'Validation {short}')
        plt.title(title)
        plt.xlabel('Epoch')
        plt.ylabel(key.capitalize())
        plt.legend()

    plt.show()
# Draw the accuracy and loss curves recorded while fitting model_0.
plot_loss_curves(history_model_0)