%pip install split-folders
%pip install scikit-image
%pip install torchmetrics
import splitfolders
from plotly.subplots import make_subplots
import plotly.graph_objects as go
import numpy as np
from pathlib import Path
from skimage import io
from IPython.display import display
import torchvision.models as models
import os
import pandas as pd
import matplotlib.pyplot as plt
import seaborn as sns
import torch
import torch.nn as nn
import torch.nn.functional as F
import torch.utils.data as data
import torchvision
from torchvision import transforms
from torchmetrics import MetricCollection, Accuracy, Precision, Recall
%%bash
# Move the Kaggle API token into place, then download and unpack the dataset.
mkdir -p /root/.kaggle
mv kaggle.json /root/.kaggle/
chmod 600 /root/.kaggle/kaggle.json
kaggle datasets download -d andrewmvd/medical-mnist
unzip -q medical-mnist.zip -d data/
# Train test split. Do not override this cell!
# splitfolders creates train/ and val/ subfolders; with ratio=(.05, .95)
# only 5% of the images go to training and the rest to evaluation.
NEW_DATA_DIR = 'splitted_data/'
splitfolders.ratio("data", output=NEW_DATA_DIR, seed=42, ratio=(.05, .95), group_prefix=None)
!mv splitted_data/val splitted_data/test
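# Optional sanity check (a minimal sketch, assuming the layout produced by
# splitfolders above): count the images per class in each split.
for split in ("train", "test"):
    split_dir = Path(NEW_DATA_DIR) / split
    counts = {p.name: len(list(p.iterdir())) for p in split_dir.iterdir() if p.is_dir()}
    print(split, counts)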
data_path = Path('data/')
labels_path = [label_folder for label_folder in data_path.iterdir() if label_folder.is_dir()]
fig = make_subplots(
rows=2, cols=3,
subplot_titles=[lab_path.name for lab_path in labels_path]
)
tmp = []
for i, label_path in enumerate(labels_path):
    label_images_path = list(label_path.iterdir())
    random_image_path = np.random.choice(label_images_path)
    img = io.imread(random_image_path)
    tmp.append(img)
    fig.add_trace(
        go.Heatmap(z=img[::-1], coloraxis="coloraxis"),  # flip vertically: Heatmap draws row 0 at the bottom
        row=(i // 3) + 1, col=(i % 3) + 1
    )
fig.update_layout(
height=600,
width=800,
title_text="Examples of images",
coloraxis={"colorscale": "greys"},
)
fig.show()
skinchanger = transforms.Compose([
transforms.Grayscale(num_output_channels=1),
transforms.ToTensor()
])
train_data = torchvision.datasets.ImageFolder(root="./splitted_data/train/", transform=skinchanger)
train_data_loader = data.DataLoader(train_data, batch_size=10, shuffle=True, num_workers=4)
test_data = torchvision.datasets.ImageFolder(root="./splitted_data/test/", transform=skinchanger)
test_data_loader = data.DataLoader(test_data, batch_size=10, shuffle=True, num_workers=4)
print("Number of train samples: ", len(train_data))
print("Number of test samples: ", len(test_data))
class nono(nn.Module):
    def __init__(self):
        super(nono, self).__init__()
        self.cnn_layers = nn.Sequential(
            nn.Conv2d(1, 4, kernel_size=5, stride=1, padding=1),
            nn.ReLU(inplace=True),
            nn.Conv2d(4, 4, kernel_size=5, stride=1, padding=1),
            nn.ReLU(inplace=True),
            nn.Dropout(),
            nn.MaxPool2d(kernel_size=2, stride=2),
            nn.BatchNorm2d(4),
            nn.Conv2d(4, 16, kernel_size=5, stride=1, padding=1),
            nn.ReLU(inplace=True),
            nn.Conv2d(16, 16, kernel_size=5, stride=1, padding=1),
            nn.ReLU(inplace=True),
            nn.Dropout(),
            nn.MaxPool2d(kernel_size=2, stride=2),
            nn.BatchNorm2d(16)
        )
        # For 64x64 inputs the conv stack outputs 16 x 13 x 13 = 2704 features:
        # each 5x5 conv with padding=1 shrinks the side by 2 (64->62->60,
        # pool->30, ->28->26, pool->13).
        self.linear_layers = nn.Sequential(nn.Linear(16 * 13 * 13, len(labels_path)))

    # Defining the forward pass
    def forward(self, x):
        x = self.cnn_layers(x)
        x = x.view(x.size(0), -1)
        x = self.linear_layers(x)
        return x
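# Sanity-check the flattened feature size with a dummy batch
# (a sketch assuming the 64x64 grayscale inputs used above).
with torch.no_grad():
    print(nono()(torch.zeros(1, 1, 64, 64)).shape)  # expect torch.Size([1, len(labels_path)])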
model = nono()
optimizer = torch.optim.Adam(model.parameters(), lr=0.003)
entropy = nn.CrossEntropyLoss()
for epoch in range(100):
    losses = []
    model.train()
    for i, (x, y) in enumerate(train_data_loader):
        optimizer.zero_grad()
        outputs = model(x)
        loss = entropy(outputs, y)
        loss.backward()
        optimizer.step()
        losses.append(loss.item())
        if i % 100 == 0:
            print("Epoch: %d, i: %4d, loss=%.3f" % (epoch + 1, i + 1, np.average(losses)))
    train_loss = np.average(losses)
# torchmetrics >= 0.11 selects the metric via the task argument; the
# MetricCollection keys then follow the resulting class names.
metrics = MetricCollection([
    Recall(task="multiclass", num_classes=len(labels_path), average='macro'),
    Precision(task="multiclass", num_classes=len(labels_path), average='macro')
])
model.eval()  # switch off dropout and use running batch-norm stats
with torch.no_grad():
    for step, (x, y) in enumerate(test_data_loader):
        outputs = model(x)
        _, predicted = torch.max(outputs, 1)
        metrics(predicted, y)
results = metrics.compute()
print("Recall: {}".format(results["MulticlassRecall"]))
print("Precision: {}".format(results["MulticlassPrecision"]))
print(results)
augmentation = transforms.Compose([
    transforms.Grayscale(num_output_channels=1),
    transforms.RandomHorizontalFlip(p=1),  # p=1: always flip, so every copy differs from its original
    transforms.ToTensor()
])
aug = torchvision.datasets.ImageFolder(root="./splitted_data/train/", transform=augmentation)
image_dataset = torch.utils.data.ConcatDataset([
train_data,
aug
])
train_data_loader = data.DataLoader(image_dataset, batch_size=10, shuffle=True, num_workers=4)
print("Number of train samples: ", len(image_dataset))
model = nono()
optimizer = torch.optim.Adam(model.parameters(), lr=0.003)
entropy = nn.CrossEntropyLoss()
for epoch in range(100):
    losses = []
    model.train()
    for i, (x, y) in enumerate(train_data_loader):
        optimizer.zero_grad()
        outputs = model(x)
        loss = entropy(outputs, y)
        loss.backward()
        optimizer.step()
        losses.append(loss.item())
        if i % 100 == 0:
            print("Epoch: %d, i: %4d, loss=%.3f" % (epoch + 1, i + 1, np.average(losses)))
    train_loss = np.average(losses)
metrics = MetricCollection([
    Recall(task="multiclass", num_classes=len(labels_path), average='macro'),
    Precision(task="multiclass", num_classes=len(labels_path), average='macro')
])
model.eval()
with torch.no_grad():
    for step, (x, y) in enumerate(test_data_loader):
        outputs = model(x)
        _, predicted = torch.max(outputs, 1)
        metrics(predicted, y)
results = metrics.compute()
print("Recall: {}".format(results["MulticlassRecall"]))
print("Precision: {}".format(results["MulticlassPrecision"]))
print(results)
class shefine(nn.Module):
    def __init__(self, original_model):
        super(shefine, self).__init__()
        # Map the 1-channel grayscale input to the 3 channels ResNet expects.
        self.conv1 = nn.Conv2d(1, 3, kernel_size=3, stride=1, padding=1, bias=False)
        # Everything except the final fc layer; global avg-pool leaves 2048 features.
        self.features = nn.Sequential(*list(original_model.children())[:-1])
        self.classifier = nn.Sequential(nn.Linear(2048, len(labels_path)))
        # Freeze the pretrained backbone; only conv1 and the classifier train.
        for p in self.features.parameters():
            p.requires_grad = False

    def forward(self, x):
        f = self.conv1(x)
        f = self.features(f)
        f = f.view(f.size(0), -1)
        y = self.classifier(f)
        return y
%pip install ipywidgets
original = models.resnet50(weights=models.ResNet50_Weights.DEFAULT)  # pretrained=True is removed in recent torchvision
model = shefine(original)
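# Quick check (a sketch): with the backbone frozen, only conv1 and the
# classifier should contribute trainable parameters.
trainable = sum(p.numel() for p in model.parameters() if p.requires_grad)
total = sum(p.numel() for p in model.parameters())
print(f"trainable: {trainable:,} / total: {total:,}")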
# Only the unfrozen parameters need updating.
optimizer = torch.optim.Adam((p for p in model.parameters() if p.requires_grad), lr=0.003)
entropy = nn.CrossEntropyLoss()
for epoch in range(100):
    losses = []
    model.train()
    for i, (x, y) in enumerate(train_data_loader):
        optimizer.zero_grad()
        outputs = model(x)
        loss = entropy(outputs, y)
        loss.backward()
        optimizer.step()
        losses.append(loss.item())
        if i % 100 == 0:
            print("Epoch: %d, i: %4d, loss=%.3f" % (epoch + 1, i + 1, np.average(losses)))
    train_loss = np.average(losses)
metrics = MetricCollection([
    Recall(task="multiclass", num_classes=len(labels_path), average='macro'),  # was CLASSES, which is undefined
    Precision(task="multiclass", num_classes=len(labels_path), average='macro')
])
model.eval()
with torch.no_grad():
    for step, (x, y) in enumerate(test_data_loader):
        outputs = model(x)
        _, predicted = torch.max(outputs, 1)
        metrics(predicted, y)
results = metrics.compute()
print("Recall: {}".format(results["MulticlassRecall"]))
print("Precision: {}".format(results["MulticlassPrecision"]))
print(results)
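# Follow-up sketch (assumes torchmetrics >= 0.11 and uses the seaborn import
# above): a per-class confusion matrix often says more than macro averages.
from torchmetrics.classification import MulticlassConfusionMatrix

confmat = MulticlassConfusionMatrix(num_classes=len(labels_path))
model.eval()
with torch.no_grad():
    for x, y in test_data_loader:
        confmat(model(x).argmax(dim=1), y)
class_names = test_data.classes  # ImageFolder's alphabetical class order
sns.heatmap(confmat.compute().numpy(), annot=True, fmt="d",
            xticklabels=class_names, yticklabels=class_names)
plt.xlabel("predicted"); plt.ylabel("true"); plt.show()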