Working on computer vision problems
What is a Convolutional Neural Network?
Training a CNN for Image Classification
Imports
import os, gc, sys
import random
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
from PIL import Image
from PIL import ImageFile
from tqdm import tqdm
from sklearn.metrics import roc_auc_score, accuracy_score
from sklearn.model_selection import StratifiedKFold, train_test_split
import torch
import torch.nn as nn
from torchvision import models
import torchvision.transforms as transforms
import warnings
warnings.filterwarnings("ignore")
Utils
class AverageMeter:
    """
    Running tracker for a series of values.

    Attributes (all reset to 0 by ``reset``):
        val   -- the most recent value passed to ``update``
        sum   -- weighted sum of all values seen so far
        count -- total weight seen so far
        avg   -- ``sum / count``
    """
    def __init__(self):
        self.reset()

    def reset(self):
        """Set every statistic back to zero."""
        self.val = self.avg = self.sum = self.count = 0

    def update(self, val, n=1):
        """Record ``val`` with weight ``n`` and refresh the running average."""
        self.val = val
        self.count += n
        self.sum += val * n
        self.avg = self.sum / self.count
class EarlyStopping:
    """
    Stop training once a monitored score stops improving, saving the best model.

    Call the instance once per epoch with the epoch score. When the score
    improves by at least ``delta`` the model's ``state_dict`` is written to
    ``model_path`` and the patience counter is reset; otherwise the counter is
    incremented and ``early_stop`` becomes True once it reaches ``patience``.

    Arguments:
        patience {int} -- number of consecutive non-improving calls tolerated
        mode {str} -- "max" if a higher score is better, "min" if lower is better
        delta {float} -- minimum change that counts as an improvement
    """
    def __init__(self, patience=7, mode="max", delta=0.001):
        self.patience = patience
        self.counter = 0
        self.mode = mode
        self.best_score = None
        self.early_stop = False
        self.delta = delta
        # `np.Inf` was removed in NumPy 2.0; `np.inf` works on every version.
        self.val_score = np.inf if self.mode == "min" else -np.inf

    def __call__(self, epoch_score, model, model_path):
        """Register the score of one epoch and checkpoint the model if it improved."""
        epoch_score = float(epoch_score)
        # Internally a higher `score` is always better, whatever the mode.
        score = -epoch_score if self.mode == "min" else epoch_score

        if not np.isfinite(score):
            # NaN compares False against everything, so it would otherwise be
            # taken for an improvement and poison `best_score` for good.
            self._no_improvement()
        elif self.best_score is None or score >= self.best_score + self.delta:
            self.best_score = score
            self.save_checkpoint(epoch_score, model, model_path)
            self.counter = 0
        else:
            self._no_improvement()

    def _no_improvement(self):
        """Count one non-improving epoch and raise the stop flag when patience runs out."""
        self.counter += 1
        print('EarlyStopping counter: {} out of {}'.format(
            self.counter, self.patience))
        if self.counter >= self.patience:
            self.early_stop = True

    def save_checkpoint(self, epoch_score, model, model_path):
        """Write ``model.state_dict()`` to ``model_path`` when the score is finite."""
        if np.isfinite(epoch_score):
            print(
                'Validation score improved ({} --> {}). Saving model!'.format(self.val_score, epoch_score))
            torch.save(model.state_dict(), model_path)
        self.val_score = epoch_score
def seed_everything(seed):
    """
    Seeds basic parameters for reproducibility of results

    Seeds Python's ``random``, the ``PYTHONHASHSEED`` environment variable,
    NumPy and PyTorch (CPU, plus every visible GPU when CUDA is available).

    Arguments:
        seed {int} -- Number of the seed
    """
    random.seed(seed)
    os.environ["PYTHONHASHSEED"] = str(seed)
    np.random.seed(seed)
    torch.manual_seed(seed)
    if torch.cuda.is_available():
        torch.cuda.manual_seed(seed)
        torch.cuda.manual_seed_all(seed)
        torch.backends.cudnn.deterministic = True
        # The cuDNN auto-tuner may pick a different algorithm from run to run,
        # which defeats the deterministic setting above, so keep it off.
        torch.backends.cudnn.benchmark = False
# Fix every random seed once, before any shuffling, splitting or model creation below.
seed_everything(95)
Config
# Root folder of the dataset; every data path below is derived from it so
# that moving the dataset only requires changing this one line.
_DATA_ROOT = "/work/aerial-cactus-identification/aerial-cactus-identification/"

# Central configuration of the notebook: paths, column names and hyper-parameters.
config = {
    # --- Paths ---
    'PROJECT_PATH': _DATA_ROOT,
    'TRAIN_PATH': _DATA_ROOT + "train/",
    'TEST_PATH': _DATA_ROOT + "test/",
    'TRAIN_FILE': _DATA_ROOT + "train.csv",
    'TEST_FILE': _DATA_ROOT + "sample_submission.csv",
    'MODEL_PATH': "/work/model.bin",
    # --- Data description ---
    'FOLD_NUMBER': 2,
    'TARGET_VAR': "has_cactus",
    'IMAGE_ID': "id",
    'IMAGE_EXT': ".jpg",
    # --- Hardware: use the GPU when one is available ---
    'DEVICE': torch.device("cuda" if torch.cuda.is_available() else "cpu"),
    # --- Hyper-parameters ---
    'N_CLASS': 2,
    'TRAIN_BS': 64,
    'VALID_BS': 64,
    'EPOCHS': 1,
    'LR': 1e-4,
    'IMAGE_SIZE': (32, 32),
}
EDA
# Load the training labels (one row per image).
train = pd.read_csv(config["TRAIN_FILE"])


def get_train_file_path(image_id):
    """Return the full path of a training image from its identifier."""
    # Built from the config so the location is defined in a single place.
    return os.path.join(config["TRAIN_PATH"], str(image_id))


train['file_path'] = train[config["IMAGE_ID"]].apply(get_train_file_path)

# Show the first images in a grid, each titled with its label.
plt.figure(figsize=(20, 20))
row, col = 4, 4
for i in range(min(row * col, len(train))):
    # subplot expects (number of rows, number of columns, index)
    plt.subplot(row, col, i + 1)
    target = train.loc[i, config["TARGET_VAR"]]
    # The context manager closes the image file once it has been drawn.
    with Image.open(train.loc[i, 'file_path']) as image:
        plt.imshow(image)
    plt.title(f"has_cactus: {target}")
plt.show()

# Distribution of the target variable.
train[config["TARGET_VAR"]].hist()
Dataset
class CACTUS_DATASET(torch.utils.data.Dataset):
    '''
    Pytorch class to define an image dataset
    image_path : must be a list of path to individual images like "data/image_001.png"
    resize : if not None, image will be resized to this size, MUST BE A TUPLE (height, width)
    label : labels for each image of their class
    transforms : if not None, transform will be applied on images

    Each item is a dictionary {"images": image, "labels": label}; the "labels"
    key is omitted when no label was given (test time).
    '''
    def __init__(self, image_path, resize, label=None, transforms=None):
        self.image_path = image_path
        self.resize = resize
        self.label = label
        self.transforms = transforms

    def __len__(self):
        return len(self.image_path)

    def __getitem__(self, item):
        # Open the image, force 3 channels and release the file handle at once.
        # convert() loads the pixels, so the image stays usable after closing.
        with Image.open(self.image_path[item]) as handle:
            image = handle.convert("RGB")
        # Resize the image to given size (PIL expects (width, height))
        if self.resize is not None:
            image = image.resize(
                (self.resize[1], self.resize[0]), resample=Image.BILINEAR
            )

        # Applying transformations if provided
        if self.transforms is not None:
            image = self.transforms(image)
        # At test time we don't have label so we return only images
        if self.label is None:
            return {"images": image}
        # Return a dictionary of pairs image, label
        label = self.label[item]
        return {"images": image, "labels": torch.tensor(label, dtype=torch.long)}
Augmentations
# Per-channel mean and standard deviation handed to Normalize in every pipeline.
_NORM_MEAN = [0.485, 0.456, 0.406]
_NORM_STD = [0.229, 0.224, 0.225]


def _tensor_and_normalize():
    """Return fresh ToTensor + Normalize steps, the tail shared by all pipelines."""
    return [transforms.ToTensor(), transforms.Normalize(_NORM_MEAN, _NORM_STD)]


# One transform pipeline per stage. Only training adds random flips;
# validation and test just convert to tensor and normalize.
Augmentations = {
    'train': transforms.Compose(
        [
            transforms.RandomHorizontalFlip(),
            transforms.RandomVerticalFlip(),
            *_tensor_and_normalize(),
        ]
    ),
    'valid': transforms.Compose(_tensor_and_normalize()),
    'test': transforms.Compose(_tensor_and_normalize()),
}
Model
class CACTUS_MODEL(nn.Module):
    """
    ResNet-18 followed by one linear layer producing ``n_class`` scores.

    n_class : number of output classes
    pretrain : forwarded to ``models.resnet18(pretrained=...)``
    """
    def __init__(self, n_class=2, pretrain=True):
        super().__init__()

        self.base_model = models.resnet18(pretrained=pretrain)
        # NOTE(review): the head is sized on the OUTPUT of the backbone's own
        # `fc` layer, so it is stacked on top of that layer rather than
        # replacing it -- confirm this is intended (vs. `fc.in_features`).
        backbone_out_dim = self.base_model.fc.out_features
        self.l0 = nn.Linear(backbone_out_dim, n_class)

    def forward(self, image):
        """Run ``image`` through the backbone, then the linear head."""
        return self.l0(self.base_model(image))
Trainer
#################
# TRAINER CLASS #
#################
class TRAINER:
    '''
    Runs one-epoch training and evaluation passes for a classification model.

    training_step trains the model for one epoch
    eval_step evaluates the current model on validation data

    Both return (average_loss, metric_value) as plain Python floats:
    the loss is the sample-weighted mean over the epoch and the metric is
    computed once on the predictions of the whole epoch.

    model : the network to train / evaluate
    device : device on which the batches are placed
    optimizer : optimizer stepped after each training batch
    criterion : loss function called as criterion(output, labels)
    metric : function called as metric(labels, predicted_classes)
    '''
    def __init__(self, model, device, optimizer=None, criterion=None, metric=None):
        self.model = model
        self.device = device
        self.optimizer = optimizer
        self.criterion = criterion
        self.metric = metric

    @staticmethod
    def _epoch_metric(metric, labels, preds):
        """Evaluate ``metric`` on the concatenated labels/predictions of one epoch."""
        # Nothing to score: no metric was supplied or the loader was empty.
        if metric is None or not labels:
            return float("nan")
        # float() accepts both Python floats and NumPy scalars.
        return float(metric(np.concatenate(labels), np.concatenate(preds)))

    #################
    # TRAINING STEP #
    #################
    def training_step(self, data_loader):
        """Train for one epoch and return (average loss, metric value)."""
        # LOSS AVERAGE
        losses = AverageMeter()
        # Labels and predictions are collected so the metric is computed on the
        # whole epoch: a per-batch ROC AUC fails on a single-class batch.
        all_labels, all_preds = [], []
        # MODEL TO TRAIN MODE
        self.model.train()
        # TRAINING LOOP
        tk0 = tqdm(data_loader, total=len(data_loader))
        for data in tk0:
            # LOADING IMAGES & LABELS
            images = data["images"].to(self.device) # Load images and place them on device
            labels = data["labels"].to(self.device) # Load labels and place them on device
            # RESET GRADIENTS
            self.model.zero_grad() # Reset model gradients
            # CALCULATE LOSS
            output = self.model(images) # Pass images through the model
            loss = self.criterion(output, labels) # Compute the loss
            # CALCULATE GRADIENTS
            loss.backward() # Back-propagate to compute the gradients
            self.optimizer.step() # Update the model parameters
            # COLLECT LOSS & PREDICTIONS
            losses.update(loss.item(), images.size(0))
            all_preds.append(output.detach().argmax(dim=1).cpu().numpy())
            all_labels.append(labels.detach().cpu().numpy())
            tk0.set_postfix(loss=losses.avg)
        # Return the epoch average, not the last batch's loss tensor (which
        # would also keep its computation graph alive).
        return losses.avg, self._epoch_metric(self.metric, all_labels, all_preds)

    ###################
    # VALIDATION STEP #
    ###################
    def eval_step(self, data_loader, metric=None):
        """
        Evaluate on ``data_loader`` and return (average loss, metric value).

        metric : metric to use for this evaluation; falls back to the one
                 given at construction when None
        """
        if metric is None:
            metric = self.metric
        # LOSS AVERAGE
        losses = AverageMeter()
        all_labels, all_preds = [], []
        # MODEL TO EVAL MODE
        self.model.eval()
        # VALIDATION LOOP
        with torch.no_grad():
            tk0 = tqdm(data_loader, total=len(data_loader))
            for data in tk0:
                # LOADING IMAGES & LABELS
                images = data["images"].to(self.device)
                labels = data["labels"].to(self.device)
                # CALCULATE LOSS
                output = self.model(images)
                loss = self.criterion(output, labels)
                # COLLECT LOSS & PREDICTIONS
                losses.update(loss.item(), images.size(0))
                all_preds.append(output.argmax(dim=1).cpu().numpy())
                all_labels.append(labels.cpu().numpy())
                tk0.set_postfix(loss=losses.avg)
        print(f"Validation Loss = {losses.avg}")
        return losses.avg, self._epoch_metric(metric, all_labels, all_preds)
Splitting data
# Reload the labels and flag which rows go to validation (split == 1).
df = pd.read_csv(config["TRAIN_FILE"])
df.head()
df = df.sample(frac=1).reset_index(drop=True) #We resample and randomly shuffle our dataset
df["split"] = 0
target = df[config["TARGET_VAR"]]
# We use the stratify parameter to be sure that our train and validation set have the same distribution in the target variable
train, valid = train_test_split(df, test_size=0.2, stratify=target)
# Mark all validation rows in one vectorised assignment instead of a row-by-row loop
df.loc[valid.index, "split"] = 1
Training & Validation
def run():
    """
    Train the model on the rows of ``df`` with split != 1 and validate on split == 1.

    The best model (highest validation score) is saved to
    ``config["MODEL_PATH"]`` by the early-stopping helper.

    Returns:
        train_losses, train_scores, val_losses, val_scores -- four lists
        holding one float per completed epoch
    """
    train_losses = []
    train_scores = []
    val_losses = []
    val_scores = []

    def _image_paths(frame):
        # Image ids may or may not carry an extension: strip it, then add the configured one.
        return [
            os.path.join(config["TRAIN_PATH"], os.path.splitext(image_id)[0] + config["IMAGE_EXT"])
            for image_id in frame[config["IMAGE_ID"]].values.tolist()
        ]

    def _make_loader(frame, stage, batch_size, shuffle):
        # Build the dataset for one stage and wrap it in a DataLoader.
        dataset = CACTUS_DATASET(
            image_path=_image_paths(frame),
            resize=config["IMAGE_SIZE"],
            label=frame[config["TARGET_VAR"]].values,
            transforms=Augmentations[stage]
        )
        return torch.utils.data.DataLoader(
            dataset, batch_size=batch_size, shuffle=shuffle, num_workers=0
        )

    # LOADING MODEL ON DEVICE
    model = CACTUS_MODEL()
    model.to(config["DEVICE"])
    # CREATING TRAINING AND VALIDATION SETS
    df_train = df[df.split != 1].reset_index(drop=True)
    df_valid = df[df.split == 1].reset_index(drop=True)
    ########################
    # CREATING DATALOADERS #
    ########################
    train_loader = _make_loader(df_train, "train", config["TRAIN_BS"], shuffle=True)
    # Shuffling brings nothing at validation time, so keep the order fixed.
    valid_loader = _make_loader(df_valid, "valid", config["VALID_BS"], shuffle=False)

    # METRIC
    metric = roc_auc_score
    # LOSS FUNCTION
    criterion = nn.CrossEntropyLoss()
    # SET OPTIMIZER, SCHEDULER
    # NOTE(review): the learning rate is hard-coded here while config["LR"]
    # (1e-4) is never read -- confirm which value is intended.
    optimizer = torch.optim.Adam(model.parameters(), lr=2e-4)
    scheduler = torch.optim.lr_scheduler.CosineAnnealingLR(optimizer, T_max=20, eta_min=1e-6)
    # SET EARLY STOPPING FUNCTION
    es = EarlyStopping(patience=2, mode="max")
    # CREATE TRAINER
    trainer = TRAINER(model=model,
                      device=config["DEVICE"],
                      optimizer=optimizer,
                      criterion=criterion,
                      metric=metric)

    # START TRAINING FOR N EPOCHS
    for epoch in range(config["EPOCHS"]):
        print(f"Starting epoch number : {epoch}")
        # TRAINING PHASE
        print("Training the model...")
        train_loss, train_score = trainer.training_step(train_loader)
        print(f"Training score = {train_score}")
        # VALIDATION PHASE
        print("Evaluating the model...")
        val_loss, val_score = trainer.eval_step(valid_loader, metric)
        # CosineAnnealingLR follows a fixed schedule: step() takes no metric.
        # Passing the loss would be read as the (deprecated) epoch argument.
        scheduler.step()
        # METRICS
        print(f"Validation score = {val_score}")
        # float() keeps the history free of tensors (and of their graphs).
        train_losses.append(float(train_loss))
        train_scores.append(train_score)
        val_losses.append(float(val_loss))
        val_scores.append(val_score)
        # SAVING CHECKPOINTS
        # Save where inference() loads the weights from.
        es(val_score, model, model_path=config["MODEL_PATH"])
        if es.early_stop:
            print("Early Stopping")
            break
        gc.collect()
    return train_losses, train_scores, val_losses, val_scores
# Launch training and keep the per-epoch loss / score histories returned by run().
train_losses, train_scores, val_losses, val_scores = run()
Inference
def predict(model, data_loader):
    """
    Run ``model`` on every batch of ``data_loader`` without tracking gradients.

    Each batch must be a dictionary with an "images" entry; batches are moved
    to ``config["DEVICE"]`` before the forward pass. The model is used as is:
    switching it to evaluation mode is left to the caller.

    Returns:
        list -- one NumPy array of raw model outputs per input sample
    """
    preds = []
    with torch.no_grad():
        tk0 = tqdm(data_loader, total=len(data_loader))
        # Label the bar before iterating: once the loop has finished the bar
        # is closed and setting the postfix no longer shows anything.
        tk0.set_postfix(stage="test")
        for data in tk0:
            # LOADING IMAGES
            images = data["images"].to(config["DEVICE"])

            # PREDICTIONS USING THE MODEL
            output = model(images).detach().cpu().numpy()
            # extend() iterates over the first axis: one entry per sample
            preds.extend(output)
    return preds
def inference():
    """
    Predict a class for every image listed in ``config["TEST_FILE"]``.

    Builds the test dataset, loads the weights saved at
    ``config["MODEL_PATH"]`` into a fresh model and runs it on the test images.

    Returns:
        numpy.ndarray -- predicted class index (argmax of the model outputs)
        for each test image, in file order
    """
    test_df = pd.read_csv(config["TEST_FILE"])
    # Image ids may or may not carry an extension: strip it, then add the configured one.
    test_img = [
        os.path.join(config["TEST_PATH"], os.path.splitext(image_id)[0] + config["IMAGE_EXT"])
        for image_id in test_df[config["IMAGE_ID"]].values.tolist()
    ]

    # TEST DATASET
    test_ds = CACTUS_DATASET(
        image_path=test_img,
        resize=config["IMAGE_SIZE"],
        label=None,
        transforms=Augmentations["test"]
    )
    # TEST DATALOADER (never shuffled, so predictions stay aligned with the file)
    test_loader = torch.utils.data.DataLoader(test_ds, batch_size=config["VALID_BS"], shuffle=False, num_workers=0)

    # LOADING MODEL
    # No pretrained weights needed: they are overwritten by the checkpoint.
    MODEL = CACTUS_MODEL(pretrain = False)
    MODEL.to(config["DEVICE"])
    # map_location lets a checkpoint trained on GPU be loaded on a CPU-only machine.
    MODEL.load_state_dict(torch.load(config["MODEL_PATH"], map_location=config["DEVICE"]))
    MODEL.eval()

    predictions = predict(MODEL, test_loader)
    predictions = np.vstack(predictions)
    predictions = predictions.argmax(axis=1)
    return predictions
# Predict one class per test image.
preds = inference()
preds
# Reload the submission template and put the predictions in the target column.
test_df = pd.read_csv(config["TEST_FILE"]).assign(**{config["TARGET_VAR"]: preds})
# Write the submission file, without the index column.
test_df.to_csv('submission.csv', index=False)
test_df.head()