## Import libraries

In [1]:
import os
import pandas as pd
import numpy as np
import copy
import torch
import loss
from torch import optim
from metrics import eval_metrics, get_epoch_acc
from dataloader import DataLoader
from cross_val import CrossVal
from torchvision import transforms
from eval import eval
from config import ModelParameters

# Import available models, you can also explore other PyTorch models
from cracknet import cracknet
from unet import UNet, UNetResnet
from segnet import SegNet, SegResNet

In [2]:
# Train on the first CUDA GPU when PyTorch reports one, otherwise on the CPU.
if torch.cuda.is_available():
    DEVICE = torch.device('cuda:0')
else:
    DEVICE = torch.device('cpu')
# Optional debugging switch, left disabled:
# os.environ["TORCH_USE_CUDA_DSA"] = "1"

## Training functions

In [3]:
def train_oneepoch(model, class_count, criterion, eval_metric, device, my_optimizer, my_lr_scheduler, dataloader):
    """Train ``model`` for a single pass over ``dataloader``.

    Args:
        model: Network to optimise; it is switched to training mode here.
        class_count: Forwarded unchanged to ``eval_metrics``.
        criterion: Callable ``criterion(prediction, labels)`` returning a
            scalar loss tensor that supports ``backward()`` and ``item()``.
        eval_metric: Metric identifier forwarded to ``eval_metrics`` and
            ``get_epoch_acc``.
        device: Device every batch is moved to before the forward pass.
        my_optimizer: Optimizer updating the parameters of ``model``.
        my_lr_scheduler: Scheduler stepped once at the end of the epoch, or
            ``None`` to leave the optimizer's learning rate untouched.
        dataloader: Sized iterable yielding ``(inputs, labels)`` batches.

    Returns:
        Tuple ``(epoch_loss, epoch_acc, epoch_lr)``: the mean of the per-batch
        loss values (a Python float), the value produced by ``get_epoch_acc``
        from the accumulated metric parts, and the learning rate of the first
        parameter group that was in effect during this epoch.

    Raises:
        ValueError: If ``dataloader`` contains no batches.
    """
    model.train()

    batch_count = len(dataloader)
    if batch_count == 0:
        # Fail early with a clear message instead of a ZeroDivisionError
        # when averaging the loss below.
        raise ValueError('train_oneepoch received an empty dataloader')

    # Record the learning rate *before* the scheduler is stepped so the
    # returned value is the one actually used for this epoch's updates.
    if my_lr_scheduler is None:
        epoch_lr = my_optimizer.param_groups[0]['lr']
    else:
        epoch_lr = my_lr_scheduler.get_last_lr()[0]

    running_loss = 0.0
    acc_numerator = 0
    acc_denominator = 0

    for inputs, labels in dataloader:
        inputs = inputs.to(device)
        labels = labels.to(device)

        my_optimizer.zero_grad()
        mask_pred = model(inputs)
        # Named ``step_loss`` so the notebook's imported ``loss`` module is
        # not shadowed inside this function.
        step_loss = criterion(mask_pred, labels)
        step_loss.backward()
        my_optimizer.step()

        # ``item()`` yields a plain float, so no autograd graph is kept alive
        # across iterations.
        running_loss += step_loss.item()

        # The metric is bookkeeping only; computing it without gradient
        # tracking avoids extending the graph of ``mask_pred``.
        with torch.no_grad():
            numerator_part, denominator_part = eval_metrics(mask_pred, labels, class_count, eval_metric)
        acc_numerator += numerator_part
        acc_denominator += denominator_part

    if my_lr_scheduler is not None:
        my_lr_scheduler.step()

    epoch_loss = running_loss / batch_count
    epoch_acc = get_epoch_acc(acc_numerator, acc_denominator, eval_metric)

    return epoch_loss, epoch_acc, epoch_lr

In [4]:
def train_main(model, class_count, criterion, eval_metric, EPOCHS, DEVICE, my_optimizer, my_lr_scheduler=None, dataloaders=None, logging=False, model_name='model.pt'):
    """Train ``model`` for ``EPOCHS`` epochs and keep the best-validated weights.

    After every epoch the model is evaluated on ``dataloaders['val']``; the
    weights of the epoch with the highest validation score are restored into
    ``model`` before it is saved and returned.

    Args:
        model: Network to train; moved to ``DEVICE``.
        class_count: Forwarded to ``train_oneepoch`` and ``eval``.
        criterion: Loss callable forwarded to ``train_oneepoch`` and ``eval``.
        eval_metric: Metric identifier forwarded to ``train_oneepoch`` and
            ``eval``.
        EPOCHS: Number of training epochs.
        DEVICE: Device used for training and evaluation.
        my_optimizer: Optimizer forwarded to ``train_oneepoch``.
        my_lr_scheduler: Scheduler forwarded to ``train_oneepoch``.
        dataloaders: Mapping with a ``'train'`` and a ``'val'`` entry.
        logging: When true, per-epoch learning rate, losses and scores are
            collected and written to ``model_name + '.csv'``.
        model_name: Base path of the output files. The whole model object is
            saved to ``model_name + '.pt'``; the suffix is appended even when
            ``model_name`` already ends in ``.pt``.

    Returns:
        ``model`` with the best validation weights loaded.

    Raises:
        ValueError: If ``dataloaders`` is ``None``.
    """
    if dataloaders is None:
        raise ValueError("dataloaders must provide 'train' and 'val' entries")

    model.to(DEVICE)

    # Column order matches the CSV layout written at the end.
    history = {
        'Epochs': [],
        'Learning Rate': [],
        'Training Loss': [],
        'Training Acc': [],
        'Validation Loss': [],
        'Validation Acc': [],
    }

    best_val_acc = 0
    # Start from the initial weights so there is always a snapshot to restore,
    # even if EPOCHS is 0 or the validation score never rises above 0.
    best_state_dict = copy.deepcopy(model.state_dict())

    for epoch in range(EPOCHS):
        epoch_train_loss, epoch_train_acc, epoch_lr = train_oneepoch(model, class_count, criterion, eval_metric, DEVICE, my_optimizer, my_lr_scheduler, dataloaders['train'])
        epoch_val_loss, epoch_val_acc = eval(model, class_count, criterion, eval_metric, DEVICE, dataloaders['val'])

        if epoch_val_acc > best_val_acc:
            best_val_acc = epoch_val_acc
            # Snapshot only on improvement; a deep copy is needed because
            # state_dict() references the live parameter tensors.
            best_state_dict = copy.deepcopy(model.state_dict())

        if logging:
            history['Epochs'].append(epoch)
            history['Learning Rate'].append(epoch_lr)
            history['Training Loss'].append(epoch_train_loss)
            history['Training Acc'].append(epoch_train_acc)
            history['Validation Loss'].append(epoch_val_loss)
            history['Validation Acc'].append(epoch_val_acc)
        torch.cuda.empty_cache()

        print(f'Epoch {epoch}/{EPOCHS - 1}: TrainLoss: {epoch_train_loss:.4f}, TrainAcc: {epoch_train_acc:.4f}, ValLoss: {epoch_val_loss:.4f}, ValAcc: {epoch_val_acc:.4f}')

    print('Best Acc: {:.4f}'.format(best_val_acc))

    # Restore the best weights before persisting the model.
    model.load_state_dict(best_state_dict)
    torch.save(model, model_name + '.pt')

    # The history is only populated when logging is enabled, so only then is
    # there anything to write.
    if logging:
        pd.DataFrame(history).to_csv(model_name + '.csv', index=False)

    return model

## Training parameters

The data directory should have the following structure:
- train
    - images
        - IL 991.png
        - IL 992.png
    - labels
        - IL 991.npy
        - IL 992.npy
    - class_names.txt
- val
    - images
        - IL 993.png
    - labels
        - IL 993.npy
    - class_names.txt

`class_names.txt` specifies the label class names used for training.

Example content of class_names.txt:

\_background_ <br>
fault

In [5]:
# Dataset location and output name used by the cells below.
DIR = 'data/' # Data directory
MODEL_FILENAME = 'cracknet.pt' # Passed to train_main as model_name, which appends '.pt' and '.csv' to it



In [6]:
def _collect_indices(directory):
    """Return the integer indices parsed from the entry names in ``directory``.

    Each name is split as ``<prefix>-<index>.<rest>`` and ``<index>`` is
    converted to ``int``. The ``.ipynb_checkpoints`` entry and any other name
    that does not yield an integer this way are skipped.
    """
    found = []
    for name in os.listdir(directory):
        if name == '.ipynb_checkpoints':
            continue
        try:
            found.append(int(name.split('-')[1].split('.')[0]))
        except (IndexError, ValueError):
            # Stray entry without a parsable index; ignore it rather than
            # aborting the whole cell.
            continue
    return found


def _filenames_or_none(template, available, candidates):
    """Format ``template`` for each candidate index present in ``available``, else ``None``."""
    lookup = set(available)  # constant-time membership instead of scanning a list
    return [template.format(x) if x in lookup else None for x in candidates]


# Indices actually present on disk for each kind of raw file.
num_fault = _collect_indices('../data/raw_fault')
num_horizon = _collect_indices('../data/raw_horizon')
num_seismic = _collect_indices('../data/raw_seismic')

# Candidate indices (1000..1440 inclusive); one table row per candidate.
indeces = list(range(1000, 1441))

# File table: a cell holds the expected file name, or None when no file with
# that index was found in the corresponding folder.
df = pd.DataFrame({
    'RAW_SEISMIC': _filenames_or_none('seismic-{}.png', num_seismic, indeces),
    'RAW_FAULT': _filenames_or_none('fault-{}.npy', num_fault, indeces),
    'RAW_HORIZON': _filenames_or_none('horizon-{}.npy', num_horizon, indeces)
})

In [7]:
# Wrap the file table in the project's CrossVal helper and work with the
# first item it exposes, which is indexed below by 'train'.
# NOTE(review): the second argument (3) is presumably the number of folds —
# confirm against cross_val.CrossVal.
cv = CrossVal(df, 3)
dataloaders = cv[0]

# The class count is the length of the training dataset's `label` attribute.
train_dataset = dataloaders['train'].dataset
class_count = len(train_dataset.label)

In [8]:
# Network to train. Any of the architectures imported at the top of the
# notebook can be substituted here.
model = cracknet(
    pretrained=ModelParameters.PRETRAINED,
    num_classes=class_count,
)

# Optimizer; other choices are listed at https://pytorch.org/docs/stable/optim.html
my_optimizer = optim.Adam(
    model.parameters(),
    lr=ModelParameters.LEARNING_RATE,
)

# Scheduler: multiplies the learning rate by 0.1 after every 25 scheduler steps.
# Other schedulers: https://pytorch.org/docs/stable/optim.html#how-to-adjust-learning-rate
# NOTE: optim.lr_scheduler.ReduceLROnPlateau (factor=0.1, patience=10,
# threshold=0.000001) is a possible alternative, but its step() expects the
# monitored metric as an argument.
my_lr_scheduler = optim.lr_scheduler.StepLR(
    my_optimizer,
    step_size=25,
    gamma=0.1,
)

## Start model training

In [None]:
# Launch training. The call is the cell's last expression, so the model
# returned by train_main is shown as the cell output.
train_main(
    model,
    class_count,
    ModelParameters.CRITERION,
    ModelParameters.EVAL_METRIC,
    ModelParameters.EPOCHS,
    DEVICE,
    my_optimizer,
    my_lr_scheduler=my_lr_scheduler,
    dataloaders=dataloaders,
    logging=ModelParameters.LOGGING,
    model_name=MODEL_FILENAME,
)

  return Variable._execution_engine.run_backward(  # Calls into the C++ engine to run the backward pass


########
torch.Size([9, 3, 512, 512])
########
########
torch.Size([9, 3, 512, 512])
########
########
torch.Size([9, 3, 512, 512])
########
########
torch.Size([9, 3, 512, 512])
########
########
torch.Size([9, 3, 512, 512])
########
########
torch.Size([9, 3, 512, 512])
########
########
torch.Size([9, 3, 512, 512])
########
########
torch.Size([9, 3, 512, 512])
########
########
torch.Size([9, 3, 512, 512])
########
########
torch.Size([9, 3, 512, 512])
########
########
torch.Size([9, 3, 512, 512])
########
########
torch.Size([9, 3, 512, 512])
########
########
torch.Size([9, 3, 512, 512])
########
########
torch.Size([4, 3, 512, 512])
########
Epoch 0/4: TrainLoss: 0.4891, TrainAcc: 0.6659, ValLoss: 0.4983, ValAcc: 0.6948
########
torch.Size([9, 3, 512, 512])
########
########
torch.Size([9, 3, 512, 512])
########
########
torch.Size([9, 3, 512, 512])
########
########
torch.Size([9, 3, 512, 512])
########
########
torch.Size([9, 3, 512, 512])
########
########
torch.Size([9, 3, 

In [None]:
def _count_parameters(network):
    """Return the total number of elements over all tensors in ``network.parameters()``."""
    total = 0
    for tensor in network.parameters():
        total += tensor.numel()
    return total


# Instantiate each available architecture and record its parameter count.
cracknet_model = cracknet(pretrained=ModelParameters.PRETRAINED, num_classes=class_count)
cracknet_model_total_params = _count_parameters(cracknet_model)

unet_model = UNet(num_classes=class_count)
unet_model_total_params = _count_parameters(unet_model)

unet_resnet_model = UNetResnet(num_classes=class_count)
unet_resnet_model_total_params = _count_parameters(unet_resnet_model)

segnet_model = SegNet(num_classes=class_count)
segnet_model_total_params = _count_parameters(segnet_model)

segresnet_model = SegResNet(num_classes=class_count)
segresnet_model_total_params = _count_parameters(segresnet_model)

In [None]:
# Display the total number of parameter elements in the cracknet model.
cracknet_model_total_params

In [None]:
# Display the total number of parameter elements in the UNet model.
unet_model_total_params

In [None]:
# Display the total number of parameter elements in the UNetResnet model.
unet_resnet_model_total_params

In [None]:
# Display the total number of parameter elements in the SegNet model.
segnet_model_total_params

In [None]:
# Display the total number of parameter elements in the SegResNet model.
segresnet_model_total_params