## Import libraries

In [1]:
import os
import pandas as pd
import numpy as np
import copy
import torch
import loss
from torch import optim
from metrics import eval_metrics, get_epoch_acc
from dataloader import DataLoader
from cross_val import CrossVal
from torchvision import transforms
from eval import eval
from config import ModelParameters, DatasetConfig
from PIL import Image
import cv2

# Import available models, you can also explore other PyTorch models
from cracknet import cracknet
from unet import UNet, UNetResnet
# from segnet import SegNet, SegResNet

2024-11-17 10:10:23.052515: E external/local_xla/xla/stream_executor/cuda/cuda_fft.cc:477] Unable to register cuFFT factory: Attempting to register factory for plugin cuFFT when one has already been registered
E0000 00:00:1731838223.073084  149998 cuda_dnn.cc:8310] Unable to register cuDNN factory: Attempting to register factory for plugin cuDNN when one has already been registered
E0000 00:00:1731838223.079448  149998 cuda_blas.cc:1418] Unable to register cuBLAS factory: Attempting to register factory for plugin cuBLAS when one has already been registered
2024-11-17 10:10:23.100394: I tensorflow/core/platform/cpu_feature_guard.cc:210] This TensorFlow binary is optimized to use available CPU instructions in performance-critical operations.
To enable the following instructions: AVX2 FMA, in other operations, rebuild TensorFlow with the appropriate compiler flags.


In [2]:
DEVICE = torch.device('cuda:0' if torch.cuda.is_available() else 'cpu')
# os.environ["TORCH_USE_CUDA_DSA"] = "1"

## Training functions

In [3]:
def train_oneepoch(model, class_count, criterion, eval_metric, device, my_optimizer, my_lr_scheduler, dataloader):
    model.train()
    batch_loss = 0
    batch_acc_numerator = 0
    batch_acc_denominator = 0
    epoch_lr = my_lr_scheduler.get_last_lr()[0]
    for inputs, labels in dataloader:
        inputs = inputs.to(device)
        labels = labels.to(device)
        my_optimizer.zero_grad()
        #print(inputs.size())
        mask_pred = model(inputs)
        loss = criterion(mask_pred, labels)
        loss.backward()
        my_optimizer.step()
        
        # batch_loss += loss
        batch_loss += loss.item()
        
        batch_acc_numerator_tmp, batch_acc_denominator_tmp = eval_metrics(mask_pred, labels, class_count, eval_metric)
        batch_acc_numerator += batch_acc_numerator_tmp
        batch_acc_denominator += batch_acc_denominator_tmp
    my_lr_scheduler.step()
    epoch_loss = batch_loss / len(dataloader)
    epoch_acc = get_epoch_acc(batch_acc_numerator, batch_acc_denominator, eval_metric)

    return epoch_loss, epoch_acc, epoch_lr

In [4]:
def train_main(model, class_count, criterion, eval_metric, EPOCHS, DEVICE, my_optimizer, my_lr_scheduler=None, dataloaders=None, logging=False, model_name='model.pt'):
    model.to(DEVICE)
    train_loss = []
    train_acc = []
    val_loss = []
    val_acc = []
    lr = []
    best_val_loss = 0
    best_val_acc = 0 

    for epoch in range(EPOCHS):
        epoch_train_loss, epoch_train_acc, epoch_lr = train_oneepoch(model, class_count, criterion, eval_metric, DEVICE, my_optimizer, my_lr_scheduler, dataloaders[epoch%3]['train'])
        epoch_val_loss, epoch_val_acc = eval(model, class_count, criterion, eval_metric, DEVICE, dataloaders[epoch%3]['val'])

        # if epoch_val_acc > best_val_acc:
        #     best_val_acc = epoch_val_acc
        # best_state_dict = copy.deepcopy(model.state_dict())

        if epoch_val_loss > best_val_loss:
            best_val_loss = epoch_val_acc
            best_state_dict = copy.deepcopy(model.state_dict())
        
        if logging:
            # train_loss.append(epoch_train_loss.detach().cpu().numpy().tolist())
            train_loss.append(epoch_train_loss)
            train_acc.append(epoch_train_acc)
            # val_loss.append(epoch_val_loss.detach().cpu().numpy().tolist())
            val_loss.append(epoch_val_loss)
            val_acc.append(epoch_val_acc)
            lr.append(epoch_lr)
        torch.cuda.empty_cache() 
        
        print(f'Epoch {epoch}/{EPOCHS - 1}: TrainLoss: {epoch_train_loss:.4f}, TrainAcc: {epoch_train_acc:.4f}, ValLoss: {epoch_val_loss:.4f}, ValAcc: {epoch_val_acc:.4f}')

    print('Best Acc: {:4f}'.format(best_val_acc))

    # load best model weights
    model.load_state_dict(best_state_dict)
    torch.save(model, model_name + '.pt')
    
    # save training details
    pd.DataFrame({'Epochs':range(EPOCHS), 'Learning Rate': lr, 'Training Loss': train_loss, 
                    'Training Acc': train_acc, 'Validation Loss': val_loss, 
                    'Validation Acc': val_acc}).to_csv(model_name + '.csv', index = False)

    return model

## Training parameters

Inside the data directory, the structure should be following:
- train
    - images
        - IL 991.png
        - IL 992.png
    - labels
        - IL 991.npy
        - IL 992.npy
    - class_names.txt
- val
    - images
        - IL 993.png
    - labels
        - IL 993.npy
    - class_names.txt

class_names.txt is to specify the label class name for the training

Example content of class_names.txt:

\_background_ <br>
fault

In [5]:
# Name the data directory and model filename
DIR = 'data/' # Data directory
MODEL_FILENAME = 'cracknet.pt' # Model filename



In [6]:
num_fault = []
num_horizon = []
num_seismic = []



# for name in os.listdir('../data/aug_fault_mask_filter_hasfault'):
for name in os.listdir(DatasetConfig.FAULT_MASK_FOLDER):
    if name == '.ipynb_checkpoints':
        continue
    code = name.replace("fault","").replace(".npy","")
    # if os.path.isfile('../data/aug_horizon_mask_hasfault/horizon{}.npy'.format(code)) and os.path.isfile('../data/aug_raw_seismic_hasfault/seismic{}.png'.format(code)):
    if os.path.isfile(f'{DatasetConfig.RAW_HORIZON_FOLDER}/horizon{code}.npy') or os.path.isfile(f'{DatasetConfig.RAW_SEISMIC_FOLDER}/seismic{code}.png'):
        num_fault.append(name)
        num_horizon.append('horizon{}.npy'.format(code))
        num_seismic.append('seismic{}.png'.format(code))
    

df = pd.DataFrame({
    'RAW_SEISMIC': [f"{x}" for x in num_seismic],
    'RAW_FAULT': [f"{x}" for x in num_fault],
    'RAW_HORIZON': [f"{x}" for x in num_horizon]
})

In [7]:
df

Unnamed: 0,RAW_SEISMIC,RAW_FAULT,RAW_HORIZON
0,seismic-1392_3066_582_0.png,fault-1392_3066_582_0.npy,horizon-1392_3066_582_0.npy
1,seismic-1312_1045_814_0.png,fault-1312_1045_814_0.npy,horizon-1312_1045_814_0.npy
2,seismic-1024_319_614_0.png,fault-1024_319_614_0.npy,horizon-1024_319_614_0.npy
3,seismic-1264_298_434_1.png,fault-1264_298_434_1.npy,horizon-1264_298_434_1.npy
4,seismic-1288_1075_380_1.png,fault-1288_1075_380_1.npy,horizon-1288_1075_380_1.npy
...,...,...,...
765,seismic-1344_1253_75_1.png,fault-1344_1253_75_1.npy,horizon-1344_1253_75_1.npy
766,seismic-1058_546_467_0.png,fault-1058_546_467_0.npy,horizon-1058_546_467_0.npy
767,seismic-1130_2987_515_1.png,fault-1130_2987_515_1.npy,horizon-1130_2987_515_1.npy
768,seismic-1227_675_448_0.png,fault-1227_675_448_0.npy,horizon-1227_675_448_0.npy


In [8]:
cv = CrossVal(df, 3)
dataloaders = cv
# dataset = {}
# dataset['train'] = LabelMe(data_folder=os.path.join(DIR,'train'), transform=data_transforms['train'],
#                                 img_size=(1024, 1024))
# dataset['val'] = LabelMe(data_folder=os.path.join(DIR,'val'), transform=data_transforms['val'],
#                                 img_size=(1024, 1024))
# dataloaders = {x: torch.utils.data.DataLoader(dataset[x], batch_size = BATCH_SIZE,
#                                             shuffle = True, num_workers = 8, 
#                                             drop_last = False)
#                                             for x in ['train', 'val']}
class_count = len(dataloaders[0]['train'].dataset.label)

In [9]:
# Choose a model for training, you can refer to the models that have been imported above
# model = cracknet(pretrained = ModelParameters.PRETRAINED, num_classes = class_count)
model = UNet(num_classes = class_count)

my_optimizer = optim.Adam(model.parameters(), lr = ModelParameters.LEARNING_RATE) # Check https://pytorch.org/docs/stable/optim.html for other optimizers
my_lr_scheduler = optim.lr_scheduler.StepLR(my_optimizer, step_size=25, gamma=0.1) # Check https://pytorch.org/docs/stable/optim.html#how-to-adjust-learning-rate for other schedulers

## Start model training

In [10]:
train_main(model, class_count, ModelParameters.CRITERION, ModelParameters.EVAL_METRIC, ModelParameters.EPOCHS, DEVICE, my_optimizer, my_lr_scheduler, dataloaders, logging = ModelParameters.LOGGING, model_name = MODEL_FILENAME)

########
torch.Size([2, 3, 512, 512])
########
########
torch.Size([2, 3, 512, 512])
########
########
torch.Size([2, 3, 512, 512])
########
########
torch.Size([2, 3, 512, 512])
########
########
torch.Size([2, 3, 512, 512])
########
########
torch.Size([2, 3, 512, 512])
########
########
torch.Size([2, 3, 512, 512])
########
########
torch.Size([2, 3, 512, 512])
########
########
torch.Size([2, 3, 512, 512])
########
########
torch.Size([2, 3, 512, 512])
########
########
torch.Size([2, 3, 512, 512])
########
########
torch.Size([2, 3, 512, 512])
########
########
torch.Size([2, 3, 512, 512])
########
########
torch.Size([2, 3, 512, 512])
########
########
torch.Size([2, 3, 512, 512])
########
########
torch.Size([2, 3, 512, 512])
########
########
torch.Size([2, 3, 512, 512])
########
########
torch.Size([2, 3, 512, 512])
########
########
torch.Size([2, 3, 512, 512])
########
########
torch.Size([2, 3, 512, 512])
########
########
torch.Size([2, 3, 512, 512])
########
########
torc

UNet(
  (start_conv): Sequential(
    (0): Conv2d(3, 32, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1), bias=False)
    (1): BatchNorm2d(32, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
    (2): ReLU(inplace=True)
    (3): Conv2d(32, 64, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1), bias=False)
    (4): BatchNorm2d(64, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
    (5): ReLU(inplace=True)
  )
  (down1): encoder(
    (down_conv): Sequential(
      (0): Conv2d(64, 64, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1), bias=False)
      (1): BatchNorm2d(64, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
      (2): ReLU(inplace=True)
      (3): Conv2d(64, 128, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1), bias=False)
      (4): BatchNorm2d(128, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
      (5): ReLU(inplace=True)
    )
    (pool): MaxPool2d(kernel_size=2, stride=2, padding=0, dilation=1, ceil_mod

In [11]:
idx = 100
dl = DataLoader([],[],[], (512, 512))
img = []
img.append(dl.input_transforms(Image.open(os.path.join(dl.RAW_SEISMIC_FOLDER, df.at[idx, "RAW_SEISMIC"]))))
img.append(dl.input_transforms(Image.open(os.path.join(dl.RAW_SEISMIC_FOLDER, df.at[idx+1, "RAW_SEISMIC"]))))
img = torch.stack(img, dim=0)
# fault_name = '.'.join(self.img_name[idx].split('.', maxsplit = 1)[:-1]) + '.npy'
# fault = np.load(os.path.join(self.RAW_FAULT_FOLDER, self.fault_files[idx]), allow_pickle=True).astype(np.uint8)
# fault = cv2.resize(mask, self.img_size, cv2.INTER_AREA)
# fault_img = Image.open(os.path.join(self.RAW_FAULT_FOLDER, self.fault_files[idx]))
# fault = self.output_transforms(fault_img)
# fault  = np.array(fault_img)
# mask_name = '.'.join(self.img_name[idx].split('.', maxsplit = 1)[:-1]) + '.npy'
mask = []
mask.append(torch.from_numpy(cv2.resize(np.load(os.path.join(dl.FAULT_MASK_FOLDER, df.at[idx, "RAW_FAULT"])).astype(np.uint8), dl.IMAGE_SIZE, cv2.INTER_AREA).astype(np.int64)))
mask.append(torch.from_numpy(cv2.resize(np.load(os.path.join(dl.FAULT_MASK_FOLDER, df.at[idx+1, "RAW_FAULT"])).astype(np.uint8), dl.IMAGE_SIZE, cv2.INTER_AREA).astype(np.int64)))
mask = torch.stack(mask, dim=0)
img = img.to(DEVICE)
mask = mask.to(DEVICE)
mask_pred = model(img)

In [12]:
ModelParameters.CRITERION(mask_pred, mask)

tensor(0.0130, device='cuda:0', grad_fn=<MeanBackward0>)

In [13]:
eval_metrics(mask_pred, mask, class_count,'batch_pix_accuracy')

(array(514391), array(524288))

In [14]:
_, predict = torch.max(mask_pred.data, 1)
# predict = predict + 1
# mask = mask + 1
# labeled = (mask > 0) * (mask <= class_count)
# pixel_labeled = labeled.sum()
# pixel_correct = ((predict == mask) * labeled).sum()
# (pixel_correct.cpu().numpy(), pixel_labeled.cpu().numpy())

In [15]:
mask_pred.data[:,0]

tensor([[[1.2528, 1.0324, 0.9716,  ..., 1.5392, 0.7797, 0.8653],
         [1.3703, 1.5265, 1.3296,  ..., 1.2988, 1.3324, 0.8273],
         [1.0611, 1.2751, 1.7319,  ..., 1.3852, 1.5490, 1.3204],
         ...,
         [1.5109, 1.6967, 1.2422,  ..., 1.8407, 1.0848, 0.9935],
         [1.2166, 1.1667, 1.6384,  ..., 1.6461, 1.5434, 1.2493],
         [1.1835, 1.3927, 1.4105,  ..., 1.4602, 1.0232, 0.6089]],

        [[1.3910, 1.1742, 1.0979,  ..., 1.4799, 0.8115, 0.9015],
         [1.4628, 1.4435, 1.3227,  ..., 1.2167, 1.2140, 0.7777],
         [1.2362, 1.3182, 1.3709,  ..., 1.4530, 1.4025, 1.3525],
         ...,
         [1.5547, 1.0317, 1.2446,  ..., 1.1225, 1.3611, 0.8791],
         [1.3352, 1.5003, 1.8675,  ..., 1.2214, 1.8594, 1.3072],
         [0.7507, 0.6965, 1.4640,  ..., 1.1977, 1.0309, 0.8147]]],
       device='cuda:0')

In [16]:
predict.sum()

tensor(0, device='cuda:0')

In [17]:
cracknet_model = cracknet(pretrained = ModelParameters.PRETRAINED, num_classes = class_count)
cracknet_model_total_params = sum(p.numel() for p in cracknet_model.parameters())
cracknet_model_total_params

unet_model = UNet(num_classes = class_count)
unet_model_total_params = sum(p.numel() for p in unet_model.parameters())
unet_resnet_model = UNetResnet(num_classes = class_count)
unet_resnet_model_total_params = sum(p.numel() for p in unet_resnet_model.parameters())

segnet_model = SegNet(num_classes = class_count)
segnet_model_total_params  = sum(p.numel() for p in segnet_model.parameters())
segresnet_model = SegResNet(num_classes = class_count)
segresnet_model_total_params  = sum(p.numel() for p in segresnet_model.parameters())

NameError: name 'SegNet' is not defined

In [None]:
cracknet_model_total_params

In [None]:
unet_model_total_params

In [None]:
unet_resnet_model_total_params

In [None]:
segnet_model_total_params

In [None]:
segresnet_model_total_params

In [None]:
cracknet_model_total_params/segnet_model_total_params