In [1]:
# !pip install scipy --index-url=https://pypi.org/simple
# !pip install scikit-image --index-url=https://pypi.org/simple



In [14]:
import torch
import torch.nn as nn
import torch.nn.functional as F
from torch.utils.data import Dataset, DataLoader
from torchvision import transforms
import torchvision.models as models

# from skimage.feature import graycomatrix, graycoprops
from sklearn.metrics import confusion_matrix, classification_report

import pandas as pd
import numpy as np
from PIL import Image
import os
import matplotlib.pyplot as plt
from tqdm import tqdm

In [15]:
EPOCHS = 100

In [16]:
def Drawloss(loss_list, val_loss_list):
    lens = len(loss_list)
    fig = plt.figure(figsize=(8, 5))
    fig.add_subplot(2,2,(1,4))
    plt.style.use("ggplot")

    plt.plot(range(1, lens+1), loss_list, label="train_loss")
    plt.plot(range(1, lens+1), val_loss_list, label="val_loss")

    plt.xlabel("Epoch #")
    plt.ylabel("Loss")
    plt.legend(loc="upper right")

    plt.show()
    
def val_accuracy(model_path):
    device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
    if device.type == 'cuda':
        model = torch.load(model_path)
    else:
        model = torch.load(model_path, map_location=torch.device('cpu'))
    model.eval()
    correct = 0
    total = 0
    with torch.no_grad():
        for images, labels in tqdm(val_dataloader):
            images = images.to(device)
            labels = labels.to(device)
            outputs = model(images)
            _, predicted = torch.max(outputs.data, 1)
            total += labels.size(0)
            correct += (predicted == labels).sum().item()
        
        accuracy = 100 * correct / total
        print(f"{model_path}, Val Accuracy: {accuracy:.2f}%")
        
def test_result(model_path, csv_filename):
    device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
    if device.type == 'cuda':
        model = torch.load(model_path)
    else:
        model = torch.load(model_path, map_location=torch.device('cpu'))
    model.eval()
    correct = 0
    total = 0
    predicted_list = []
    with torch.no_grad():
        for images, labels in tqdm(test_dataloader):
            images = images.to(device)
            labels = labels.to(device)
            outputs = model(images)
            _, predicted = torch.max(outputs.data, 1)
            predicted_list.append(predicted.item())
            
    test_df['Label'] = predicted_list
    test_df.to_csv(f'{csv_filename}', index=False)

# 1. Data preprocess

In [17]:
train_df = pd.read_csv('/kaggle/input/csv-idlabel/train.csv')
test_df = pd.read_csv('/kaggle/input/csv-idlabel/test.csv')

In [18]:
class CustomDataset(Dataset):
    def __init__(self, csv_path, images_folder, transform = False):
        self.df = pd.read_csv(csv_path)
        self.images_folder = images_folder
        self.transform = transform

    def __len__(self):
        return len(self.df)
    
    def __getitem__(self, index):
        filename = self.df.loc[index, "ID"]
        label = self.df.loc[index, "Label"].item()
        image = Image.open(os.path.join(self.images_folder, filename))
        if self.transform:
            image = self.transform(image)
        return image, label

In [19]:
transform = transforms.Compose([
    transforms.ToTensor(),
    transforms.RandomHorizontalFlip(p=0.5),
    transforms.RandomVerticalFlip(p=0.5),
])
test_transform = transforms.Compose([
    transforms.ToTensor()
])

In [20]:
dataset = CustomDataset('/kaggle/input/csv-idlabel/train.csv','/kaggle/input/train-imgs/train_images', transform=transform)
train_dataset, val_dataset = torch.utils.data.random_split(dataset, [0.9, 0.1])
test_dataset = CustomDataset('/kaggle/input/csv-idlabel/test.csv','/kaggle/input/test-imgs/test_images', transform=test_transform)

val_dataloader = DataLoader(val_dataset, batch_size=32, shuffle=True)
train_dataloader = DataLoader(train_dataset, batch_size=32, shuffle=True)
test_dataloader = DataLoader(test_dataset, batch_size=1, shuffle=False)

In [21]:
for images, labels in train_dataloader:
    print(images.shape)
    print(labels.shape)
    break

torch.Size([32, 1, 512, 512])
torch.Size([32])


In [22]:
class EarlyStopper:
    def __init__(self, model_path, patience=20, min_delta=0.001):
        self.patience = patience
        self.min_delta = min_delta
        self.model_path = model_path
        self.counter = 0
        self.min_val_loss = np.inf

    def check(self, val_loss, model):
        if val_loss < self.min_val_loss:
            self.min_val_loss = val_loss
            self.counter = 0
            torch.save(model, self.model_path)
        elif val_loss > (self.min_val_loss + self.min_delta):
            self.counter += 1
            if self.counter >= self.patience:
                return True
        return False 

# 2. PSPNet with deeper Model

In [23]:
model_path = f'/kaggle/working/PSPNetDeeper_epoch{EPOCHS}_kaggle.pt'
predict_csv_path = f'/kaggle/working/PSPNetDeeper_epoch{EPOCHS}_kaggle.csv'

In [24]:
class PSPNet(nn.Module):
    def __init__(self, num_classes):
        super(PSPNet, self).__init__()
        
        # Conv layers
        self.features = nn.Sequential(
            nn.Conv2d(in_channels=1, out_channels=16, kernel_size=3, padding=1), # output size (N, 16, 512, 512)
            nn.ReLU(),
            nn.MaxPool2d(2), # output size (N, 16, 256, 256)
            nn.Conv2d(in_channels=16, out_channels=32, kernel_size=3, padding=1), # output size (N, 32, 256, 256)
            nn.ReLU(),
            nn.MaxPool2d(2), # output size (N, 32, 128, 128)
            nn.Conv2d(in_channels=32, out_channels=64, kernel_size=3, padding=1), # output size (N, 64, 128, 128)
            nn.ReLU(),
            nn.MaxPool2d(2), # output size (N, 64, 64, 64)
            nn.Conv2d(in_channels=64, out_channels=128, kernel_size=3, padding=1), # output size (N, 128, 64, 64)
            nn.ReLU(),
            nn.MaxPool2d(2), # output size (N, 128, 32, 32)
        )
        # Spatial Pyramid Pooling layers
        self.pool1 = nn.AdaptiveMaxPool2d((1, 1)) # output size (N, 64, 1, 1)
        self.pool2 = nn.AdaptiveMaxPool2d((2, 2)) # output size (N, 64, 2, 2)
        self.pool3 = nn.AdaptiveMaxPool2d((3, 3)) # output size (N, 64, 3, 3)
        self.pool4 = nn.AdaptiveMaxPool2d((6, 6)) # output size (N, 64, 6, 6)
        self.con1 = nn.Conv2d(in_channels=128, out_channels=1, kernel_size=1, stride=1, padding=0) # output size (N, 1, 1, 1)
        self.con2 = nn.Conv2d(in_channels=128, out_channels=1, kernel_size=1, stride=1, padding=0) # output size (N, 1, 2, 2)
        self.con3 = nn.Conv2d(in_channels=128, out_channels=1, kernel_size=1, stride=1, padding=0) # output size (N, 1, 3, 3)
        self.con4 = nn.Conv2d(in_channels=128, out_channels=1, kernel_size=1, stride=1, padding=0) # output size (N, 1, 6, 6)
        # Upsampling layers
        self.upsample1 = nn.Upsample(scale_factor=32/1, mode='bilinear', align_corners=True) # output size (N, 1, 32, 32)
        self.upsample2 = nn.Upsample(scale_factor=32/2, mode='bilinear', align_corners=True) # output size (N, 1, 32, 32)
        self.upsample3 = nn.Upsample(scale_factor=32/3, mode='bilinear', align_corners=True) # output size (N, 1, 32, 32)
        self.upsample4 = nn.Upsample(scale_factor=32/6, mode='bilinear', align_corners=True) # output size (N, 1, 32, 32)
        # Conv Classifier layers
        self.classifier = nn.Sequential(
            nn.Conv2d(in_channels=132, out_channels=64, kernel_size=3, stride=2, padding=1), # output size (N, 64, 16, 16)
            nn.ReLU(),
            nn.MaxPool2d(2), # output size (N, 64, 8, 8)
            nn.Conv2d(in_channels=64, out_channels=32, kernel_size=3, stride=2, padding=1), # output size (N, 32, 4, 4)
            nn.ReLU(),
            nn.MaxPool2d(2), # output size (N, 32, 2, 2)
            nn.Flatten(), # output size (N, 32 * 2* 2)
            nn.Linear(32 * 2 * 2, 32), # output size (N, 32)
            nn.ReLU(),
            nn.Linear(32, num_classes), # output size (N, 6)
        )
        
    def forward(self, x):
        # CNN layers
        x = self.features(x)
        
        # Spatial Pyramid Pooling
        x1 = self.pool1(x)
        x1 = self.con1(x1) 
        x2 = self.pool2(x)
        x2 = self.con2(x2)
        x3 = self.pool3(x)
        x3 = self.con3(x3)
        x4 = self.pool4(x)
        x4 = self.con4(x4)
        
        # Upsampling
        x1 = self.upsample1(x1)
        x2 = self.upsample2(x2)
        x3 = self.upsample3(x3)
        x4 = self.upsample4(x4)
        
        # Concatenate the pooled features
        x = torch.cat((x1, x2, x3, x4, x), dim=1) # output size (N, 132, 32, 32)
        
        # Classifier
        x = self.classifier(x)
        return x

In [25]:
device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
model = PSPNet(num_classes=6).to(device)
criterion = nn.CrossEntropyLoss()
optimizer = torch.optim.Adam(model.parameters(), lr=0.001)

loss_list = []
val_loss_list = []
early_stopper = EarlyStopper(model_path = model_path)

for epoch in range(EPOCHS):
    model.train()
    for images, labels in train_dataloader:
        images = images.to(device)
        labels = labels.to(device)
        
        optimizer.zero_grad()
        outputs = model(images)
        loss = criterion(outputs, labels)
        loss.backward()
        optimizer.step()
    loss_list.append(loss.item())
    
    model.eval()
    with torch.no_grad():
        tmp_loss_list = []
        for images, labels in val_dataloader:
            images = images.to(device)
            labels = labels.to(device)
            outputs = model(images)
            val_loss = criterion(outputs, labels)
            tmp_loss_list.append(val_loss.item())
        avg_val_loss = sum(tmp_loss_list)/len(tmp_loss_list)
        val_loss_list.append(avg_val_loss)
        if early_stopper.check(avg_val_loss, model):
            print(f'Epoch [{epoch+1}/{EPOCHS}], Train Loss: {loss.item():.4f}, Val Loss:{avg_val_loss:.4f},\nEarly stop in {epoch+1}!!')
            break
            
    if (epoch+1) % 1 == 0 or epoch == 0:
        print(f'Epoch [{epoch+1}/{EPOCHS}], Train Loss: {loss.item():.4f}, Val Loss:{avg_val_loss:.4f}')

Epoch [1/100], Train Loss: 1.7022, Val Loss:1.6562
Epoch [2/100], Train Loss: 1.4983, Val Loss:1.6216
Epoch [3/100], Train Loss: 0.9071, Val Loss:1.1902
Epoch [4/100], Train Loss: 1.1987, Val Loss:1.0668
Epoch [5/100], Train Loss: 1.4066, Val Loss:0.7340
Epoch [6/100], Train Loss: 0.3545, Val Loss:0.4360
Epoch [7/100], Train Loss: 0.7745, Val Loss:0.3941
Epoch [8/100], Train Loss: 0.0501, Val Loss:0.3313
Epoch [9/100], Train Loss: 0.5201, Val Loss:0.6773
Epoch [10/100], Train Loss: 0.0021, Val Loss:0.3413
Epoch [11/100], Train Loss: 0.2246, Val Loss:0.3155
Epoch [12/100], Train Loss: 0.0195, Val Loss:0.3296
Epoch [13/100], Train Loss: 0.0480, Val Loss:0.2751
Epoch [14/100], Train Loss: 0.3610, Val Loss:0.2136
Epoch [15/100], Train Loss: 0.3463, Val Loss:0.1827
Epoch [16/100], Train Loss: 0.0494, Val Loss:0.2001
Epoch [17/100], Train Loss: 0.1086, Val Loss:0.1781
Epoch [18/100], Train Loss: 0.0511, Val Loss:0.1557
Epoch [19/100], Train Loss: 0.0233, Val Loss:0.7034
Epoch [20/100], Train

In [None]:
Drawloss(loss_list, val_loss_list)

In [None]:
# torch.save(model, model_path)
val_accuracy(model_path)
test_result(model_path, predict_csv_path)

# 2.5 PSPNet + GLCM features

In [10]:
model_path = f'/kaggle/working/PSPNetGLCM_epoch{EPOCHS}_kaggle.pt'
predict_csv_path = f'/kaggle/working/PSPNetGLCM_epoch{EPOCHS}_kaggle.csv'

In [None]:
def GLCM_features(image):
    image = np.array(image)
    image = (image * 255).astype(np.uint8)
    glcm_features = torch.empty(25, dtype=torch.float32)

    #5 configuration for the grey-level co-occurrence matrix calculation
    dists = [[1],[3],[5],[3],[3],[3]]
    angles = [[0],[0],[0],[np.pi/4],[np.pi/2],[np.pi*3/4],]

    for j ,(dist, angle) in enumerate(zip(dists, angles)):
        GLCM = graycomatrix(image, dist, angle) 
        glcm_features[j*5] = torch.tensor(graycoprops(GLCM, 'energy')[0], dtype=torch.float32)
        glcm_features[j*5 + 1] = torch.tensor(graycoprops(GLCM, 'correlation')[0] , dtype=torch.float32)   
        glcm_features[j*5 + 2] = torch.tensor(graycoprops(GLCM, 'dissimilarity')[0], dtype=torch.float32)
        glcm_features[j*5 + 3] = torch.tensor(graycoprops(GLCM, 'homogeneity')[0], dtype=torch.float32)
        glcm_features[j*5 + 4] = torch.tensor(graycoprops(GLCM, 'contrast')[0], dtype=torch.float32)
        
    return glcm_features

In [None]:
class GLCMDataset(Dataset):
    def __init__(self, csv_path, images_folder, transform = False):
        self.df = pd.read_csv(csv_path)
        self.images_folder = images_folder
        self.transform = transform

    def __len__(self):
        return len(self.df)
    
    def __getitem__(self, index):
        filename = self.df.loc[index, "ID"]
        label = self.df.loc[index, "Label"].item()
        image = Image.open(os.path.join(self.images_folder, filename))
        glcm_feature = GLCM_features(image)
        if self.transform:
            image = self.transform(image)
        return image, label, glcm_feature

In [None]:
dataset = GLCMDataset('/kaggle/input/csv-index/train.csv','/kaggle/input/train-image-aoi/train_images', transform=transform)
train_dataset, val_dataset = torch.utils.data.random_split(dataset, [0.9, 0.1])
test_dataset = GLCMDataset('/kaggle/input/csv-index/test.csv','/kaggle/input/test-image-aoi/test_images', transform=test_transform)

val_dataloader = DataLoader(val_dataset, batch_size=32, shuffle=True)
train_dataloader = DataLoader(train_dataset, batch_size=32, shuffle=True)
test_dataloader = DataLoader(test_dataset, batch_size=1, shuffle=False)

In [None]:
for images, labels, glcm_features in train_dataloader:
    print(images.shape)
    print(labels.shape)
    print(glcm_features.shape)
    break

In [None]:
class PSPNetGLCM(nn.Module):
    def __init__(self, num_classes):
        super(PSPNetGLCM, self).__init__()
        
        # Conv layers
        self.features = nn.Sequential(
            nn.Conv2d(in_channels=1, out_channels=16, kernel_size=3, padding=1), # output size (N, 16, 512, 512)
            nn.ReLU(),
            nn.MaxPool2d(2), # output size (N, 16, 256, 256)
            nn.Conv2d(in_channels=16, out_channels=32, kernel_size=3, padding=1), # output size (N, 32, 256, 256)
            nn.ReLU(),
            nn.MaxPool2d(2), # output size (N, 32, 128, 128)
            nn.Conv2d(in_channels=32, out_channels=64, kernel_size=3, padding=1), # output size (N, 64, 128, 128)
            nn.ReLU(),
            nn.MaxPool2d(2), # output size (N, 64, 64, 64)
            nn.Conv2d(in_channels=64, out_channels=128, kernel_size=3, padding=1), # output size (N, 128, 64, 64)
            nn.ReLU(),
            nn.MaxPool2d(2), # output size (N, 128, 32, 32)
        )
        # Spatial Pyramid Pooling layers
        self.pool1 = nn.AdaptiveMaxPool2d((1, 1)) # output size (N, 128, 1, 1)
        self.pool2 = nn.AdaptiveMaxPool2d((2, 2)) # output size (N, 128, 2, 2)
        self.pool3 = nn.AdaptiveMaxPool2d((3, 3)) # output size (N, 128, 3, 3)
        self.pool4 = nn.AdaptiveMaxPool2d((6, 6)) # output size (N, 128, 6, 6)
        self.con1 = nn.Conv2d(in_channels=128, out_channels=1, kernel_size=1, stride=1, padding=0) # output size (N, 1, 1, 1)
        self.con2 = nn.Conv2d(in_channels=128, out_channels=1, kernel_size=1, stride=1, padding=0) # output size (N, 1, 2, 2)
        self.con3 = nn.Conv2d(in_channels=128, out_channels=1, kernel_size=1, stride=1, padding=0) # output size (N, 1, 3, 3)
        self.con4 = nn.Conv2d(in_channels=128, out_channels=1, kernel_size=1, stride=1, padding=0) # output size (N, 1, 6, 6)
        # Upsampling layers
        self.upsample1 = nn.Upsample(scale_factor=32/1, mode='bilinear', align_corners=True) # output size (N, 1, 32, 32)
        self.upsample2 = nn.Upsample(scale_factor=32/2, mode='bilinear', align_corners=True) # output size (N, 1, 32, 32)
        self.upsample3 = nn.Upsample(scale_factor=32/3, mode='bilinear', align_corners=True) # output size (N, 1, 32, 32)
        self.upsample4 = nn.Upsample(scale_factor=32/6, mode='bilinear', align_corners=True) # output size (N, 1, 32, 32)
        
        # Conv Classifier layers
        self.nn_classifier = nn.Sequential(
            nn.Conv2d(in_channels=132, out_channels=64, kernel_size=3, stride=2, padding=1), # output size (N, 64, 16, 16)
            nn.ReLU(),
            nn.MaxPool2d(2), # output size (N, 64, 8, 8)
            nn.Conv2d(in_channels=64, out_channels=32, kernel_size=3, stride=2, padding=1), # output size (N, 32, 4, 4)
            nn.ReLU(),
            nn.MaxPool2d(2), # output size (N, 32, 2, 2)
            nn.Flatten(), # output size (N, 32 * 2* 2)
            nn.Linear(32 * 2 * 2, 24), # output size (N, 24)
            nn.ReLU(),
        )
        self.glcm_classifier= nn.Sequential(
            nn.Linear(25, 8), # output size (N, 8)
            nn.ReLU(),
        )
        self.final_classifier = nn.Sequential(
            nn.Linear(32, num_classes) # output size (N, num_classes=6)
        )
        
    def forward(self, x_input, x_glcm):
        # CNN layers
        x = self.features(x_input)
        
        # Spatial Pyramid Pooling
        x1 = self.pool1(x)
        x1 = self.con1(x1) 
        x2 = self.pool2(x)
        x2 = self.con2(x2)
        x3 = self.pool3(x)
        x3 = self.con3(x3)
        x4 = self.pool4(x)
        x4 = self.con4(x4)
        
        # Upsampling
        x1 = self.upsample1(x1)
        x2 = self.upsample2(x2)
        x3 = self.upsample3(x3)
        x4 = self.upsample4(x4)
        
        # Concatenate the pooled features
        x = torch.cat((x1, x2, x3, x4, x), dim=1) # output size (N, 132, 32, 32)
        
        # Classifier
        x = self.nn_classifier(x) # output size (N, 24)
        
        # Get GLCM features 
        x_glcm = self.glcm_classifier(x_glcm) # output size (N, 8)
        
        # Concatenate nn features and GLCM features
        x = torch.cat((x, x_glcm), dim=1) # output size (N, 32, 32)
        
        # final classifier
        x = self.final_classifier(x)
        
        return x

In [None]:
device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
model = PSPNetGLCM(num_classes=6).to(device)
criterion = nn.CrossEntropyLoss()
optimizer = torch.optim.Adam(model.parameters(), lr=0.001)

loss_list = []
val_loss_list = []
early_stopper = EarlyStopper(model_path = model_path)

for epoch in range(EPOCHS):
    model.train()
    for images, labels, glcm_features in tqdm(train_dataloader):
        images = images.to(device)
        labels = labels.to(device)
        glcm_features = glcm_features.to(device)
        
        optimizer.zero_grad()
        outputs = model(images, glcm_features)
        loss = criterion(outputs, labels)
        loss.backward()
        optimizer.step()
    loss_list.append(loss.item())
    
    model.eval()
    with torch.no_grad():
        tmp_loss_list = []
        for images, labels, glcm_features in val_dataloader:
            images = images.to(device)
            labels = labels.to(device)
            glcm_features = glcm_features.to(device)
            
            outputs = model(images, glcm_features)
            val_loss = criterion(outputs, labels)
            tmp_loss_list.append(val_loss.item())
        avg_val_loss = sum(tmp_loss_list)/len(tmp_loss_list)
        val_loss_list.append(avg_val_loss)
        if early_stopper.check(avg_val_loss, model):
            print(f'Epoch [{epoch+1}/{EPOCHS}], Train Loss: {loss.item():.4f}, Val Loss:{avg_val_loss:.4f},\nEarly stop in {epoch+1}!!')
            break
            
    if (epoch+1) % 1 == 0 or epoch == 0:
        print(f'Epoch [{epoch+1}/{EPOCHS}], Train Loss: {loss.item():.4f}, Val Loss:{avg_val_loss:.4f}')

In [None]:
def val_accuracy(model_path):
    device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
    if device.type == 'cuda':
        model = torch.load(model_path)
    else:
        model = torch.load(model_path, map_location=torch.device('cpu'))
    model.eval()
    correct = 0
    total = 0
    with torch.no_grad():
        for images, labels, glcm_features in tqdm(val_dataloader):
            images = images.to(device)
            labels = labels.to(device)
            glcm_features = glcm_features.to(device)
            
            outputs = model(images, glcm_features)
            _, predicted = torch.max(outputs.data, 1)
            total += labels.size(0)
            correct += (predicted == labels).sum().item()
        
        accuracy = 100 * correct / total
        print(f"{model_path}, Val Accuracy: {accuracy:.2f}%")

In [None]:
def test_result(model_path, csv_filename):
    device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
    if device.type == 'cuda':
        model = torch.load(model_path)
    else:
        model = torch.load(model_path, map_location=torch.device('cpu'))
    model.eval()
    correct = 0
    total = 0
    predicted_list = []
    with torch.no_grad():
        for images, labels, glcm_features in tqdm(test_dataloader):
            images = images.to(device)
            labels = labels.to(device)
            glcm_features = glcm_features.to(device)
            
            outputs = model(images, glcm_features)
            _, predicted = torch.max(outputs.data, 1)
            predicted_list.append(predicted.item())
            
    test_df['Label'] = predicted_list
    test_df.to_csv(f'{csv_filename}', index=False)

In [None]:
# Drawloss(loss_list, val_loss_list)
val_accuracy(model_path)
test_result(model_path, predict_csv_path)

# 3.

In [26]:
model_path = f'/kaggle/working/PSPNetDeeper_epoch{EPOCHS}_kaggle.pt'
predict_csv_path = f'/kaggle/working/PSPNetDeeper_epoch{EPOCHS}_kaggle.csv'

In [28]:
pip install mahotas

Collecting mahotas
  Downloading mahotas-1.4.13-cp310-cp310-manylinux_2_17_x86_64.manylinux2014_x86_64.whl (5.4 MB)
[2K     [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m5.4/5.4 MB[0m [31m40.1 MB/s[0m eta [36m0:00:00[0m00:01[0m00:01[0m
Installing collected packages: mahotas
Successfully installed mahotas-1.4.13
Note: you may need to restart the kernel to use updated packages.


In [29]:
import mahotas as mh
import cv2

In [30]:
def Haralick_features(image):
    features = mh.features.haralick(image).mean(axis=0)
    features = torch.from_numpy(features).type(torch.float32)
    return features

In [31]:
class MyDataset(Dataset):
    def __init__(self, csv_path, images_folder, transform = False):
        self.df = pd.read_csv(csv_path)
        self.images_folder = images_folder
        self.transform = transform

    def __len__(self):
        return len(self.df)

    def __getitem__(self, index):
        filename = self.df.loc[index, "ID"]
        label = self.df.loc[index, "Label"].item()
        image = cv2.imread(os.path.join(self.images_folder, filename))
        features = Haralick_features(image)
        image1 = Image.open(os.path.join(self.images_folder, filename))
        if self.transform:
            image1 = self.transform(image1)
        return image1, label, features

In [32]:
dataset = MyDataset('/kaggle/input/csv-idlabel/train.csv','/kaggle/input/train-imgs/train_images', transform=transform)
train_dataset, val_dataset = torch.utils.data.random_split(dataset, [0.9, 0.1])
test_dataset = MyDataset('/kaggle/input/csv-idlabel/test.csv','/kaggle/input/test-imgs/test_images', transform=test_transform)

val_dataloader = DataLoader(val_dataset, batch_size=32, shuffle=True)
train_dataloader = DataLoader(train_dataset, batch_size=32, shuffle=True)
test_dataloader = DataLoader(test_dataset, batch_size=1, shuffle=False)

In [33]:
for images, labels, features in train_dataloader:
    print(images.shape)
    print(labels.shape)
    print(features.shape)

    print(images.dtype)
    print(features.dtype)
    break

torch.Size([32, 1, 512, 512])
torch.Size([32])
torch.Size([32, 13])
torch.float32
torch.float32


In [34]:
class PSPNetGLCM(nn.Module):
    def __init__(self, num_classes):
        super(PSPNetGLCM, self).__init__()

        # Conv layers
        self.features = nn.Sequential(
            nn.Conv2d(in_channels=1, out_channels=16, kernel_size=3, padding=1), # output size (N, 16, 512, 512)
            nn.ReLU(),
            nn.MaxPool2d(2), # output size (N, 16, 256, 256)
            nn.Conv2d(in_channels=16, out_channels=32, kernel_size=3, padding=1), # output size (N, 32, 256, 256)
            nn.ReLU(),
            nn.MaxPool2d(2), # output size (N, 32, 128, 128)
            nn.Conv2d(in_channels=32, out_channels=64, kernel_size=3, padding=1), # output size (N, 64, 128, 128)
            nn.ReLU(),
            nn.MaxPool2d(2), # output size (N, 64, 64, 64)
            nn.Conv2d(in_channels=64, out_channels=128, kernel_size=3, padding=1), # output size (N, 128, 64, 64)
            nn.ReLU(),
            nn.MaxPool2d(2), # output size (N, 128, 32, 32)
        )
        # Spatial Pyramid Pooling layers
        self.pool1 = nn.AdaptiveMaxPool2d((1, 1)) # output size (N, 128, 1, 1)
        self.pool2 = nn.AdaptiveMaxPool2d((2, 2)) # output size (N, 128, 2, 2)
        self.pool3 = nn.AdaptiveMaxPool2d((3, 3)) # output size (N, 128, 3, 3)
        self.pool4 = nn.AdaptiveMaxPool2d((6, 6)) # output size (N, 128, 6, 6)
        self.con1 = nn.Conv2d(in_channels=128, out_channels=1, kernel_size=1, stride=1, padding=0) # output size (N, 1, 1, 1)
        self.con2 = nn.Conv2d(in_channels=128, out_channels=1, kernel_size=1, stride=1, padding=0) # output size (N, 1, 2, 2)
        self.con3 = nn.Conv2d(in_channels=128, out_channels=1, kernel_size=1, stride=1, padding=0) # output size (N, 1, 3, 3)
        self.con4 = nn.Conv2d(in_channels=128, out_channels=1, kernel_size=1, stride=1, padding=0) # output size (N, 1, 6, 6)
        # Upsampling layers
        self.upsample1 = nn.Upsample(scale_factor=32/1, mode='bilinear', align_corners=True) # output size (N, 1, 32, 32)
        self.upsample2 = nn.Upsample(scale_factor=32/2, mode='bilinear', align_corners=True) # output size (N, 1, 32, 32)
        self.upsample3 = nn.Upsample(scale_factor=32/3, mode='bilinear', align_corners=True) # output size (N, 1, 32, 32)
        self.upsample4 = nn.Upsample(scale_factor=32/6, mode='bilinear', align_corners=True) # output size (N, 1, 32, 32)

        # Conv Classifier layers
        self.nn_classifier = nn.Sequential(
            nn.Conv2d(in_channels=132, out_channels=64, kernel_size=3, stride=2, padding=1), # output size (N, 64, 16, 16)
            nn.ReLU(),
            nn.MaxPool2d(2), # output size (N, 64, 8, 8)
            nn.Conv2d(in_channels=64, out_channels=32, kernel_size=3, stride=2, padding=1), # output size (N, 32, 4, 4)
            nn.ReLU(),
            nn.MaxPool2d(2), # output size (N, 32, 2, 2)
            nn.Flatten(), # output size (N, 32 * 2* 2)
            nn.Linear(32 * 2 * 2, 24), # output size (N, 24)
            nn.ReLU(),
        )
        self.glcm_classifier= nn.Sequential(
            nn.Linear(13, 8), # output size (N, 8)
            nn.ReLU(),
        )
        self.final_classifier = nn.Sequential(
            nn.Linear(32, num_classes) # output size (N, num_classes=6)
        )

    def forward(self, x_input, x_glcm):
        # CNN layers
        x = self.features(x_input)

        # Spatial Pyramid Pooling
        x1 = self.pool1(x)
        x1 = self.con1(x1)
        x2 = self.pool2(x)
        x2 = self.con2(x2)
        x3 = self.pool3(x)
        x3 = self.con3(x3)
        x4 = self.pool4(x)
        x4 = self.con4(x4)

        # Upsampling
        x1 = self.upsample1(x1)
        x2 = self.upsample2(x2)
        x3 = self.upsample3(x3)
        x4 = self.upsample4(x4)

        # Concatenate the pooled features
        x = torch.cat((x1, x2, x3, x4, x), dim=1) # output size (N, 132, 32, 32)

        # Classifier
        x = self.nn_classifier(x) # output size (N, 24)

        # Get GLCM features
        x_glcm = self.glcm_classifier(x_glcm) # output size (N, 8)

        # Concatenate nn features and GLCM features
        x = torch.cat((x, x_glcm), dim=1) # output size (N, 32, 32)

        # final classifier
        x = self.final_classifier(x)

        return x

In [35]:
device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
print(device)
model = PSPNetGLCM(num_classes=6).to(device)
criterion = nn.CrossEntropyLoss()
optimizer = torch.optim.Adam(model.parameters(), lr=0.001)

loss_list = []
val_loss_list = []
early_stopper = EarlyStopper(model_path = model_path)

for epoch in range(EPOCHS):
    model.train()
    for images, labels, glcm_features in tqdm(train_dataloader):
        images = images.to(device)
        labels = labels.to(device)
        glcm_features = glcm_features.to(device)

        optimizer.zero_grad()
        outputs = model(images, glcm_features)
        loss = criterion(outputs, labels)
        loss.backward()
        optimizer.step()
    loss_list.append(loss.item())

    model.eval()
    with torch.no_grad():
        tmp_loss_list = []
        for images, labels, glcm_features in val_dataloader:
            images = images.to(device)
            labels = labels.to(device)
            glcm_features = glcm_features.to(device)

            outputs = model(images, glcm_features)
            val_loss = criterion(outputs, labels)
            tmp_loss_list.append(val_loss.item())
        avg_val_loss = sum(tmp_loss_list)/len(tmp_loss_list)
        val_loss_list.append(avg_val_loss)
        if early_stopper.check(avg_val_loss, model):
            print(f'Epoch [{epoch+1}/{EPOCHS}], Train Loss: {loss.item():.4f}, Val Loss:{avg_val_loss:.4f},\nEarly stop in {epoch+1}!!')
            break

    if (epoch+1) % 1 == 0 or epoch == 0:
        print(f'Epoch [{epoch+1}/{EPOCHS}], Train Loss: {loss.item():.4f}, Val Loss:{avg_val_loss:.4f}')

cuda


100%|██████████| 72/72 [09:44<00:00,  8.12s/it]


Epoch [1/100], Train Loss: 0.7525, Val Loss:5.5607


100%|██████████| 72/72 [09:38<00:00,  8.03s/it]


Epoch [2/100], Train Loss: 1.5417, Val Loss:2.2560


100%|██████████| 72/72 [09:37<00:00,  8.02s/it]


Epoch [3/100], Train Loss: 0.8339, Val Loss:1.7435


100%|██████████| 72/72 [09:33<00:00,  7.96s/it]


Epoch [4/100], Train Loss: 1.5045, Val Loss:1.5920


100%|██████████| 72/72 [09:37<00:00,  8.02s/it]


Epoch [5/100], Train Loss: 0.5507, Val Loss:1.4215


100%|██████████| 72/72 [09:39<00:00,  8.05s/it]


Epoch [6/100], Train Loss: 0.1180, Val Loss:0.8902


100%|██████████| 72/72 [09:37<00:00,  8.02s/it]


Epoch [7/100], Train Loss: 0.8293, Val Loss:0.8533


100%|██████████| 72/72 [09:33<00:00,  7.96s/it]


Epoch [8/100], Train Loss: 0.4737, Val Loss:0.4945


100%|██████████| 72/72 [09:23<00:00,  7.83s/it]


Epoch [9/100], Train Loss: 0.1260, Val Loss:0.5046


100%|██████████| 72/72 [09:24<00:00,  7.84s/it]


Epoch [10/100], Train Loss: 0.5086, Val Loss:0.4427


100%|██████████| 72/72 [09:24<00:00,  7.85s/it]


Epoch [11/100], Train Loss: 0.8230, Val Loss:0.4022


100%|██████████| 72/72 [09:33<00:00,  7.97s/it]


Epoch [12/100], Train Loss: 0.0676, Val Loss:0.3183


100%|██████████| 72/72 [09:38<00:00,  8.03s/it]


Epoch [13/100], Train Loss: 0.0532, Val Loss:0.2831


100%|██████████| 72/72 [09:48<00:00,  8.18s/it]


Epoch [14/100], Train Loss: 0.0241, Val Loss:0.1282


100%|██████████| 72/72 [09:43<00:00,  8.10s/it]


Epoch [15/100], Train Loss: 0.0685, Val Loss:0.1184


100%|██████████| 72/72 [09:40<00:00,  8.06s/it]


Epoch [16/100], Train Loss: 0.0001, Val Loss:0.2200


100%|██████████| 72/72 [09:38<00:00,  8.03s/it]


Epoch [17/100], Train Loss: 0.0178, Val Loss:0.1377


 40%|████      | 29/72 [04:01<05:58,  8.34s/it]


KeyboardInterrupt: 