In [1]:
## Loading required libraries
import torch
import torch.optim as optim
import torch.nn as nn
import os
from torch.utils.data import Dataset, DataLoader , random_split
from torchvision import transforms , datasets
from PIL import Image
import pandas as pd
import re
import random
import shutil
import matplotlib.pyplot as plt
from sklearn.model_selection import KFold
import numpy as np
import pandas as pd

  from .autonotebook import tqdm as notebook_tqdm


In [2]:
##checking if CUDA is available


if torch.cuda.is_available():
    print("CUDA is available!")
else:
    print("CUDA is not availabe")

meta_files = os.listdir("/home/jaskaran/data_selection/meta_file")
dfs = []

CUDA is available!


In [4]:
## custom class for the dataset
class GeneticDataset(Dataset):
    def __init__(self,root_folder, transform =None):
        self.root_folder = root_folder
        self.transform = transform
        self.class_labels = sorted(os.listdir(root_folder))
        self.class_to_idx = {label: idx for idx ,label in enumerate(self.class_labels)}
        self.file_list = self._build_file_list()
    
    def _build_file_list(self):
        file_list = []
        for class_label in self.class_labels:
            class_path = os.path.join(self.root_folder,class_label)
            class_image = [os.path.join(class_path,image_path) for image_path in os.listdir(class_path)]
            file_list.extend(class_image)
        return file_list
    
    def __len__(self):
        return len(self.file_list)
    
    def __getitem__(self,index):
        file_path = self.file_list[index]
        array = np.loadtxt(file_path)
        class_label = os.path.basename(os.path.dirname(file_path))
        label = self.class_to_idx[class_label]
        find = re.match(r".+?[_].+?[_].+?[_](.+?)[.][t][x][t]",file_path)
        if label == 0:
            matrix = np.loadtxt(F"/home/jaskaran/data_matrix/bottleneck/matrix_{find[1]}.txt")
        if label == 1:
            matrix = np.loadtxt(F"/home/jaskaran/data_matrix/selection/matrix_{find[1]}.txt")
        mean = np.mean(array)
        std = np.std(array)
        array = (array - mean) / std
        array = torch.from_numpy(array)

        mean1 = np.mean(matrix)
        std1 = np.std(matrix)
        matrix = (matrix - mean1) / std1
        matrix= torch.from_numpy(matrix)
        
        return {'matrix': matrix, 'label': label, "coefficient" : float(find[1]) , "array" : array}


In [5]:
transform = transforms.Compose([transforms.ToTensor(),
                                transforms.Normalize(mean=(0.5,0.5,0.5),std=(0.5,0.5,0.5))])
combined_data = GeneticDataset(root_folder="/home/jaskaran/data_arrays", transform= transform)

length_combined_data = len(combined_data)
training_data_len = int(0.8*length_combined_data)
testing_data_len = length_combined_data - training_data_len
generator = torch.Generator().manual_seed(46)
training_data , testing_data = random_split(combined_data,[training_data_len,testing_data_len],generator=generator)

training_data_loader = DataLoader(training_data,batch_size=32,shuffle=True, drop_last=True)
testing_data_loader = DataLoader(testing_data, batch_size=32, shuffle=True, drop_last=True ) 


In [6]:
testing_data[1]

{'matrix': tensor([[-0.0924, -0.0749, -0.0574,  ..., 17.3477, 17.3652, 17.3827],
         [-0.0924, -0.0924, -0.0924,  ..., -0.0574, -0.0574, -0.0924],
         [-0.0924, -0.0924, -0.0924,  ..., -0.0574, -0.0574, -0.0924],
         ...,
         [-0.0924, -0.0924, -0.0924,  ..., -0.0749, -0.0749, -0.0924],
         [-0.0924, -0.0924, -0.0924,  ..., -0.0574, -0.0574, -0.0924],
         [-0.0924, -0.0924, -0.0924,  ..., -0.0574, -0.0574, -0.0924]],
        dtype=torch.float64),
 'label': 0,
 'coefficient': 0.748,
 'array': tensor([[-0.2235,  6.4625,  2.0052,  ..., -0.2235, -0.2235, -0.2235],
         [-0.2235,  2.0052, -0.2235,  ..., -0.2235, -0.2235, -0.2235],
         [-0.2235,  2.0052, -0.2235,  ..., -0.2235, -0.2235, -0.2235],
         ...,
         [-0.2235, -0.2235,  6.4625,  ..., -0.2235, -0.2235, -0.2235],
         [-0.2235,  4.2338, -0.2235,  ..., -0.2235, -0.2235, -0.2235],
         [-0.2235, -0.2235,  2.0052,  ..., -0.2235, -0.2235, -0.2235]],
        dtype=torch.float64)}

In [11]:
class multimodalCNN(nn.Module):

    def __init__(self):
        super(multimodalCNN,self).__init__()
        self.matrix_CNN = nn.Sequential(
            nn.Conv2d(in_channels=1 , out_channels = 16, kernel_size=(9,15),stride=1,padding=15),
            nn.ReLU(),
            nn.MaxPool2d(kernel_size=2),
            nn.Flatten(start_dim=1))
        self.array_CNN = nn.Sequential(
            nn.Conv2d(in_channels=1,out_channels = 32,kernel_size = 9,stride = 1,padding = 9),
            nn.ReLU(),
            nn.MaxPool2d(kernel_size=2),
            nn.Flatten(start_dim=1))
        self.fc1 = nn.Linear(591632,256)
        self.fc2 = nn.Linear(256,128)
        self.fc3 = nn.Linear(128,2)
        self.dropout = nn.Dropout(p=0.2)
        self.relu = nn.ReLU()
    
    def forward(self,matrix,array):
        array = array.float()
        matrix = matrix.float()
        image_features = self.matrix_CNN(matrix)
        array_features = self.array_CNN(array)
        combined_features = torch.cat((image_features, array_features), dim=1)

        x = self.fc1(combined_features)
        x = self.relu(x)
        x = self.dropout(x)

        x = self.fc2(x)
        x = self.relu(x)
        x = self.dropout(x)

        x = self.fc3(x)
        return(x)


In [12]:
torch.manual_seed(44)

if torch.cuda.is_available():
    torch.cuda.manual_seed(46)
    torch.cuda.manual_seed_all(46)
    torch.backends.cudnn.benchmark = False
    torch.backends.cudnn.deterministic = True

model = multimodalCNN()
model = model.to("cuda:1")

criterion = nn.CrossEntropyLoss()
optimiser = optim.Adam(model.parameters(),lr = 0.0001)

num_epochs = 10

for epoch in range(num_epochs):
    model.train()
    total_loss = 0.0

    for batch in training_data_loader:
        matrix = batch["matrix"].to("cuda:1")
        matrix = matrix.unsqueeze(1)
        arrays = batch["array"]
        arrays = arrays.unsqueeze(1)
        arrays = arrays.to("cuda:1")
        labels = batch["label"].to("cuda:1")
        optimiser.zero_grad()
        output = model(array= arrays , matrix = matrix)
        loss = criterion(output,labels)
        loss.backward()
        optimiser.step()
        total_loss += loss.item() * labels.size(0)  # Multiply by batch size
    
    # Calculate average loss over all batches
    average_loss = total_loss / len(training_data_loader.dataset)
    
    model.eval()

    correct = 0
    total = 0

    for batch in testing_data_loader:
        matrix = batch["matrix"].to("cuda:1")
        matrix = matrix.unsqueeze(1)
        arrays = batch["array"]
        arrays = arrays.unsqueeze(1)
        arrays = arrays.to("cuda:1")
        labels = batch["label"].to("cuda:1")
        optimiser.zero_grad()
        output = model(array= arrays , matrix = matrix)
        _ , predicted = torch.max(output.data,1)
        total += labels.size(0)
        correct += (predicted == labels).sum().item()

    accuracy = correct/total

    print(f"Epoch : {epoch} , Loss : {average_loss:.2f} , accuracy : {accuracy:.2%}")

print("Model is trained")

Epoch : 0 , Loss : 0.12 , accuracy : 97.53%
Epoch : 1 , Loss : 0.09 , accuracy : 98.44%
Epoch : 2 , Loss : 0.07 , accuracy : 98.59%
Epoch : 3 , Loss : 0.06 , accuracy : 98.14%
Epoch : 4 , Loss : 0.05 , accuracy : 98.49%


In [None]:
torch.save(model.state_dict(), f'model_state_dict_combined_data.pth')