In [None]:
import torch
from torchvision import datasets, transforms
from torch.utils.data import DataLoader

# Root directory of the dataset.
path = "kaggle_3m"

# Explicit switch for running on the CPU even when CUDA is available.
# Previously the detected device was silently overwritten by an unconditional
# `device = 'cpu'`; set FORCE_CPU = False to use the GPU when one is present.
FORCE_CPU = True
device = 'cuda' if (not FORCE_CPU and torch.cuda.is_available()) else 'cpu'
print(device)

import os
import pandas as pd
from PIL import Image
from torchvision.transforms import ToTensor
from torch.utils.data import Dataset

class ImageFolderDataset(Dataset):
    """Dataset yielding, per table row, an image stack, a label and a feature vector.

    For the row at position ``idx`` the first column is used as a folder-name
    prefix: every directory under ``root_dir`` whose name starts with it is
    scanned and all files not ending in ``'mask.tif'`` are loaded. The
    ``'death01'`` column is the label and the columns ``'RNASeqCluster'``
    through ``'neoplasm_histologic_grade'`` (inclusive) form the feature vector.

    All returned tensors are moved to the module-level ``device``.
    """

    def __init__(self, root_dir, data_frame):
        """
        Args:
            root_dir: Directory containing one image sub-folder per row.
            data_frame: Table with the folder prefix in column 0 plus the
                ``'tumor_location'``, ``'death01'`` and feature columns.
                Rows with a missing ``'tumor_location'`` are dropped.
        """
        self.root_dir = root_dir
        # Filter into a new frame (the caller's frame is left untouched) and
        # renumber the rows 0..n-1 so that a positional ``idx`` always refers
        # to one and the same row for the label, the folder and the features.
        self.labels = (
            data_frame
            .dropna(subset=['tumor_location'])
            .reset_index(drop=True)
        )
        self.transform = ToTensor()

    def __len__(self):
        """Return the number of rows kept after filtering."""
        return len(self.labels)

    @staticmethod
    def _natural_key(name):
        """Sort key ordering embedded digit runs numerically ('_2' before '_10')."""
        import re  # local import keeps the class self-contained

        # re.split with a capture group alternates text / digit tokens, so the
        # same list position always holds the same type and keys stay comparable.
        return [int(tok) if tok.isdigit() else tok for tok in re.split(r'(\d+)', name)]

    def __getitem__(self, idx):
        """Load one sample.

        Args:
            idx: Integer position (or 0-dim tensor) of the row to load.

        Returns:
            Tuple ``(images, label, rna)`` of float tensors on ``device``:
            the stacked images, the label wrapped in a 1-element tensor and
            the feature vector.

        Raises:
            RuntimeError: If no image file is found for the row.
        """
        if torch.is_tensor(idx):
            idx = idx.tolist()

        # Read everything from a single positional row lookup.
        row = self.labels.iloc[idx]
        label = row['death01']
        folder_name = row.iloc[0]

        images = []
        # Sorted so the result is deterministic if several folders match.
        for dir_name in sorted(os.listdir(self.root_dir)):
            if not dir_name.startswith(folder_name):
                continue
            img_folder = os.path.join(self.root_dir, dir_name)
            if not os.path.isdir(img_folder):
                continue

            # Natural ordering keeps numbered files in numeric order; plain
            # lexicographic sorting would place e.g. '..._10.tif' before '..._2.tif'.
            for img_name in sorted(os.listdir(img_folder), key=self._natural_key):
                if img_name.endswith('mask.tif'):
                    continue
                img_path = os.path.join(img_folder, img_name)
                # Context manager closes the file handle once the tensor is built.
                with Image.open(img_path) as image:
                    images.append(self.transform(image))

        if not images:
            raise RuntimeError(
                f"No images found for '{folder_name}' under '{self.root_dir}'"
            )

        # Convert explicitly: the row is a mixed-dtype Series, which torch
        # cannot be relied upon to turn into a numeric tensor directly.
        rna_values = row.loc['RNASeqCluster':'neoplasm_histologic_grade'].to_numpy(dtype='float32')

        return (
            torch.stack(images).float().to(device),
            torch.tensor([float(label)]).float().to(device),
            torch.tensor(rna_values, dtype=torch.float32).to(device),
        )



# Load the clinical table: rows lacking a 'death01' value are discarded and
# every remaining missing entry is replaced by 0.
df = (
    pd.read_csv('kaggle_3m/data.csv')
    .dropna(subset=['death01'])
    .fillna(0)
)
# No remapping is applied to 'death01'; print its distinct values as a check.
print(df['death01'].unique())

# One sample per table row; the loader below serves them one at a time, shuffled.
dataset = ImageFolderDataset(data_frame=df, root_dir='kaggle_3m')
dataloader = DataLoader(dataset=dataset, shuffle=True, batch_size=1)


In [None]:
import torch
import torch.nn as nn
import torch.nn.functional as F

class CNN(nn.Module):
    """Two conv + max-pool stages followed by a linear layer with 5 outputs.

    Both convolutions keep the spatial size (3x3 kernel, padding 1); each
    3x3 / stride-3 pooling shrinks it. The linear layer expects 28*28*6
    flattened features, which is what e.g. a 3x256x256 input produces
    (256 -> 85 -> 28).
    """

    def __init__(self):
        super().__init__()
        self.conv1 = nn.Conv2d(in_channels=3, out_channels=5, kernel_size=3, stride=1, padding=1)
        self.pool = nn.MaxPool2d(kernel_size=3, stride=3)
        self.conv2 = nn.Conv2d(in_channels=5, out_channels=6, kernel_size=3, stride=1, padding=1)
        self.fc = nn.Linear(in_features=28 * 28 * 6, out_features=5)

    def forward(self, x):
        """Map a ``(N, 3, H, W)`` batch to ``(N, 5)`` non-negative features."""
        # conv -> ReLU -> pool, applied once per convolution with a shared pool.
        for conv in (self.conv1, self.conv2):
            x = self.pool(F.relu(conv(x)))
        flat = x.view(x.size(0), -1)
        return F.relu(self.fc(flat))

class LSTM(nn.Module):
    """Single-layer LSTM (5 -> 10) whose last time step is projected to 5 values."""

    def __init__(self):
        super().__init__()
        self.lstm = nn.LSTM(input_size=5, hidden_size=10, batch_first=True)
        self.fc1 = nn.Linear(in_features=10, out_features=5)

    def forward(self, x):
        """Map a ``(batch, steps, 5)`` sequence to a ``(batch, 5)`` tensor."""
        sequence_out, _state = self.lstm(x)
        # Only the output at the final time step feeds the linear head.
        last_step = sequence_out[:, -1, :]
        return self.fc1(F.relu(last_step))

class CombinedModel(nn.Module):
    """CNN-per-image + LSTM-over-images model fused with a tabular feature vector.

    Each image of a sequence is encoded by ``CNN``; the per-image features are
    run through ``LSTM``; its 5-dimensional output is concatenated with the
    ``rna`` vector and passed through two linear layers ending in a sigmoid.
    """

    def __init__(self, rna_dim=9):
        """
        Args:
            rna_dim: Length of the feature vector concatenated with the LSTM
                output. Defaults to 9, the size that was previously hard-coded.
        """
        super(CombinedModel, self).__init__()
        self.cnn = CNN()
        self.lstm = LSTM()
        self.fc1 = nn.Linear(rna_dim + 5, 20)
        self.fc2 = nn.Linear(20, 1)

    def forward(self, x, rna):
        """
        Args:
            x: Image sequences of shape ``(batch, timesteps, C, H, W)``.
            rna: Feature vectors of shape ``(batch, rna_dim)``.

        Returns:
            Tensor of shape ``(batch, 1)`` with values in (0, 1).
        """
        batch_size, timesteps, C, H, W = x.size()
        # Fold batch and time together so the CNN sees every image as one
        # sample. The former view(timesteps, C, H, W) dropped the batch axis
        # and therefore only worked for batch_size == 1.
        x = x.reshape(batch_size * timesteps, C, H, W)
        x = self.cnn(x)
        # Restore the (batch, time, features) layout expected by the LSTM.
        x = x.view(batch_size, timesteps, -1)
        x = self.lstm(x)
        x = torch.cat((x, rna), 1)
        x = F.relu(self.fc1(x))
        x = self.fc2(x)
        x = torch.sigmoid(x)
        return x

# Build the combined network, then move its parameters to the selected device.
model = CombinedModel()
model = model.to(device)

print(model)



In [None]:
import torch.optim as optim

# 80 / 20 train-validation split.
train_size = int(0.8 * len(dataset))
val_size = len(dataset) - train_size

# Seed the split so the same samples land in the validation set on every run;
# without it the reported validation metrics are not comparable across runs.
SPLIT_SEED = 42
train_dataset, val_dataset = torch.utils.data.random_split(
    dataset,
    [train_size, val_size],
    generator=torch.Generator().manual_seed(SPLIT_SEED),
)

train_loader = torch.utils.data.DataLoader(train_dataset, batch_size=1, shuffle=True)
# Evaluation does not depend on sample order, so the validation loader is not shuffled.
val_loader = torch.utils.data.DataLoader(val_dataset, batch_size=1, shuffle=False)

def BCELoss_class_weighted(weights):
    """Build a class-weighted binary cross-entropy loss function.

    Args:
        weights: Indexable pair; ``weights[0]`` scales the ``target == 0``
            term and ``weights[1]`` scales the ``target == 1`` term.

    Returns:
        A callable ``loss(input, target)`` returning the mean weighted BCE,
        with ``input`` clamped to ``[1e-7, 1 - 1e-7]`` before the logarithms.
    """

    def loss(input, target):
        # Clamp away from 0 and 1 so neither logarithm can produce -inf.
        clipped = torch.clamp(input, min=1e-7, max=1-1e-7)
        positive_term = - weights[1] * target * torch.log(clipped)
        negative_term = (1 - target) * weights[0] * torch.log(1 - clipped)
        return torch.mean(positive_term - negative_term)

    return loss

# weights = [weight for target == 0, weight for target == 1].
# NOTE(review): 82/(110-82) presumably reflects 82 negative vs. 28 positive
# samples out of 110 -- confirm against the actual label counts.
criterion = BCELoss_class_weighted([1,82/(110-82)])
optimizer = optim.Adam(model.parameters(), lr=0.01)

num_epochs = 6
for epoch in range(num_epochs):
    # Re-enter training mode each epoch so re-running this cell after the
    # evaluation cell (which calls model.eval()) still trains correctly.
    model.train()
    correct = 0
    total = 0
    running_loss = 0.0
    for i, (inputs, labels,rna) in enumerate(train_loader):
        optimizer.zero_grad()
        outputs = model(inputs,rna)
        loss = criterion(outputs, labels)
        loss.backward()
        optimizer.step()

        # Count samples (not batches) so accuracy stays correct for any batch size.
        batch_count = labels.size(0)
        predicted = torch.round(outputs.detach())
        total += batch_count
        correct += (predicted == labels).sum().item()
        # criterion returns a per-batch mean; weight it by the batch size so
        # the epoch figure is a true per-sample average.
        running_loss += loss.item() * batch_count

        torch.cuda.empty_cache()

    # Guard against an empty loader instead of dividing by zero.
    accuracy = 100 * correct / total if total else float('nan')
    epoch_loss = running_loss / total if total else float('nan')
    # Report the epoch-average loss rather than the loss of the last batch only.
    print(f'Epoch {epoch+1}/{num_epochs}, Loss: {epoch_loss}, Accuracy: {accuracy}%')

print("Training complete.")



In [None]:
def _percentage(numerator, denominator):
    """Return 100 * numerator / denominator, or NaN when the denominator is 0."""
    return 100 * numerator / denominator if denominator else float('nan')


correct = 0
total = 0

model.eval()
tp = 0
fp = 0
fn = 0
tn = 0
# Inference only: skip autograd bookkeeping.
with torch.no_grad():
    for inputs, labels,rna in val_loader:
        outputs = model(inputs,rna)
        predicted = torch.round(outputs)
        total += labels.size(0)

        # Element-wise `&` instead of Python `and`: `and` asks for the truth
        # value of a whole tensor, which is only defined for one-element
        # tensors and raises for any larger batch.
        tp += ((predicted == 1) & (labels == 1)).sum().item()
        fp += ((predicted == 1) & (labels == 0)).sum().item()
        fn += ((predicted == 0) & (labels == 1)).sum().item()
        tn += ((predicted == 0) & (labels == 0)).sum().item()

correct = tp + tn

# Precision is undefined when nothing was predicted positive and recall when
# there are no positive labels; report NaN instead of raising ZeroDivisionError.
accuracy = _percentage(tp + tn, tp + tn + fn + fp)
precision = _percentage(tp, tp + fp)
recall = _percentage(tp, tp + fn)
print(tp,tn,fp,fn)
print(accuracy,precision,recall)