In [1]:
import argparse
import os
import sys
import time
import torch
import torch.nn as nn
import torch.nn.functional as F
import torchvision
from tqdm import tqdm
from torchsummary import summary
import tabulate
import torchvision.models as models
import cv2
import numpy as np
import torch.utils.data as utils
from sklearn.model_selection import train_test_split

In [2]:
def get_data(data_dir, mag):
    '''
    data_dir: Data directory containing all the extracted features
    mag: Magnification of the images i.e. ['40X', '100X', '200X', '400X']
    '''
    X, y = list(), list()
    data_path = data_dir + mag
    for l in os.listdir(data_path):
        xi = np.load(data_path+l)
        yi = (l[4]=='M')
        X.append(xi)
        y.append(yi)
    X, y = np.array(X), np.array(y)
    return X, y

In [3]:
data_dir = '../data/resnet/'

In [4]:
X, y = get_data(data_dir, '40X/')
#X, y = get_data(data_dir, '100X/')
#X, y = get_data(data_dir, '200X/')
#X, y = get_data(data_dir, '400X/')

data = list(zip(X, y))
data = np.array(data)
len(data)

1995

In [5]:
train_x, vtd_x, train_y, vtd_y = train_test_split(data[:, 0:1], data[:, 1:], test_size=0.20, random_state=42)
val_x, test_x, val_y, test_y = train_test_split(vtd_x, vtd_y, test_size=0.33, random_state=42)

train_x = np.array([np.squeeze(i[0]) for i in train_x])
train_y = np.array([[float(i[0])] for i in train_y])
print('Training Data: {}, {}'.format(train_x.shape, train_y.shape))

val_x = np.array([np.squeeze(i[0]) for i in val_x])
val_y = np.array([[float(i[0])] for i in val_y])
print('Validation Data: {}, {}'.format(val_x.shape, val_y.shape))

test_x = np.array([np.squeeze(i[0]) for i in test_x])
test_y = np.array([[float(i[0])] for i in test_y])
print('Testing Data: {}, {}'.format(test_x.shape, test_y.shape))

Training Data: (1596, 2048), (1596, 1)
Validation Data: (267, 2048), (267, 1)
Testing Data: (132, 2048), (132, 1)


In [6]:
##################################################
# For weighted Binary Cross Entropy
# To apply inverse class frequency as a re-weighting
# strategy we should have two output probabilities
def get_weights(data):
    sumP, sumN = 0, 0
    for xi in data:
        if xi:
            sumP += 1
        else:
            sumN += 1
    return torch.tensor([0.5/sumP, 0.5/sumN])
##################################################
weight = get_weights(train_y)
weight

tensor([0.0005, 0.0010])

In [7]:
t_x = torch.stack([torch.Tensor(i) for i in train_x]) # transform to torch tensors
t_y = torch.stack([torch.Tensor(i) for i in train_y])
my_dataset = utils.TensorDataset(t_x,t_y) # create your datset
train_dataloader = utils.DataLoader(my_dataset) # create your dataloader

In [8]:
v_x = torch.stack([torch.Tensor(i) for i in val_x]) # transform to torch tensors
v_y = torch.stack([torch.Tensor(i) for i in val_y])
my_dataset = utils.TensorDataset(v_x,v_y) # create your datset
val_dataloader = utils.DataLoader(my_dataset) # create your dataloader

In [9]:
te_x = torch.stack([torch.Tensor(i) for i in test_x]) # transform to torch tensors
te_y = torch.stack([torch.Tensor(i) for i in test_y])
my_dataset = utils.TensorDataset(te_x,te_y) # create your datset
test_dataloader = utils.DataLoader(my_dataset) # create your dataloader

In [10]:
t_x.shape, t_y.shape, v_x.shape, v_y.shape, te_x.shape, te_y.shape

(torch.Size([1596, 2048]),
 torch.Size([1596, 1]),
 torch.Size([267, 2048]),
 torch.Size([267, 1]),
 torch.Size([132, 2048]),
 torch.Size([132, 1]))

In [11]:
# Neural Network Architecture
class Model(nn.Module):
    def __init__(self):
        super().__init__()
        self.fc1 = nn.Linear(2048, 1024)
        self.fc2 = nn.Linear(1024, 512)
        self.fc3 = nn.Linear(512, 64)
        self.fc4 = nn.Linear(64, 1)

    def forward(self, x):
        x = self.fc1(x)
        x = F.relu(x)
        x = F.relu(self.fc2(x))
        x = F.relu(self.fc3(x))
        x = self.fc4(x)
        x = torch.sigmoid(x)
        return x

In [12]:
model = Model()
criterion = nn.BCELoss(weight=None)
optimizer = torch.optim.Adam(
    model.parameters(),
    lr=0.001,
    weight_decay=5e-4
)

In [13]:
for epoch in range(1, 30): 
    train_loss, valid_loss = [], []
    
    model.train()
    for data, target in train_dataloader:
        optimizer.zero_grad()
        
        output = model(data)
        loss = criterion(output, target)
        loss.backward()
        optimizer.step()
        
        train_loss.append(loss.item())
        
    ## evaluation part 
    model.eval()
    for data, target in val_dataloader:
        output = model(data)
        loss = criterion(output, target)
        valid_loss.append(loss.item())
    print ("Epoch:", epoch, "Training Loss: ", np.mean(train_loss), "Valid Loss: ", np.mean(valid_loss))

Epoch: 1 Training Loss:  0.6516669427379147 Valid Loss:  0.6462739032975743
Epoch: 2 Training Loss:  0.6451555254563216 Valid Loss:  0.6485401658752884
Epoch: 3 Training Loss:  0.635424323038771 Valid Loss:  0.6470915210827459
Epoch: 4 Training Loss:  0.7567313926419857 Valid Loss:  0.6495683031358969
Epoch: 5 Training Loss:  0.8215406119371971 Valid Loss:  0.6467727295468363
Epoch: 6 Training Loss:  0.6417773289659952 Valid Loss:  0.6465383236774345
Epoch: 7 Training Loss:  0.7501387623427194 Valid Loss:  0.6476074102219571
Epoch: 8 Training Loss:  0.7432385139936184 Valid Loss:  0.652194360333882
Epoch: 9 Training Loss:  0.7373811368869618 Valid Loss:  0.6492541941364159
Epoch: 10 Training Loss:  0.617450839893561 Valid Loss:  0.650145574343785
Epoch: 11 Training Loss:  0.7426798149754062 Valid Loss:  0.6500865234417862
Epoch: 12 Training Loss:  0.7836136683415438 Valid Loss:  0.649812553036079
Epoch: 13 Training Loss:  0.7846485668700325 Valid Loss:  0.6498897022075867
Epoch: 14 Tra

In [14]:
torch.save(model, './trained_models/40X/resnet_transfer.pt')
#torch.save(model, './trained_models/100X/resnet_transfer.pt')
#torch.save(model, './trained_models/200X/resnet_transfer.pt')
#torch.save(model, './trained_models/400X/resnet_transfer.pt')

FileNotFoundError: [Errno 2] No such file or directory: './trained_models/40X/resnet_transfer.pt'

In [None]:
for epoch in range(1, 30): ## run the model for 10 epochs
    train_loss, valid_loss = [], []
    ## training part 
    model.train()
    for data, target in train_dataloader:
        optimizer.zero_grad()
        
        output = model(data)
        loss = criterion(output, target)
        loss.backward()
        optimizer.step()
        
        train_loss.append(loss.item())
        
    ## evaluation part 
    model.eval()
    for data, target in val_dataloader:
        output = model(data)
        loss = criterion(output, target)
        valid_loss.append(loss.item())
    print ("Epoch:", epoch, "Training Loss: ", np.mean(train_loss), "Valid Loss: ", np.mean(valid_loss))


In [None]:
torch.save(model, './trained_models/40X/resnext_transfer.pt')
#torch.save(model, './trained_models/100X/resnext_transfer.pt')
#torch.save(model, './trained_models/200X/resnext_transfer.pt')
#torch.save(model, './trained_models/400X/resnext_transfer.pt')