In [4]:
from tensorflow.keras.preprocessing import image
from tensorflow.keras import models, layers
from tensorflow.keras.layers import Dense, Input, Concatenate, Activation
from tensorflow.keras.optimizers import Adam
from tensorflow.keras.callbacks import EarlyStopping
from tensorflow import keras
from sklearn.model_selection import train_test_split
from keras.applications.vgg16 import preprocess_input
from tensorflow.keras.preprocessing.image import ImageDataGenerator
from torch.utils.data import DataLoader, TensorDataset

from torch import nn
import torch
import matplotlib.pyplot as plt
import pandas as pd
import numpy as np
from collections import Counter
import glob
import cv2
import os

In [5]:
def proc_brain(imgdir, w, h):

    WIDTH, HEIGHT = w, h
    
    x = []
    for i in range(len(imgdir)):
        # Read and resize image
        full_size_image = cv2.imread(imgdir[i])
        x.append(cv2.resize(full_size_image, (WIDTH,HEIGHT), interpolation=cv2.INTER_CUBIC)/255.0) 

    return x

def diag_mat_weights(dimp, type = 'first'):
    if type == 'first':
        dg = np.zeros((dimp-1, dimp))
        for i in range(dimp-1):
            dg[i,i] = -1
            dg[i,i+1]= 1
    elif type == 'second':
        dg = np.zeros((dimp-2, dimp))
        for i in range(dimp-2):
            dg[i,i] = -1
            dg[i,i+1]= 2
            dg[i,i+2]= -1
    else:
        pass
    return torch.Tensor(dg)
    
class PRODBSplineLayerMultiFeature(nn.Module):
    def __init__(self, input_dim, degree, num_knots, output_dim, num_neurons, bias = True):
        super(PRODBSplineLayerMultiFeature, self).__init__()
        self.degree = degree
        self.num_knots = num_knots
        self.input_dim = input_dim
        self.output_dim = output_dim
        self.num_neurons = num_neurons
        
        if input_dim == 2:
            self.control_p = nn.Parameter(torch.randn(self.num_knots**2, self.output_dim))
        else:
            self.control_p = nn.Parameter(torch.randn(self.num_knots, self.num_neurons))
        if bias:
            self.bias = nn.Parameter(torch.randn(self.num_neurons))
        else:
            self.register_parameter('bias', None)
            
        self.inter = {}
    
    def basis_function(self, x, i, k, t):
    
        # Base case: degree 0 spline
        if k == 0:
            return ((t[i] <= x) & (x < t[i + 1])).float()
    
        # Recursive case
        denom1 = t[i + k] - t[i]
        denom2 = t[i + k + 1] - t[i + 1]
    
        term1 = 0
        if denom1 != 0:
            term1 = (x - t[i]) / denom1 * self.basis_function(x, i, k - 1, t)
    
        term2 = 0
        if denom2 != 0:
            term2 = (t[i + k + 1] - x) / denom2 * self.basis_function(x, i + 1, k - 1, t)
    
        return term1 + term2
    
    def forward(self, x):
        batch_size, num_features = x.size()
        device = x.device
        
        # Create knot vector
        # knots = torch.linspace(0, 1, self.num_knots + self.degree + 1).to(device)
        knots = torch.cat([
                        torch.zeros(self.degree),               # Add repeated values at the start for clamping
                        torch.linspace(0, 1, self.num_knots - self.degree + 1),  # Uniform knot spacing in the middle
                        torch.ones(self.degree)                 # Add repeated values at the end for clamping
                    ]).to(device)
        # Apply B-spline basis functions for each feature
        basises = []
    
        
        for feature in range(num_features):
            # Calculate B-spline basis functions for this feature
            basis = torch.stack([self.basis_function(x[:, feature], i, self.degree, knots) 
                                 for i in range(self.num_knots)], dim=-1)
            basises.append(basis)
            
        
        if num_features == 1:
            tout = basises[0] @ self.control_p
            self.inter['basic'] = basises[0].T
        else:
            self.inter['basic'] = torch.reshape(torch.stack(basises, dim = 1), (batch_size, self.num_knots * self.num_neurons)).T
            basises = torch.stack(basises)
            tout = basises.permute(1,2,0) * self.control_p
            tout = tout.sum(dim =1)
                
        if self.bias is not None:
            tout += self.bias        
            
        return tout
        
class NormLayer(nn.Module):
    def __init__(self):
        super(NormLayer, self).__init__()

    def forward(self, x):
        min_val = torch.min(x, axis = 1).values.reshape(-1,1)
        max_val = torch.max(x, axis = 1).values.reshape(-1,1)

        x = (x - min_val)/(max_val - min_val)  # Rescale to [0, 1]
        return x.detach()
        
def ECM(model, num_neurons, num_knots, initial_xi = 1, initial_sigma = 1, initial_lambda = 1e-4, L = None):
    lambdab = initial_lambda
    sigma = initial_sigma
    xi = initial_xi

    if L == 1:
        B = model.inter['ebasic']
        By = model.inter['basic']
        WB = model.sp1.control_p
    else:
        B = model.inter['ebasic2']
        By = model.inter['basic2']
        WB = model.sp2.control_p
        
    DB = diag_mat_weights(WB.size()[0]).to(device)
    size = B.size()[1]
    S = DB.T @ DB
    Cov_a = (xi**2)* torch.linalg.pinv(S)
    Cov_e = torch.eye(size*num_neurons)* sigma
    
    block_y = torch.reshape(By, (-1,1))
    flatB = B.view(num_neurons, num_knots, size)
        
    sqr_xi= 0
    sqr_sig = 0
    
    for i in range(num_neurons):
        Ncov = (Cov_a -(Cov_a @ flatB[i]) @ (torch.linalg.pinv(flatB[i].T @ Cov_a @ flatB[i] + Cov_e[size*i:size*(i+1),size*i:size*(i+1)]) @ flatB[i].T @ Cov_a))
        Nmu = (Cov_a @ flatB[i]) @ (torch.linalg.pinv(flatB[i].T @ Cov_a @ flatB[i] + Cov_e[size*i:size*(i+1),size*i:size*(i+1)])) @ By[:,i].reshape(-1,1)
        
        first_xi = S @ Ncov
        second_xi = (Nmu.T @ S @ Nmu)
        sqr_xi += torch.trace(first_xi) + second_xi
            
        first_sig = torch.norm(By[:,i])
        second_sig = 2 * (By[:,i] @ flatB[i].T) @ Nmu 
        third_sig = torch.trace((flatB[i] @ flatB[i].T) @ Ncov)
        four_sig = (Nmu.T @ flatB[i] @ flatB[i].T @ Nmu)
        
        sqr_sig += (first_sig + second_sig + third_sig + four_sig)
    
    sqr_xi /= num_neurons
    sqr_sig /= (num_neurons*size)
    
    Lambda = sqr_sig/sqr_xi
    
    return Lambda.item()

class EarlyStopper:
    def __init__(self, patience=1, min_delta=0):
        self.patience = patience
        self.min_delta = min_delta
        self.counter = 0
        self.min_validation_loss = float('inf')

    def early_stop(self, validation_loss):
        if validation_loss < self.min_validation_loss:
            self.min_validation_loss = validation_loss
            self.counter = 0
        elif validation_loss > (self.min_validation_loss + self.min_delta):
            self.counter += 1
            if self.counter >= self.patience:
                return True
        return False

# Data Preprocessing

In this section, we will load the brain tumor MRI Images and resize it to 224x224 pixels and ensure the images are in gray scale for numerical stability.

In [30]:
train1 = glob.glob('/Users/a080528/Downloads/BrainTumor/Training/glioma/*.jpg')
train2 = glob.glob('/Users/a080528/Downloads/BrainTumor/Training/notumor/*.jpg')
test1 = glob.glob('/Users/a080528/Downloads/BrainTumor/Testing/glioma/*.jpg')
test2 = glob.glob('/Users/a080528/Downloads/BrainTumor/Testing/notumor/*.jpg')

train = [train1, train2]; test = [test1, test2]

trainx = []
for f in train:
    x = proc_brain(f, 224, 224)
    trainx.append(x)

testx = []
for f in test:
    x = proc_brain(f, 224, 224)
    testx.append(x)

In [40]:
Xtraind = np.concatenate((np.array(trainx[0]), np.array(trainx[1])))
y_train = np.array([1]*len(trainx[0]))
y_train = np.concatenate((y_train, [0]*len(trainx[1])))
Xtestd = np.concatenate((np.array(testx[0]), np.array(testx[1])))
y_test = np.array([1]*len(testx[0]))
y_test = np.concatenate((y_test, [0]*len(testx[1])))

In [41]:
"""
In this block, you can customize the number of training size and testing size. According to your setting, we will 
randomly select the assigned number from image dataset. In this demo, we randomly select 500 images and 300 images
from training and testing dataset respectively. Besides, in order to fulfill the requirement of Pytorch, we need to
change the data to suitable type.

"""

trainsize = 500; testsize = 300

np.random.seed(123)
trainid = np.random.choice(len(Xtraind), trainsize)
testid = np.random.choice(len(Xtestd), testsize)

print(f"(Training) Number of glioma image: {Counter(y_train[trainid])[0]} |  Number of non-tumor image: {Counter(y_train[trainid])[1]} ")
print(f" (Testing) Number of glioma image: {Counter(y_test[testid])[0]} |  Number of non-tumor image: {Counter(y_test[testid])[1]} ")

X_train = torch.Tensor(Xtraind[trainid]).permute(0, 3, 1, 2); y_train = torch.Tensor(y_train[trainid]).type(torch.LongTensor)
X_test = torch.Tensor(Xtestd[testid]).permute(0, 3, 1, 2); y_test = torch.Tensor(y_test[testid]).type(torch.LongTensor)

(Training) Number of glioma image: 274 |  Number of non-tumor image: 226 
 (Testing) Number of glioma image: 172 |  Number of non-tumor image: 128 


# Modeling

## CNDNN-S

### Model setting

In [180]:
class DNNS1(nn.Module):
    def __init__(self, input_dim, degree, num_knots, num_neurons, output_dim, bias):
        super(DNNS1, self).__init__()
        self.num_neurons = num_neurons
        self.num_knots = num_knots
        self.ln1 = nn.Linear(input_dim, num_neurons)
        self.nm1 = NormLayer() 
        self.sp1 = PRODBSplineLayerMultiFeature(input_dim = 1, degree = degree, num_knots = num_knots, num_neurons = num_neurons, output_dim= output_dim, bias = True)
        self.ln2 = nn.Linear(num_neurons, output_dim)
        self.inter = {}
        
    def forward(self, x):
        ln1out = self.ln1(x)
        ln1out = self.nm1(ln1out)
        
        device = ln1out.device
        batch_size, _ = x.size()
        
        # # # # # # # # # # # # # #
        #         SPLINE 1        #
        # # # # # # # # # # # # # #
        
        sp1out = self.sp1(ln1out)
        bslist = self.sp1.inter['basic']
        
        self.inter['ebasic'] = bslist
        self.inter['basic'] = sp1out

        ln2out = self.ln2(sp1out)        
        return ln2out

class TumorClassifier(nn.Module):
    def __init__(self, Fin, dg, nk, nm, Fout, bias):
        super(TumorClassifier, self).__init__()
        self.features = nn.Sequential(
            nn.Conv2d(3, 16, kernel_size=3, padding=1),
            nn.ReLU(inplace=True),
            nn.MaxPool2d(kernel_size=2, stride=2),
            nn.Conv2d(16, 32, kernel_size=3, padding=1),
            nn.ReLU(inplace=True),
            nn.MaxPool2d(kernel_size=2, stride=2),
        )
        self.gap = nn.Flatten()
        self.classifier = DNNS1(input_dim = Fin, degree = dg, num_knots = nk, num_neurons = nm, output_dim = Fout, bias = True).to(device)
        self.sm = nn.Softmax(dim = 1)
    def forward(self, x):
        x = self.features(x)
        x = self.gap(x)
        x = self.classifier(x)
        x = self.sm(x)

        return x

In [49]:
"""
Model setting:

`device`: running the program with cpu or gpu
`tmc`: the classifier that equip with DNN-S 
`nm` : number of neuron in DNN-S
`nk` : number of knot in DNN-S
`patientc` : (early-stop crierion) If the model didn't improve in n epoch then stop.
`patientr` : If the model didn't improve in n epoch then decrease learning rate with specific factor.

"""

# experiment setting
Iteration = 10000; bloss_list = []; tor = 1e-5; lr_tor = 1e-6
patientc = 10; patientr = 5; tpat = 0; bloss = 9999
nm = 100; nk = 15; doutput = 2
device = torch.device("cuda:0" if torch.cuda.is_available() else "cpu")

# model parameter 
tmc = TumorClassifier(Fin = 100352, dg = 3, nk = nk, nm = nm, Fout = doutput, bias = True)
learning_r = 1e-2
optimizer = torch.optim.Adam(tmc.parameters(), lr=learning_r)
criterion = nn.CrossEntropyLoss()

### Model training

In [None]:
for t in range(Iteration):

    # Forward pass: Compute predicted y by passing x to the modelsp
    pyb_af = tmc(X_train)
    loss = criterion(pyb_af, y_train); bloss_list.append(loss.item())
    
    if (t > 0) and ((bloss_list[t-1]-bloss_list[t])<tor):        
        if (tpat % patientr) == 0:
            learning_r *= 0.2 
            tpat += 1
            #print('Learning rate reduce to ', learning_r)
            optimizer = torch.optim.Adam(tmc.parameters(), lr=learning_r)
            if learning_r <= lr_tor:
                print('Convergence!')
                break
        elif tpat < patientc:
            tpat += 1
            pass
        else:
            print('Convergence!')
            break
        
    else:
        if loss < bloss:
            #torch.save(tmc.state_dict(), './brainimg'+str(X_train.size()[0])+'h'+str(nm)+'k'+str(nk))
            bloss = loss.item()
            tpat = 0
        tpat += 1

    if tpat == patientc:
        print('Convergence!')
        break
    
    prediction = torch.argmax(pyb_af, axis = 1)
    acc = (torch.argmax(pyb_af, axis = 1) == y_train).sum()/len(y_train)
    
    optimizer.zero_grad()
    loss.backward()
    optimizer.step()
    
    if(t % 10 == 0):
        print('| Epoch: ',t+1,'/',str(Iteration),' | Loss: ', np.round(loss.item(), 4),' | Acc: ', acc.item())
        if(t % 50 == 0):
            with torch.no_grad():
                print((torch.argmax(tmc(X_test).detach(), axis = 1) == y_test).sum()/len(y_test))

### ECM Tuning

In [170]:
"""

`ECM_epoch`: number of epoch to run the ecm tuning

"""

print("Running the ECM tunning for penalty in each layer")

ECM_epoch = 10
with torch.no_grad():
    eval_model = TumorClassifier(Fin = 100352, dg = 3, nk = nk, nm = nm, Fout = doutput, bias = True).to(device)
    eval_model.load_state_dict(torch.load('./brainimg'+str(X_train.size()[0])+'h'+str(nm)+'k'+str(nk), weights_only = True))

    WB = eval_model.classifier.sp1.control_p
    DB = diag_mat_weights(WB.size()[0], 'second').to(device)
    
    BestGCV = np.inf
    n = X_train.size()[0]
    
    for i in range(ECM_epoch):
        eval_model.train()
        MPSy = eval_model(X_train)

        # update following layer except for last layer
        LambdaB1 = ECM(model = eval_model.classifier, num_neurons = nm, num_knots = nk, L = 1)
        
        B1 = eval_model.classifier.inter['ebasic']
        By1 = eval_model.classifier.inter['basic']
        P2 = By1 @ torch.inverse(By1.T @ By1) @ By1.T

        
        size1 = B1.size()[1]
        B1 = B1.view(nk, nm, size1)

        NW1 = torch.empty((nk, nm))
        NB1 = torch.empty((nm))

        
        for i in range(nm):
            B1y = By1[:,i] - eval_model.classifier.sp1.bias.data[i]
            BB1 = B1[:,i].T

            # Update the weights and bias
            NW1[:, i] = (torch.inverse(BB1.T @ BB1 + (LambdaB1/size1) * (DB.T @ DB)) @ BB1.T @ B1y)
            NB1[i] = torch.mean(By1[:,i] - (NW1[:,i] @ BB1.T))
            
        # update the weight
        getattr(eval_model.classifier.sp1, 'control_p').data = NW1; getattr(eval_model.classifier.sp1, 'bias').data = NB1

        # update the last layer
        WholeB = torch.cat((torch.ones((n,1)), By1), dim = 1)
        NLn2W = (torch.inverse(WholeB.T @ WholeB) @ WholeB.T @ MPSy.type(torch.FloatTensor)).T
        getattr(eval_model.classifier.ln2, 'bias').data = NLn2W[:,0]; getattr(eval_model.classifier.ln2, 'weight').data = NLn2W[:,1:]
        
        eval_model.eval()
        pred_postecm = eval_model(X_train)
        CLoss = criterion(pred_postecm.detach(), y_train)
        trainGCV = CLoss/(n-torch.trace(P2))**2
        
        if trainGCV < BestGCV:
            BestLambdaB = LambdaB1
            BestGCV = trainGCV
            
        print(f"Lambda: {np.round(LambdaB1, 5)}| Training Loss: {np.round(CLoss, 5)}| Training GCV: {trainGCV.item()}")

Running the ECM tunning for penalty in each layer
Lambda: 0.2357| Training Loss: 0.6829500198364258| Training GCV: 4.268463726475602e-06
Lambda: 0.27503| Training Loss: 0.5793099999427795| Training GCV: 3.62062542080821e-06
Lambda: 0.27773| Training Loss: 0.7097499966621399| Training GCV: 4.435667051438941e-06
Lambda: 0.20232| Training Loss: 0.6283699870109558| Training GCV: 3.927318175556138e-06
Lambda: 0.19315| Training Loss: 0.6672599911689758| Training GCV: 4.173381967120804e-06
Lambda: 0.1774| Training Loss: 0.6161199808120728| Training GCV: 4.7093672037590295e-06
Lambda: 0.17239| Training Loss: 0.6477500200271606| Training GCV: 3.9890310290502384e-06
Lambda: 0.16794| Training Loss: 0.6128600239753723| Training GCV: 3.708579697558889e-06
Lambda: 0.16491| Training Loss: 0.6821600198745728| Training GCV: 4.254592113284161e-06
Lambda: 0.16238| Training Loss: 0.6178699731826782| Training GCV: 3.938550435123034e-06


### DPS fast tuning

In [None]:
"""

`fast_epoch`: number of epoch to run the fast tuning

"""

fast_epoch = 201
DPS = TumorClassifier(Fin = 100352, dg = 3, nk = nk, nm = nm, Fout = doutput, bias = True).to(device)
DPS.load_state_dict(torch.load('./brainimg'+str(X_train.size()[0])+'h'+str(nm)+'k'+str(nk), weights_only = True))
lr_ft = 1e-2
optimizer = torch.optim.Adam(DPS.parameters(), lr=lr_ft)

for t in range(1, fast_epoch):

    # Forward pass: Compute predicted y by passing x to the modelsp
    pyb_af = DPS(X_train)

    WB1 = DPS.classifier.sp1.control_p.data; DB1 = diag_mat_weights(WB1.size()[0]).to(device)

    loss = criterion(pyb_af, y_train) + (BestLambdaB/n) * torch.norm(DB1 @ WB1)
    bloss_list.append(loss.item())
    
    prediction = torch.argmax(pyb_af, axis = 1)
    acc = (torch.argmax(pyb_af, axis = 1) == y_train).sum()/len(y_train)
    
    optimizer.zero_grad()
    loss.backward()
    optimizer.step()

    if t % 10 == 0:
        print('| Epoch: ',t+1,'/',str(Iteration),' | Loss: ', loss.item(),' | Acc: ', np.round(acc.item(), 5))
        if t % 100 == 0:
            with torch.no_grad():
                print((torch.argmax(DPS(X_test).detach(), axis = 1) == y_test).sum()/len(y_test))

## CN2DPS

### Model setting

In [225]:
class PRODBSplineLayerMultiFeature(nn.Module):
    def __init__(self, input_dim, degree, num_knots, output_dim, num_neurons, bias = True):
        super(PRODBSplineLayerMultiFeature, self).__init__()
        self.degree = degree
        self.num_knots = num_knots
        self.input_dim = input_dim
        self.output_dim = output_dim
        self.num_neurons = num_neurons
        
        if input_dim == 2:
            self.control_p = nn.Parameter(torch.randn(self.num_knots**2, self.output_dim))
        else:
            self.control_p = nn.Parameter(torch.randn(self.num_knots, self.num_neurons))
        if bias:
            self.bias = nn.Parameter(torch.randn(self.num_neurons))
        else:
            self.register_parameter('bias', None)
            
        self.inter = {}
    
    def basis_function(self, x, i, k, t):
    
        # Base case: degree 0 spline
        if k == 0:
            return ((t[i] <= x) & (x < t[i + 1])).float()
    
        # Recursive case
        denom1 = t[i + k] - t[i]
        denom2 = t[i + k + 1] - t[i + 1]
    
        term1 = 0
        if denom1 != 0:
            term1 = (x - t[i]) / denom1 * self.basis_function(x, i, k - 1, t)
    
        term2 = 0
        if denom2 != 0:
            term2 = (t[i + k + 1] - x) / denom2 * self.basis_function(x, i + 1, k - 1, t)
    
        return term1 + term2
    
    def forward(self, x):
        batch_size, num_features = x.size()
        device = x.device
        
        # Create knot vector
        # knots = torch.linspace(0, 1, self.num_knots + self.degree + 1).to(device)
        knots = torch.cat([
                        torch.zeros(self.degree),               # Add repeated values at the start for clamping
                        torch.linspace(0, 1, self.num_knots - self.degree + 1),  # Uniform knot spacing in the middle
                        torch.ones(self.degree)                 # Add repeated values at the end for clamping
                    ]).to(device)
        # Apply B-spline basis functions for each feature
        basises = []
    
        
        for feature in range(num_features):
            # Calculate B-spline basis functions for this feature
            basis = torch.stack([self.basis_function(x[:, feature], i, self.degree, knots) 
                                 for i in range(self.num_knots)], dim=-1)
            basises.append(basis)
            
        if num_features == 1:
            tout = basises[0] @ self.control_p
            self.inter['eachbasic'] = basises[0].T
        else:
            #self.inter['basic'] = torch.reshape(torch.stack(basises, dim = 1), (batch_size, self.num_knots * self.num_neurons)).T
            self.inter['eachbasic'] = torch.reshape(torch.stack(basises, dim = 1), (batch_size, self.num_knots * self.num_neurons)).T
            
            basises = torch.stack(basises)
            tout = basises.permute(1,2,0) * self.control_p
            tout = tout.sum(dim =1)
                
        if self.bias is not None:
            tout += self.bias        

        self.inter['basicoutput'] = tout
        
        return tout
        
class DNNS2(nn.Module):
    def __init__(self, input_dim, degree, num_knots, num_neurons, output_dim, bias):
        super(DNNS2, self).__init__()
        self.num_neurons = num_neurons
        self.num_knots = num_knots
        self.ln1 = nn.Linear(input_dim, num_neurons)
        self.nm1 = NormLayer() 
        self.sp1 = PRODBSplineLayerMultiFeature(input_dim = 1, degree = degree, num_knots = num_knots, num_neurons = num_neurons, output_dim= output_dim, bias = True)
        self.nm2 = NormLayer() 
        self.sp2 = PRODBSplineLayerMultiFeature(input_dim = 1, degree = degree, num_knots = num_knots, num_neurons = num_neurons, output_dim= output_dim, bias = True)
        self.ln2 = nn.Linear(num_neurons, output_dim)
        self.inter = {}
        
    def forward(self, x):
        ln1out = self.ln1(x)
        ln1out = self.nm1(ln1out)
        
        device = ln1out.device
        batch_size, _ = x.size()
        
        # # # # # # # # # # # # # #
        #         SPLINE          #
        # # # # # # # # # # # # # #
        
        sp1out = self.sp1(ln1out)
        sp1out = self.nm2(sp1out)

        # # # # # # # # # # # # # #
        #         SPLINE 2        #
        # # # # # # # # # # # # # #
        
        sp2out = self.sp2(sp1out)
        ln2out = self.ln2(sp2out)
        
        return ln2out

In [226]:
class TumorClassifier(nn.Module):
    def __init__(self, Fin, dg, nk, nm, Fout, bias):
        super(TumorClassifier, self).__init__()
        self.features = nn.Sequential(
            nn.Conv2d(3, 16, kernel_size=3, padding=1),
            nn.ReLU(inplace=True),
            nn.MaxPool2d(kernel_size=2, stride=2),
            nn.Conv2d(16, 32, kernel_size=3, padding=1),
            nn.ReLU(inplace=True),
            nn.MaxPool2d(kernel_size=2, stride=2),
        )
        self.gap = nn.Flatten()
        self.classifier = DNNS2(input_dim = 32*56*56, degree = dg, num_knots = nk, num_neurons = nm, output_dim = Fout, bias = True).to(device)
        self.sm = nn.Softmax(dim = 1)
    def forward(self, x):
        x = self.features(x)
        x = self.gap(x)
        x = self.classifier(x)
        x = self.sm(x)

        return x

class EarlyStopper:
    def __init__(self, patience=1, min_delta=0):
        self.patience = patience
        self.min_delta = min_delta
        self.counter = 0
        self.min_validation_loss = float('inf')

    def early_stop(self, validation_loss):
        if validation_loss < self.min_validation_loss:
            self.min_validation_loss = validation_loss
            self.counter = 0
        elif validation_loss > (self.min_validation_loss + self.min_delta):
            self.counter += 1
            if self.counter >= self.patience:
                return True
        return False

In [232]:
device = torch.device("cuda:0" if torch.cuda.is_available() else "cpu")

"""
Model setting:

`device`: running the program with cpu or gpu
`tmc`: the classifier that equip with DNN-S 
`nm` : number of neuron in DNN-S
`nk` : number of knot in DNN-S
`patientc` : (early-stop crierion) If the model didn't improve in n epoch then stop.
`patientr` : If the model didn't improve in n epoch then decrease learning rate with specific factor.

"""

# experiment setting
Iteration = 10000; bloss_list = []; tor = 1e-5; lr_tor = 1e-6
patientc = 10; patientr = 5; tpat = 0; bloss = 9999
nm = 100; nk = 15; doutput = 2
device = torch.device("cuda:0" if torch.cuda.is_available() else "cpu")

# model parameter 
tmc = TumorClassifier(Fin = 100352, dg = 3, nk = nk, nm = nm, Fout = doutput, bias = True)
learning_r = 1e-2
optimizer = torch.optim.Adam(tmc.parameters(), lr=learning_r)
criterion = nn.CrossEntropyLoss()

### Model training

In [233]:
for t in range(Iteration):

    # Forward pass: Compute predicted y by passing x to the modelsp
    pyb_af = tmc(X_train)
    loss = criterion(pyb_af, y_train); bloss_list.append(loss.item())
    
    if (t > 0) and ((bloss_list[t-1]-bloss_list[t])<tor):        
        if (tpat % patientr) == 0:
            learning_r *= 0.2 
            tpat += 1
            #print('Learning rate reduce to ', learning_r)
            optimizer = torch.optim.Adam(tmc.parameters(), lr=learning_r)
            if learning_r <= lr_tor:
                print('Convergence!')
                break
        elif tpat < patientc:
            tpat += 1
            pass
        else:
            print('Convergence!')
            break
        
    else:
        if loss < bloss:
            #torch.save(tmc.state_dict(), './brainimg'+str(X_train.size()[0])+'h'+str(nm)+'k'+str(nk))
            bloss = loss.item()
            tpat = 0
        tpat += 1

    if tpat == patientc:
        print('Convergence!')
        break
    
    prediction = torch.argmax(pyb_af, axis = 1)
    acc = (torch.argmax(pyb_af, axis = 1) == y_train).sum()/len(y_train)
    
    optimizer.zero_grad()
    loss.backward()
    optimizer.step()
    
    if(t % 10 == 0):
        print('| Epoch: ',t+1,'/',str(Iteration),' | Loss: ', np.round(loss.item(), 4),' | Acc: ', acc.item())
        if(t % 100 == 0):
            with torch.no_grad():
                print((torch.argmax(tmc(X_test).detach(), axis = 1) == y_test).sum()/len(y_test))

| Epoch:  1 / 100  | Loss:  0.6802  | Acc:  0.593999981880188
tensor(0.5733)
| Epoch:  11 / 100  | Loss:  0.6035  | Acc:  0.722000002861023
| Epoch:  21 / 100  | Loss:  0.5478  | Acc:  0.7919999957084656
| Epoch:  31 / 100  | Loss:  0.5064  | Acc:  0.8199999928474426
| Epoch:  41 / 100  | Loss:  0.4776  | Acc:  0.8560000061988831
| Epoch:  51 / 100  | Loss:  0.4547  | Acc:  0.8899999856948853
tensor(0.7667)
| Epoch:  61 / 100  | Loss:  0.4357  | Acc:  0.8939999938011169
| Epoch:  71 / 100  | Loss:  0.4198  | Acc:  0.9139999747276306
| Epoch:  81 / 100  | Loss:  0.4061  | Acc:  0.9259999990463257
| Epoch:  91 / 100  | Loss:  0.3934  | Acc:  0.9419999718666077


In [260]:
with torch.no_grad():
    eval_model = TumorClassifier(Fin = 100352, dg = 3, nk = nk, nm = nm, Fout = doutput, bias = True).to(device)
    eval_model.load_state_dict(torch.load('./2brainimg'+str(X_train.size()[0])+'h'+str(nm)+'k'+str(nk), weights_only = True))
    pred_postecm = eval_model(X_train)
    CLoss = criterion(pred_postecm.detach(), y_train)

### ECM Tuning

In [238]:
def ECM(model, num_neurons, num_knots, initial_xi = 1, initial_sigma = 1, initial_lambda = 1e-4):
    lambdab = initial_lambda
    sigma = initial_sigma
    xi = initial_xi

    B = model.inter['eachbasic']
    By = model.inter['basicoutput']
    WB = model.control_p
        
    DB = diag_mat_weights(WB.size()[0]).to(device)
    size = B.size()[1]
    S = DB.T @ DB
    Cov_a = (xi**2)* torch.linalg.pinv(S)
    Cov_e = torch.eye(size*num_neurons)* sigma
    
    block_y = torch.reshape(By, (-1,1))
    flatB = B.view(num_neurons, num_knots, size)
        
    sqr_xi= 0
    sqr_sig = 0
    
    for i in range(num_neurons):
        Ncov = (Cov_a -(Cov_a @ flatB[i]) @ (torch.linalg.pinv(flatB[i].T @ Cov_a @ flatB[i] + Cov_e[size*i:size*(i+1),size*i:size*(i+1)]) @ flatB[i].T @ Cov_a))
        Nmu = (Cov_a @ flatB[i]) @ (torch.linalg.pinv(flatB[i].T @ Cov_a @ flatB[i] + Cov_e[size*i:size*(i+1),size*i:size*(i+1)])) @ By[:,i].reshape(-1,1)
        
        first_xi = S @ Ncov
        second_xi = (Nmu.T @ S @ Nmu)
        sqr_xi += torch.trace(first_xi) + second_xi
            
        first_sig = torch.norm(By[:,i])
        second_sig = 2 * (By[:,i] @ flatB[i].T) @ Nmu 
        third_sig = torch.trace((flatB[i] @ flatB[i].T) @ Ncov)
        four_sig = (Nmu.T @ flatB[i] @ flatB[i].T @ Nmu)
        
        sqr_sig += (first_sig + second_sig + third_sig + four_sig)
    
    sqr_xi /= num_neurons
    sqr_sig /= (num_neurons*size)
    
    Lambda = sqr_sig/sqr_xi
    
    return Lambda.item()

In [257]:
"""

`ECM_epoch`: number of epoch to run the ecm tuning

"""

LoP = 2
PSname = ['sp'+str(i+1) for i in range(LoP)]
ECM_epoch = 10

print("Running the ECM tunning for penalty in each layer")

with torch.no_grad():
    eval_model = TumorClassifier(Fin = 100352, dg = 3, nk = nk, nm = nm, Fout = doutput, bias = True).to(device)
    eval_model.load_state_dict(torch.load('./2brainimg'+str(X_train.size()[0])+'h'+str(nm)+'k'+str(nk), weights_only = True))

    BestGCV = np.inf
    n = X_train.size()[0]
    
    for i in range(ECM_epoch):
        eval_model.train()
        MPSy = eval_model(X_train)
    
        # update following layer except for last layer
        LambdaL = []
        for layer in PSname:
            splayer = getattr(eval_model.classifier, layer)
            WB = getattr(splayer, 'control_p')
            DB = diag_mat_weights(WB.size()[0], 'second').to(device)
            LambdaB = ECM(model = getattr(eval_model.classifier, layer), num_neurons = nm, num_knots = nk)
            LambdaL.append(LambdaB)
            
            B1 = getattr(eval_model.classifier, layer).inter['eachbasic']
            By1 = getattr(eval_model.classifier, layer).inter['basicoutput']
            P2 = By1 @ torch.inverse(By1.T @ By1) @ By1.T
    
            size1 = B1.size()[1]
            B1 = B1.view(nk, nm, size1)
    
            NW1 = torch.empty((nk, nm))
            NB1 = torch.empty((nm))
    
        
            for i in range(nm):
                B1y = By1[:,i] - getattr(eval_model.classifier, layer).bias.data[i]
                BB1 = B1[:,i].T
        
                # Update the weights and bias
                NW1[:, i] = (torch.inverse(BB1.T @ BB1 + (LambdaB/size1) * (DB.T @ DB)) @ BB1.T @ B1y)
                NB1[i] = torch.mean(By1[:,i] - (NW1[:,i] @ BB1.T))
                
            # update the weight
            getattr(splayer, 'control_p').data = NW1; getattr(splayer, 'bias').data = NB1
    
        # update the last layer
        WholeB = torch.cat((torch.ones((n,1)), By1), dim = 1)
        NLn2W = (torch.inverse(WholeB.T @ WholeB) @ WholeB.T @ MPSy.type(torch.FloatTensor)).T
        
        getattr(eval_model.classifier.ln2, 'bias').data = NLn2W[:,0]; getattr(eval_model.classifier.ln2, 'weight').data = NLn2W[:,1:]
        
        eval_model.eval()
        pred_postecm = eval_model(X_train)
        CLoss = criterion(pred_postecm.detach(), y_train)
        trainGCV = CLoss/(n-torch.trace(P2))**2
        
        if trainGCV < BestGCV:
            BestLambdaB = LambdaB1
            BestGCV = trainGCV
            
        print(f"Lambda: {LambdaL}| Training Loss: {np.round(CLoss, 5)}| Training GCV: {trainGCV.item()}")

Running the ECM tunning for penalty in each layer
Lambda: [0.30183088779449463, 0.30312493443489075]| Training Loss: 0.7657999992370605| Training GCV: 4.786255885846913e-06
Lambda: [0.973455011844635, 0.6522708535194397]| Training Loss: 0.8041099905967712| Training GCV: 4.120976882404648e-06
Lambda: [1.3706945180892944, 0.3747212588787079]| Training Loss: 0.7974600195884705| Training GCV: 5.1150018407497555e-06
Lambda: [1.8239096403121948, 0.3827195465564728]| Training Loss: 0.8612599968910217| Training GCV: 4.769452061736956e-06
Lambda: [1.5904974937438965, 1.7096126079559326]| Training Loss: 0.7672600150108337| Training GCV: 3.7891347801632946e-06
Lambda: [1.670275092124939, 1.7180237770080566]| Training Loss: 0.7532600164413452| Training GCV: 3.5555015074351104e-06
Lambda: [1.777961254119873, 0.859359860420227]| Training Loss: 0.907260000705719| Training GCV: 4.15523027186282e-06
Lambda: [1.9107773303985596, 1.430779218673706]| Training Loss: 0.7932599782943726| Training GCV: 3.4246

### 2DPS Tuning

In [264]:
"""

`fast_epoch`: number of epoch to run the fast tuning

"""

fast_epoch = 201
D2PS = TumorClassifier(Fin = 100352, dg = 3, nk = nk, nm = nm, Fout = doutput, bias = True).to(device)
D2PS.load_state_dict(torch.load('./2brainimg'+str(X_train.size()[0])+'h'+str(nm)+'k'+str(nk), weights_only = True))
lr_ft = 1e-2
optimizer = torch.optim.Adam(D2PS.parameters(), lr=lr_ft)

In [None]:
for t in range(1, fast_epoch):

    # Forward pass: Compute predicted y by passing x to the modelsp
    pyb_af = D2PS(X_train)
    loss = criterion(pyb_af, y_train)
    
    for l in range(len(PSname)):
        WB = getattr(D2PS.classifier, PSname[l]).control_p.data; DB = diag_mat_weights(WB.size()[0]).to(device)
        loss += (LambdaL[l]/n) * torch.norm(DB @ WB)
            
    prediction = torch.argmax(pyb_af, axis = 1)
    acc = (torch.argmax(pyb_af, axis = 1) == y_train).sum()/len(y_train)
    
    optimizer.zero_grad()
    loss.backward()
    optimizer.step()

    if t % 10 == 0:
        print('| Epoch: ',t+1,'/',str(Iteration),' | Loss: ', loss.item(),' | Acc: ', np.round(acc.item(), 5))
        if t % 100 == 0:
            with torch.no_grad():
                print((torch.argmax(D2PS(X_test).detach(), axis = 1) == y_test).sum()/len(y_test))

[1.9773733615875244, 1.1929782629013062]

## Testing early-stop and decay lr

In [43]:
nk = 15; nm = 100; doutput = 2
tmc = TumorClassifier(Fin = 64, dg = 3, nk = nk, nm = nm, Fout = doutput, bias = True)

learning_r = 1e-2
criterion = nn.CrossEntropyLoss()
optimizer = torch.optim.Adam(tmc.parameters(), lr=learning_r)
Iteration = 10000; tor = 1e-5; bloss = 9999

early_stopper = EarlyStopper(patience=10, min_delta=tor)
scheduler = torch.optim.lr_scheduler.ReduceLROnPlateau(optimizer, 'min', patience=3, factor=0.2)

dataset = TensorDataset(X_train, y_train)
dataloader = DataLoader(dataset, batch_size=32, shuffle=True)

for t in range(1, Iteration+1):

    tmc.train()
    train_loss = 0
    for batch_X, batch_y in dataloader:
        # Forward pass
        outputs = tmc(batch_X)
        loss = criterion(outputs, batch_y)
        train_loss += loss.item()
        
        # Backward pass and optimization
        optimizer.zero_grad()
        loss.backward()
        optimizer.step()

    tmc.eval()
        
    if train_loss < bloss:
        best_model = tmc.state_dict()
        bloss = train_loss

    scheduler.step(train_loss)
    
    if early_stopper.early_stop(train_loss):   
        print('Early Stop at Epoch: ', t)
        break    

    print('| Epoch: ',t,'/',str(Iteration),' | Loss: ', np.round(train_loss/len(dataloader), 4))

    if t % 10 == 0:
        with torch.no_grad():
            print('training result: ', ((torch.argmax(tmc(X_train).detach(), axis = 1) == y_train).sum()/len(y_train)).item())
            print('testing result: ', ((torch.argmax(tmc(X_test).detach(), axis = 1) == y_test).sum()/len(y_test)).item())

print('Saving the best model ...')
torch.save(best_model, './Brain2MBS'+str(X_train.size()[0])+'h'+str(nm)+'k'+str(nk))

| Epoch:  1 / 10000  | Loss:  0.6334
| Epoch:  2 / 10000  | Loss:  0.4917
| Epoch:  3 / 10000  | Loss:  0.454
| Epoch:  4 / 10000  | Loss:  0.4373
| Epoch:  5 / 10000  | Loss:  0.424
| Epoch:  6 / 10000  | Loss:  0.423
| Epoch:  7 / 10000  | Loss:  0.4155
| Epoch:  8 / 10000  | Loss:  0.404
| Epoch:  9 / 10000  | Loss:  0.4032
| Epoch:  10 / 10000  | Loss:  0.3994
training result:  0.9300000071525574
testing result:  0.8500000238418579
| Epoch:  11 / 10000  | Loss:  0.3926
| Epoch:  12 / 10000  | Loss:  0.3936
| Epoch:  13 / 10000  | Loss:  0.3872
| Epoch:  14 / 10000  | Loss:  0.3904
| Epoch:  15 / 10000  | Loss:  0.3818
| Epoch:  16 / 10000  | Loss:  0.3768
| Epoch:  17 / 10000  | Loss:  0.3742
| Epoch:  18 / 10000  | Loss:  0.3706
| Epoch:  19 / 10000  | Loss:  0.3773
| Epoch:  20 / 10000  | Loss:  0.3682
training result:  0.9599999785423279
testing result:  0.8799999952316284
| Epoch:  21 / 10000  | Loss:  0.3663
| Epoch:  22 / 10000  | Loss:  0.3691
| Epoch:  23 / 10000  | Loss:  

In [45]:
with torch.no_grad():
    eval_model = TumorClassifier(Fin = 64, dg = 3, nk = nk, nm = nm, Fout = doutput, bias = True).to(device)
    eval_model.load_state_dict(torch.load('./Brain2MBS'+str(X_train.size()[0])+'h'+str(nm)+'k'+str(nk), weights_only = True))
    print((torch.argmax(eval_model(X_train).detach(), axis = 1) == y_train).sum()/len(y_train))
    print((torch.argmax(eval_model(X_test).detach(), axis = 1) == y_test).sum()/len(y_test))

tensor(0.9740)
tensor(0.8950)


## Original code

In [192]:
nk = 100; nm = 15; doutput = 2
tmc = TumorClassifier(Fin = 50, dg = 3, nk = nk, nm = nm, Fout = doutput, bias = True)

learning_r = 1e-2
criterion = nn.CrossEntropyLoss()
optimizer = torch.optim.Adam(tmc.parameters(), lr=learning_r)
Iteration = 10000; bloss_list = []; tor = 1e-5; lr_tor = 1e-6
patientc = 10; patientr = 5; tpat = 0; bloss = 9999

for t in range(Iteration):

    # Forward pass: Compute predicted y by passing x to the modelsp
    pyb_af = tmc(X_train)
    loss = criterion(pyb_af, y_train); bloss_list.append(loss.item())
    
    if (t > 0) and ((bloss_list[t-1]-bloss_list[t])<tor):        
        if (tpat!= 0) and (tpat % patientr) == 0:
            learning_r *= 0.2 
            tpat += 1
            #print('Learning rate reduce to ', learning_r)
            optimizer = torch.optim.Adam(tmc.parameters(), lr=learning_r)
            if learning_r <= lr_tor:
                print('Convergence!')
                break
        elif tpat < patientc:
            tpat += 1
            pass
        else:
            print('Convergence!')
            break
        
    else:
        if loss < bloss:
            #print('Current loss: ', loss.item(), ' | , previous best loss: ', bloss, ' | saving best model ...')
            #torch.save(tmc.state_dict(), './Brain2MBS'+str(X_train.size()[0])+'h'+str(nm)+'k'+str(nk))
            bloss = loss.item()
            tpat = 0
        else:
            tpat += 1
        #tpat += 1

    if tpat == patientc:
        print('Convergence!')
        with torch.no_grad():
            eval_model = TumorClassifier(Fin = 64, dg = 3, nk = nk, nm = nm, Fout = doutput, bias = True).to(device)
            eval_model.load_state_dict(torch.load('./Brain2MBS'+str(X_train.size()[0])+'h'+str(nm)+'k'+str(nk), weights_only = True))
            print((torch.argmax(eval_model(X_test).detach(), axis = 1) == y_test).sum()/len(y_test))
        break

    optimizer.zero_grad()
    loss.backward()
    optimizer.step()

    if t % 10 == 0:
        with torch.no_grad():
            prediction = torch.argmax(pyb_af, axis = 1)
            acc = (torch.argmax(pyb_af, axis = 1) == y_train).sum()/len(y_train)
            testacc = (torch.argmax(tmc(X_test).detach(), axis = 1) == y_test).sum()/len(y_test)
            
            print('| Epoch: ',t+1,'/',str(Iteration),' | Loss: ', np.round(loss.item(), 4),' | Acc: ', acc.item(),' | tAcc: ', testacc.item())

| Epoch:  1 / 10000  | Loss:  0.705  | Acc:  0.5460000038146973  | tAcc:  0.5666666626930237
| Epoch:  11 / 10000  | Loss:  0.6469  | Acc:  0.6380000114440918  | tAcc:  0.5799999833106995
| Epoch:  21 / 10000  | Loss:  0.5948  | Acc:  0.75  | tAcc:  0.6466666460037231
| Epoch:  31 / 10000  | Loss:  0.5479  | Acc:  0.7820000052452087  | tAcc:  0.653333306312561
| Epoch:  41 / 10000  | Loss:  0.5078  | Acc:  0.8140000104904175  | tAcc:  0.6933333277702332
| Epoch:  51 / 10000  | Loss:  0.4748  | Acc:  0.8579999804496765  | tAcc:  0.7166666388511658
| Epoch:  61 / 10000  | Loss:  0.4481  | Acc:  0.8859999775886536  | tAcc:  0.7366666793823242
| Epoch:  71 / 10000  | Loss:  0.4266  | Acc:  0.9039999842643738  | tAcc:  0.7333333492279053
| Epoch:  81 / 10000  | Loss:  0.4088  | Acc:  0.9259999990463257  | tAcc:  0.7433333396911621
| Epoch:  91 / 10000  | Loss:  0.3935  | Acc:  0.9399999976158142  | tAcc:  0.7566666603088379


KeyboardInterrupt: 

In [156]:
with torch.no_grad():
    eval_model = TumorClassifier(Fin = 64, dg = 3, nk = nk, nm = nm, Fout = doutput, bias = True).to(device)
    eval_model.load_state_dict(torch.load('./Brain2MBS'+str(X_train.size()[0])+'h'+str(nm)+'k'+str(nk), weights_only = True))
    print((torch.argmax(eval_model(X_test).detach(), axis = 1) == y_test).sum()/len(y_test))

tensor(0.7133)


In [151]:
with torch.no_grad():
    print('------------------------------------------')
    print('Before adding penalty ... ')
    eval_model = TumorClassifier(Fin = 64, dg = 3, nk = nk, nm = nm, Fout = doutput, bias = True).to(device)
    eval_model.load_state_dict(torch.load('./Brain2MBS'+str(X_train.size()[0])+'h'+str(nm)+'k'+str(nk), weights_only = True))
    
    #eval_model = TumorClassifier(Fin = 100352, dg = 3, nk = nk, nm = nm, Fout = doutput, bias = True).to(device)
    #eval_model.load_state_dict(torch.load('./2brainimg'+str(X_train.size()[0])+'h'+str(nm)+'k'+str(nk), weights_only = True))
    print('------------------------------------------')
    print('After adding penalty ... ')

    n = X_train.size()[0]
    WB = eval_model.classifier.sp1.control_p
    DB = diag_mat_weights(WB.size()[0], 'second').to(device)
    Bestloss = 9999
    
    for i in range(10):
        MPSy = eval_model(X_train)
        LambdaB1 = ECM(model = eval_model.classifier, num_neurons = nm, num_knots = nk, L = 1)
        LambdaB2 = ECM(model = eval_model.classifier, num_neurons = nm, num_knots = nk, L = 2)
        
        B1 = eval_model.classifier.inter['ebasic']
        B2 = eval_model.classifier.inter['ebasic2']
        P2 = (torch.linalg.pinv(B2.T @ B2) @ B2.T @ B2)
        
        By1 = eval_model.classifier.inter['basic']
        By2 = eval_model.classifier.inter['basic2']
        
        size1 = B1.size()[1]
        size2 = B2.size()[1]

        B1 = B1.view(nm, nk, size1)
        B2 = B2.view(nm, nk, size2)

        NW1 = torch.empty((nk, nm))
        NW2 = torch.empty((nk, nm))
        NB1 = torch.empty((nm))
        NB2 = torch.empty((nm))
        for i in range(nm):
            B1y = By1[:,i] - eval_model.classifier.sp1.bias.data[i]
            B2y = By2[:,i] - eval_model.classifier.sp2.bias.data[i]

            BB1 = B1[i].T
            BB2 = B2[i].T
            PB1 = (torch.linalg.pinv(BB1.T @ BB1) @ BB1.T @ BB1)
            PB2 = (torch.linalg.pinv(BB2.T @ BB2) @ BB2.T @ BB2)

            # Update the weights and bias
            NW1[:, i] = (torch.inverse(BB1.T @ BB1 + (LambdaB1/size1) * (DB.T @ DB)) @ BB1.T @ B1y)
            NW2[:, i] = (torch.inverse(BB2.T @ BB2 + (LambdaB2/size2) * (DB.T @ DB)) @ BB2.T @ B2y)
            NB1[i] = torch.mean(By1[:,i] - (NW1[:,i] @ BB1.T))
            NB2[i] = torch.mean(By2[:,i] - (NW2[:,i] @ BB2.T))
            
        # update the weight
        getattr(eval_model.classifier.sp1, 'control_p').data = NW1
        getattr(eval_model.classifier.sp2, 'control_p').data = NW2
        getattr(eval_model.classifier.sp1, 'bias').data = NB1
        getattr(eval_model.classifier.sp2, 'bias').data = NB2
        

        MPSy = eval_model(X_train)
        trainloss = np.round(criterion(MPSy.detach(), y_train).item(), 5)
        GCV = np.round((criterion(MPSy.detach(), y_train)/(n*size2-torch.trace(P2))).item(), 5)
        
        if trainloss < Bestloss:
            BestLambdaB1, BestLambdaB2 = LambdaB1, LambdaB2
            Bestloss = trainloss
            
        MPSy = eval_model(X_test)
        print('Lambda: ', np.round(LambdaB1, 5),' and ', np.round(LambdaB2, 5),'| Training Loss: ', trainloss, ' | GCV: ', GCV)

------------------------------------------
Before adding penalty ... 
------------------------------------------
After adding penalty ... 
Lambda:  0.04902  and  0.06129 | Training Loss:  0.3744  | GCV:  0.0
Lambda:  0.04903  and  0.0613 | Training Loss:  0.3744  | GCV:  0.0
Lambda:  0.04903  and  0.0613 | Training Loss:  0.3744  | GCV:  0.0
Lambda:  0.04903  and  0.06131 | Training Loss:  0.3744  | GCV:  0.0
Lambda:  0.04903  and  0.06132 | Training Loss:  0.3744  | GCV:  0.0
Lambda:  0.04904  and  0.06133 | Training Loss:  0.37441  | GCV:  0.0
Lambda:  0.04904  and  0.06134 | Training Loss:  0.37441  | GCV:  0.0
Lambda:  0.04904  and  0.06134 | Training Loss:  0.37441  | GCV:  0.0
Lambda:  0.04904  and  0.06135 | Training Loss:  0.37441  | GCV:  0.0
Lambda:  0.04905  and  0.06136 | Training Loss:  0.37441  | GCV:  0.0


In [152]:
fast_epoch = 10001
eval_model = TumorClassifier(Fin = 64, dg = 3, nk = nk, nm = nm, Fout = doutput, bias = True).to(device)
eval_model.load_state_dict(torch.load('./Brain2MBS'+str(X_train.size()[0])+'h'+str(nm)+'k'+str(nk), weights_only = True))

lr_ft = 1e-3; tor = 1e-5
optimizer = torch.optim.Adam(eval_model.parameters(), lr=lr_ft)
early_stopper = EarlyStopper(patience=10, min_delta=tor)
scheduler = torch.optim.lr_scheduler.ReduceLROnPlateau(optimizer, 'min', patience=3, factor=0.2)
patientr = 5; patientc = 10; tpat = 0; bloss = 9999; bloss_list = [bloss]
for t in range(1, fast_epoch):

    # Forward pass: Compute predicted y by passing x to the modelsp
    eval_model.train()
    pyb_af = eval_model(X_train)

    WB1 = eval_model.classifier.sp1.control_p.data; DB1 = diag_mat_weights(WB1.size()[0]).to(device)
    WB2 = eval_model.classifier.sp2.control_p.data; DB2 = diag_mat_weights(WB2.size()[0]).to(device)

    loss = criterion(pyb_af, y_train) + (BestLambdaB1/n) * torch.norm(DB1 @ WB1) + (BestLambdaB2/n) * torch.norm(DB2 @ WB2)
    bloss_list.append(loss)
    
    '''
    if early_stopper.early_stop(loss):   
        print('Early Stop at Epoch: ', t)
        with torch.no_grad():
            tsprediction = torch.argmax(eval_model(X_test), axis = 1)
            tsacc = (tsprediction == y_test).sum()/len(y_test)
            
            tsprediction = torch.argmax(eval_model(X_test), axis = 1)
            tsacc = (tsprediction == y_test).sum()/len(y_test)
            print('| Epoch: ',t,'/',str(fast_epoch-1),' | Train ACC: ', np.round(tracc.item(), 4),' | Test ACC: ', tsacc.item())
        break  

    scheduler.step(loss)
    '''

    if (t > 0) and ((bloss_list[t-1]-bloss_list[t])<tor):        
        if (tpat!= 0) and (tpat % patientr) == 0:
            lr_ft *= 0.2 
            tpat += 1
            print('Learning rate reduce to ', lr_ft)
            optimizer = torch.optim.Adam(eval_model.parameters(), lr=lr_ft)
            if lr_ft <= lr_tor:
                print('Convergence!')
                break
        elif tpat < patientc:
            tpat += 1
            pass
        else:
            print('Convergence!')
            break
        
    else:
        if loss < bloss:
            print('Current loss: ', loss.item(), ' | , previous best loss: ', bloss, ' | saving best model ...')
            torch.save(eval_model.state_dict(), './PBrain2DPS'+str(X_train.size()[0])+'h'+str(nm)+'k'+str(nk))
            bloss = loss.item()
            tpat = 0
        else:
            tpat += 1

    if tpat == patientc:
        print('Convergence!')
        with torch.no_grad():
            eval_model.load_state_dict(torch.load('./PBrain2DPS'+str(X_train.size()[0])+'h'+str(nm)+'k'+str(nk), weights_only = True))
            print((torch.argmax(eval_model(X_test).detach(), axis = 1) == y_test).sum()/len(y_test))
        break
        
    optimizer.zero_grad()
    loss.backward()
    optimizer.step()
    
    print('| Epoch: ',t,'/',str(fast_epoch-1),' | Loss: ', np.round(loss.item(), 5))
    if t % 10 == 0:
        eval_model.eval()
        with torch.no_grad():
            trprediction = torch.argmax(pyb_af, axis = 1)
            tracc = (trprediction == y_train).sum()/len(y_train)
            print('| Epoch: ',t,'/',str(fast_epoch-1),' | Train ACC: ', np.round(tracc.item(), 4))

        if t % 50 == 0:
            with torch.no_grad():
                tsprediction = torch.argmax(eval_model(X_test), axis = 1)
                tsacc = (tsprediction == y_test).sum()/len(y_test)
                print('| Epoch: ',t,'/',str(fast_epoch-1),' | Test ACC: ', np.round(tsacc.item(), 5))

Current loss:  0.38404369354248047  | , previous best loss:  9999  | saving best model ...
| Epoch:  1 / 10000  | Loss:  0.38404
| Epoch:  2 / 10000  | Loss:  0.38404
| Epoch:  3 / 10000  | Loss:  0.38403
| Epoch:  4 / 10000  | Loss:  0.38403
| Epoch:  5 / 10000  | Loss:  0.38403
| Epoch:  6 / 10000  | Loss:  0.38402
Learning rate reduce to  0.0002
| Epoch:  7 / 10000  | Loss:  0.38402
| Epoch:  8 / 10000  | Loss:  0.38401
| Epoch:  9 / 10000  | Loss:  0.38401
| Epoch:  10 / 10000  | Loss:  0.38401
| Epoch:  10 / 10000  | Train ACC:  0.94
Convergence!
tensor(0.7100)


In [108]:
resnet50 = models.resnet50(weights=True)
for param in resnet50.parameters():
    param.requires_grad = False

resnet50.fc = MPSv3(input_dim = resnet50.fc.in_features, degree = 3, num_knots = 10, num_neurons = 100, output_dim = 2, bias = True).to(device)
learning_r = 1e-3
criterion = nn.CrossEntropyLoss()
optimizer = torch.optim.Adam(resnet50.parameters(), lr=learning_r)
Iteration = 10000; tor = 1e-5
scheduler = torch.optim.lr_scheduler.ReduceLROnPlateau(optimizer, 'min', patience=5, factor=0.2)
early_stopper = EarlyStopper(patience=10, min_delta=1e-5)
bloss = 9999

for t in range(Iteration):

    train_loss = 0
    resnet50.train()
    for inputs, labels in dataloader:
        print(inputs.size())
        optimizer.zero_grad()
        outputs = resnet50(inputs)
        loss = criterion(outputs, labels)
        loss.backward()
        optimizer.step()
        train_loss += loss.item()

    print(f'Epoch [{t+1}/{Iteration}], Loss: {train_loss:.4f}')
    if train_loss < bloss:
        best_model = resnet50.state_dict()
        bloss = train_loss
        
    scheduler.step(train_loss)

    if early_stopper.early_stop(train_loss):
        print('Convergence!')
        break 

    resnet50.eval()
    with torch.no_grad():
        correct = 0
        total = 0
    
        outputs = resnet50(X_test)
        _, predicted = torch.max(outputs.data, 1)
        acc = (predicted == y_test).sum()/len(y_test)
        print('testing: ', acc.item())
        
print('Saving the best model ...')
torch.save(best_model, './model/MBS_Brain_n'+str(X_train.size()[0])+'h'+str(nm)+'k'+str(nk))

torch.Size([32, 3, 224, 224])
torch.Size([32, 3, 224, 224])
torch.Size([32, 3, 224, 224])
torch.Size([32, 3, 224, 224])
torch.Size([32, 3, 224, 224])
torch.Size([32, 3, 224, 224])
torch.Size([32, 3, 224, 224])
torch.Size([32, 3, 224, 224])
torch.Size([32, 3, 224, 224])
torch.Size([32, 3, 224, 224])
torch.Size([32, 3, 224, 224])
torch.Size([32, 3, 224, 224])
torch.Size([32, 3, 224, 224])
torch.Size([32, 3, 224, 224])
torch.Size([32, 3, 224, 224])
torch.Size([20, 3, 224, 224])
Epoch [1/10000], Loss: 13.4171
testing:  0.48500001430511475
torch.Size([32, 3, 224, 224])
torch.Size([32, 3, 224, 224])
torch.Size([32, 3, 224, 224])
torch.Size([32, 3, 224, 224])
torch.Size([32, 3, 224, 224])
torch.Size([32, 3, 224, 224])
torch.Size([32, 3, 224, 224])
torch.Size([32, 3, 224, 224])
torch.Size([32, 3, 224, 224])
torch.Size([32, 3, 224, 224])
torch.Size([32, 3, 224, 224])
torch.Size([32, 3, 224, 224])
torch.Size([32, 3, 224, 224])
torch.Size([32, 3, 224, 224])
torch.Size([32, 3, 224, 224])
torch.Siz

KeyboardInterrupt: 

In [23]:
device = torch.device("cuda:0" if torch.cuda.is_available() else "cpu")

class TumorClassifier(nn.Module):
    def __init__(self, Fin, dg, nk, nm, Fout, bias):
        super(TumorClassifier, self).__init__()
        self.features = nn.Sequential(
            nn.Conv2d(3, 16, kernel_size=3, padding=1),
            nn.ReLU(inplace=True),
            nn.MaxPool2d(kernel_size=2, stride=2),
            nn.Conv2d(16, 32, kernel_size=3, padding=1),
            nn.ReLU(inplace=True),
            nn.MaxPool2d(kernel_size=2, stride=2),
        )
        self.gap = nn.Flatten()
        self.classifier = MPSv3(input_dim = Fin, degree = dg, num_knots = nk, num_neurons = nm, output_dim = Fout, bias = True).to(device)
        self.sm = nn.Softmax(dim = 1)
    def forward(self, x):
        x = self.features(x)
        x = self.gap(x)
        #x = x.view(x.size(0), -1)
        x = self.classifier(x)
        x = self.sm(x)

        return x

In [25]:
dataset = TensorDataset(X_train, y_train)
dataloader = DataLoader(dataset, batch_size=32, shuffle=True)

nk = 10; nm = 50; doutput = 2
tmc = TumorClassifier(Fin = 100352, dg = 3, nk = nk, nm = nm, Fout = doutput, bias = True)

learning_r = 1e-2
criterion = nn.CrossEntropyLoss()
optimizer = torch.optim.Adam(tmc.parameters(), lr=learning_r)
Iteration = 10001; bloss_list = []; tor = 1e-5; bloss = 9999

scheduler = torch.optim.lr_scheduler.ReduceLROnPlateau(optimizer, 'min', patience=5, factor=0.2)
early_stopper = EarlyStopper(patience=10, min_delta=1e-5)

for t in range(Iteration):

    tmc.train()
    # Forward pass: Compute predicted y by passing x to the modelsp

    train_loss = 0
    pch_acc = 0
    for inputs, labels in dataloader:
        optimizer.zero_grad()
        outputs = tmc(inputs)
        loss = criterion(outputs, labels)
        loss.backward()
        optimizer.step()
        train_loss += loss
        pch_acc += (torch.argmax(outputs, axis = 1) == labels).sum()

    avg_train_acc = pch_acc.item()/len(y_train)
    avg_train_loss = np.round(train_loss.item()/len(dataloader), 6)
    scheduler.step(avg_train_loss)

    if avg_train_loss < bloss:
        best_model = tmc.state_dict()
        bloss = avg_train_loss

    if early_stopper.early_stop(avg_train_loss):
        print('Convergence!')
        break 
        
    print('| Epoch: ',t+1,'/',str(Iteration),' | Loss: ', avg_train_loss,' | Acc: ', avg_train_acc)
    
    tmc.eval()
    if((t+1) % 10 == 0):
        with torch.no_grad():
            avg_test_acc = (torch.argmax(tmc(X_test).detach(), axis = 1) == y_test).sum()/len(y_test)
            print(avg_test_acc)

print('Saving the best model ...')
torch.save(best_model, './model/MBS_Brain_n'+str(X_train.size()[0])+'h'+str(nm)+'k'+str(nk))

| Epoch:  1 / 10001  | Loss:  0.635742  | Acc:  0.64
| Epoch:  2 / 10001  | Loss:  0.510162  | Acc:  0.838
| Epoch:  3 / 10001  | Loss:  0.47011  | Acc:  0.864
| Epoch:  4 / 10001  | Loss:  0.442819  | Acc:  0.888
| Epoch:  5 / 10001  | Loss:  0.428159  | Acc:  0.91
| Epoch:  6 / 10001  | Loss:  0.415759  | Acc:  0.922
| Epoch:  7 / 10001  | Loss:  0.407487  | Acc:  0.924
| Epoch:  8 / 10001  | Loss:  0.402444  | Acc:  0.932
| Epoch:  9 / 10001  | Loss:  0.402321  | Acc:  0.924
| Epoch:  10 / 10001  | Loss:  0.396734  | Acc:  0.934
tensor(0.9100)
| Epoch:  11 / 10001  | Loss:  0.391842  | Acc:  0.936
| Epoch:  12 / 10001  | Loss:  0.385826  | Acc:  0.942
| Epoch:  13 / 10001  | Loss:  0.38053  | Acc:  0.94
| Epoch:  14 / 10001  | Loss:  0.376995  | Acc:  0.948
| Epoch:  15 / 10001  | Loss:  0.378152  | Acc:  0.944
| Epoch:  16 / 10001  | Loss:  0.372062  | Acc:  0.952
| Epoch:  17 / 10001  | Loss:  0.371311  | Acc:  0.952
| Epoch:  18 / 10001  | Loss:  0.367258  | Acc:  0.956
| Epoch: 

In [26]:
with torch.no_grad():
    print('------------------------------------------')
    print('Before adding penalty ... ')
    eval_model = TumorClassifier(Fin = 100352, dg = 3, nk = nk, nm = nm, Fout = doutput, bias = True).to(device)
    eval_model.load_state_dict(torch.load('./model/MBS_Brain_n'+str(X_train.size()[0])+'h'+str(nm)+'k'+str(nk), weights_only = True))
    print('------------------------------------------')
    print('After adding penalty ... ')

    
    WB = eval_model.classifier.sp1.control_p
    DB = diag_mat_weights(WB.size()[0], 'second').to(device)
    Bestloss = 9999
    
    for i in range(10):
        MPSy = eval_model(X_train)
        LambdaB1 = ECM(model = eval_model.classifier, num_neurons = nm, num_knots = nk, L = 1)
        B1 = eval_model.classifier.inter['ebasic']
        P2 = (torch.linalg.pinv(B1.T @ B1) @ B1.T @ B1)
        By1 = eval_model.classifier.inter['basic']        
        size1 = B1.size()[1]
        B1 = B1.view(nm, nk, size1)
        NW1 = torch.empty((nk, nm))
        NB1 = torch.empty((nm))
        for i in range(nm):
            B1y = By1[:,i] - eval_model.classifier.sp1.bias.data[i]
            BB1 = B1[i].T
            PB1 = (torch.linalg.pinv(BB1.T @ BB1) @ BB1.T @ BB1)
            # Update the weights and bias
            NW1[:, i] = (torch.inverse(BB1.T @ BB1 + (LambdaB1/size1) * (DB.T @ DB)) @ BB1.T @ B1y)
            NB1[i] = torch.mean(By1[:,i] - (NW1[:,i] @ BB1.T))
            
        # update the weight
        getattr(eval_model.classifier.sp1, 'control_p').data = NW1
        getattr(eval_model.classifier.sp1, 'bias').data = NB1
        

        MPSy = eval_model(X_train)
        trainloss = np.round(criterion(MPSy.detach(), y_train).item(), 5)
        
        if trainloss < Bestloss:
            BestLambdaB = LambdaB1
            Bestloss = trainloss
            
        print('Lambda: ', np.round(LambdaB1, 5),'| Training Loss: ', trainloss)

------------------------------------------
Before adding penalty ... 
------------------------------------------
After adding penalty ... 
Lambda:  0.16596 | Training Loss:  0.32379
Lambda:  0.166 | Training Loss:  0.32379
Lambda:  0.16604 | Training Loss:  0.32379
Lambda:  0.16607 | Training Loss:  0.32379
Lambda:  0.16611 | Training Loss:  0.3238
Lambda:  0.16615 | Training Loss:  0.3238
Lambda:  0.16618 | Training Loss:  0.3238
Lambda:  0.16622 | Training Loss:  0.3238
Lambda:  0.16625 | Training Loss:  0.3238
Lambda:  0.16629 | Training Loss:  0.3238


In [28]:
fast_epoch = 10001
eval_model = TumorClassifier(Fin = 100352, dg = 3, nk = nk, nm = nm, Fout = doutput, bias = True).to(device)
eval_model.load_state_dict(torch.load('./model/MBS_Brain_n'+str(X_train.size()[0])+'h'+str(nm)+'k'+str(nk), weights_only = True))
lr_ft = 1e-2
optimizer = torch.optim.Adam(eval_model.parameters(), lr=lr_ft)
early_stopper = EarlyStopper(patience=10, min_delta=1e-5)

for t in range(1, fast_epoch):

    eval_model.train()
    # Forward pass: Compute predicted y by passing x to the modelsp

    WB1 = eval_model.classifier.sp1.control_p.data; DB1 = diag_mat_weights(WB1.size()[0]).to(device)


    train_loss = 0
    pch_acc = 0
    for inputs, labels in dataloader:
        optimizer.zero_grad()
        outputs = eval_model(inputs)
        loss = criterion(outputs, labels) + (BestLambdaB/len(y_train)) * torch.norm(DB1 @ WB1)
        loss.backward()
        optimizer.step()
        train_loss += loss
        pch_acc += (torch.argmax(outputs, axis = 1) == labels).sum()

    avg_train_acc = pch_acc.item()/len(y_train)
    avg_train_loss = np.round(train_loss.item()/len(dataloader), 6)
    scheduler.step(avg_train_loss)

    if avg_train_loss < bloss:
        best_model = eval_model.state_dict()
        bloss = avg_train_loss

    if early_stopper.early_stop(avg_train_loss):
        print('Convergence!')
        with torch.no_grad():
            avg_test_acc = (torch.argmax(eval_model(X_test).detach(), axis = 1) == y_test).sum()/len(y_test)
            print(avg_test_acc)
            
        break 
        
    print('| Epoch: ',t+1,'/',str(Iteration),' | Loss: ', avg_train_loss,' | Acc: ', avg_train_acc)
    
    eval_model.eval()
    if((t+1) % 10 == 0):
        with torch.no_grad():
            avg_test_acc = (torch.argmax(eval_model(X_test).detach(), axis = 1) == y_test).sum()/len(y_test)
            print(avg_test_acc)

| Epoch:  2 / 10001  | Loss:  0.339708  | Acc:  0.99
| Epoch:  3 / 10001  | Loss:  0.339449  | Acc:  0.99
| Epoch:  4 / 10001  | Loss:  0.339459  | Acc:  0.99
| Epoch:  5 / 10001  | Loss:  0.339493  | Acc:  0.99
| Epoch:  6 / 10001  | Loss:  0.340683  | Acc:  0.99
| Epoch:  7 / 10001  | Loss:  0.339579  | Acc:  0.99
| Epoch:  8 / 10001  | Loss:  0.339762  | Acc:  0.99
| Epoch:  9 / 10001  | Loss:  0.339756  | Acc:  0.99
| Epoch:  10 / 10001  | Loss:  0.339755  | Acc:  0.99
tensor(0.9100)
| Epoch:  11 / 10001  | Loss:  0.339774  | Acc:  0.99
| Epoch:  12 / 10001  | Loss:  0.339819  | Acc:  0.99
| Epoch:  13 / 10001  | Loss:  0.33984  | Acc:  0.99
Convergence!
tensor(0.9100)


## CNN - GAP

In [15]:
device = torch.device("cuda:0" if torch.cuda.is_available() else "cpu")

class CNN(nn.Module):
    def __init__(self):
        super(CNN, self).__init__()
        self.features = nn.Sequential(
            nn.Conv2d(3, 16, kernel_size=3, padding=1),
            nn.ReLU(inplace=True),
            nn.MaxPool2d(kernel_size=2, stride=2),
            nn.Conv2d(16, 32, kernel_size=3, padding=1),
            nn.ReLU(inplace=True),
            nn.MaxPool2d(kernel_size=2, stride=2),
        )
        
        self.gap = nn.AdaptiveAvgPool2d((1, 1))
        self.classifier = nn.Linear(32, 2)
        #self.flatten = nn.Flatten()
        self.sm = nn.Softmax(dim = 1)
    def forward(self, x):
        x = self.features(x)
        x = self.gap(x)
        x = x.view(x.size(0), -1)
        x = self.classifier(x)
        x = self.sm(x)

        return x

In [16]:
dataset = TensorDataset(X_train, y_train)
dataloader = DataLoader(dataset, batch_size=32, shuffle=True)

cnn = CNN()

learning_r = 1e-3
criterion = nn.CrossEntropyLoss()
optimizer = torch.optim.Adam(cnn.parameters(), lr=learning_r)
Iteration = 10001; tor = 1e-5; bloss = 9999

scheduler = torch.optim.lr_scheduler.ReduceLROnPlateau(optimizer, 'min', patience=5, factor=0.2)
early_stopper = EarlyStopper(patience=10, min_delta=1e-5)

for t in range(Iteration):

    cnn.train()
    # Forward pass: Compute predicted y by passing x to the modelsp

    train_loss = 0
    pch_acc = 0
    for inputs, labels in dataloader:
        optimizer.zero_grad()
        outputs = cnn(inputs)
        loss = criterion(outputs, labels)
        loss.backward()
        optimizer.step()
        train_loss += loss
        pch_acc += (torch.argmax(outputs, axis = 1) == labels).sum()

    avg_train_acc = pch_acc.item()/len(y_train)
    avg_train_loss = np.round(train_loss.item()/len(dataloader), 6)
    scheduler.step(avg_train_loss)

    if avg_train_loss < bloss:
        best_model = cnn.state_dict()
        bloss = avg_train_loss

    if early_stopper.early_stop(avg_train_loss):
        print('Convergence!')
        with torch.no_grad():
            avg_test_acc = (torch.argmax(cnn(X_test).detach(), axis = 1) == y_test).sum()/len(y_test)
            print(avg_test_acc)
        break 
        
    print('| Epoch: ',t+1,'/',str(Iteration),' | Loss: ', avg_train_loss,' | Acc: ', avg_train_acc)
    
    tmc.eval()
    if((t+1) % 10 == 0):
        with torch.no_grad():
            avg_test_acc = (torch.argmax(cnn(X_test).detach(), axis = 1) == y_test).sum()/len(y_test)
            print(avg_test_acc)

| Epoch:  1 / 10001  | Loss:  0.684353  | Acc:  0.582
| Epoch:  2 / 10001  | Loss:  0.660086  | Acc:  0.5
| Epoch:  3 / 10001  | Loss:  0.636794  | Acc:  0.5
| Epoch:  4 / 10001  | Loss:  0.60846  | Acc:  0.652
| Epoch:  5 / 10001  | Loss:  0.563874  | Acc:  0.844
| Epoch:  6 / 10001  | Loss:  0.521396  | Acc:  0.886
| Epoch:  7 / 10001  | Loss:  0.49721  | Acc:  0.878
| Epoch:  8 / 10001  | Loss:  0.474551  | Acc:  0.886
| Epoch:  9 / 10001  | Loss:  0.471732  | Acc:  0.87
| Epoch:  10 / 10001  | Loss:  0.458934  | Acc:  0.886
tensor(0.8050)
| Epoch:  11 / 10001  | Loss:  0.449763  | Acc:  0.898
| Epoch:  12 / 10001  | Loss:  0.448291  | Acc:  0.894
| Epoch:  13 / 10001  | Loss:  0.443393  | Acc:  0.896
| Epoch:  14 / 10001  | Loss:  0.440811  | Acc:  0.898
| Epoch:  15 / 10001  | Loss:  0.439684  | Acc:  0.892
| Epoch:  16 / 10001  | Loss:  0.43945  | Acc:  0.896
| Epoch:  17 / 10001  | Loss:  0.434486  | Acc:  0.894
| Epoch:  18 / 10001  | Loss:  0.435455  | Acc:  0.89
| Epoch:  19 