In [1]:
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import SplineTransformer
from torch.utils.data import DataLoader, TensorDataset
from collections import OrderedDict
from torch import nn
import torch
import matplotlib.pyplot as plt
import pandas as pd
import numpy as np
import torchvision
from torchvision import datasets, transforms
from torch.optim.lr_scheduler import StepLR
import torch.nn.functional as F
from collections import Counter
import glob
import cv2
import os

In [2]:
def diag_mat_weights(dimp, type = 'first'):
    if type == 'first':
        dg = np.zeros((dimp-1, dimp))
        for i in range(dimp-1):
            dg[i,i] = -1
            dg[i,i+1]= 1
    elif type == 'second':
        dg = np.zeros((dimp-2, dimp))
        for i in range(dimp-2):
            dg[i,i] = -1
            dg[i,i+1]= 2
            dg[i,i+2]= -1
    else:
        pass
    return torch.Tensor(dg)
    

class BSL(nn.Module):
    def __init__(self, degree, num_knots, num_neurons, bias = True):
        super(BSL, self).__init__()
        self.degree = degree
        self.num_knots = num_knots
        self.num_neurons = num_neurons
        self.control_p = nn.Parameter(torch.randn(self.num_knots, self.num_neurons))
        
        if bias:
            self.bias = nn.Parameter(torch.randn(self.num_neurons))
        else:
            self.register_parameter('bias', None)
            
        self.inter = {}
    
    def basis_function(self, x, i, k, t):
    
        # Base case: degree 0 spline
        if k == 0:
            return ((t[i] <= x) & (x < t[i + 1])).float()
    
        # Recursive case
        denom1 = t[i + k] - t[i]
        denom2 = t[i + k + 1] - t[i + 1]
    
        term1 = 0
        if denom1 != 0:
            term1 = (x - t[i]) / denom1 * self.basis_function(x, i, k - 1, t)
    
        term2 = 0
        if denom2 != 0:
            term2 = (t[i + k + 1] - x) / denom2 * self.basis_function(x, i + 1, k - 1, t)
    
        return term1 + term2

    def knots_distribution(self, dg, nk):

        knots = torch.cat([torch.linspace(-0.002, -0.001, steps=dg),            # Add repeated values at the start for clamping
            torch.linspace(0, 1, nk-2*dg-2),  # Uniform knot spacing in the middle
            torch.linspace(1.001, 1.002, steps=dg)           # Add repeated values at the end for clamping
            ]).view(-1,1)
        
        knots = torch.cat([torch.linspace(0, 1, nk-2)          # Add repeated values at the end for clamping
            ]).view(-1,1)

        return knots
    
    def basis_function2(self, x, spl):
        basis_output = spl.fit_transform(x.cpu().numpy())
        return basis_output
            
    def forward(self, x):
        batch_size, num_features = x.size()
        device = x.device
        
        # Create knot vector and apply B-spline basis functions for each feature

        '''
        knots = torch.cat([
                        torch.zeros(self.degree),               # Add repeated values at the start for clamping
                        torch.linspace(0, 1, self.num_knots - self.degree + 1),  # Uniform knot spacing in the middle
                        torch.ones(self.degree)                 # Add repeated values at the end for clamping
                    ]).to(device)

        # Apply B-spline basis functions for each feature

        basises = []
        
        
        for feature in range(num_features):
            # Calculate B-spline basis functions for this feature
            basis = torch.stack([self.basis_function(x[:, feature], i, self.degree, knots) 
                                 for i in range(self.num_knots)], dim=-1)
            basises.append(basis)
            
        '''
    
        basises = []
        knots = self.knots_distribution(self.degree, self.num_knots)
        #knots = knots.to(device)
        spl = SplineTransformer(n_knots=self.num_knots, degree=self.degree, knots = knots)

        
        for feature in range(num_features):
            # Calculate B-spline basis functions for this feature
            
            basis = self.basis_function2(x[:, feature].reshape(-1,1), spl)
            basis = torch.Tensor(basis).to(device)
            basises.append(basis)
        
        if num_features == 1:
            tout = basises[0] @ self.control_p
            self.inter['basic'] = basises[0].T
        else:
            self.inter['basic'] = torch.reshape(torch.stack(basises, dim = 1), (batch_size, self.num_knots * self.num_neurons)).T
            basises = torch.stack(basises)
            tout = basises.permute(1,2,0) * self.control_p
            tout = tout.sum(dim =1)
                
        if self.bias is not None:
            tout += self.bias        
            
        return tout


class NormLayer(nn.Module):
    def __init__(self):
        super(NormLayer, self).__init__()

    def forward(self, x):
        min_val = torch.min(x, axis = 1).values.reshape(-1,1)
        max_val = torch.max(x, axis = 1).values.reshape(-1,1)

        x = (x - min_val)/(max_val - min_val)  # Rescale to [0, 1]
        return x.detach()
    
class BSpline_block(nn.Module):
    def __init__(self, degree, num_knots, num_neurons, dropout = 0.0, bias = True):
        super(BSpline_block, self).__init__()

        self.block = nn.Sequential(OrderedDict([
            ('norm', NormLayer()),
            ('BSL', BSL(degree = degree, num_knots = num_knots, num_neurons = num_neurons, bias = bias)),
            ('drop', nn.Dropout(dropout)),
        ]))
        
    def forward(self, x):
        return self.block(x)
        
class StackBS_block(nn.Module):
    def __init__(self, block, degree, num_knots, num_neurons, num_blocks, dropout = 0.0, bias = True):
        super().__init__()
        self.model = nn.ModuleDict({
            f'block_{i}': block(degree = degree, num_knots = num_knots, num_neurons = num_neurons)
            for i in range(num_blocks)
        })

    def forward(self, x):
        for name, block in self.model.items():
            x = block(x)
        return x

## Modeling

In [3]:
def train(model, device, train_loader, optimizer, epoch):
    model.train()
    criterion = nn.CrossEntropyLoss() 
    for batch_idx, (data, target) in enumerate(train_loader):
        data, target = data.to(device), target.to(device)
        optimizer.zero_grad()
        output = model(data)
        loss = criterion(output, target)
        loss.backward()
        optimizer.step()
        if batch_idx % 20 == 0:
            print('Train Epoch: {} [{}/{} ({:.0f}%)]\tLoss: {:.6f}'.format(
                epoch, batch_idx * len(data), len(train_loader.dataset),
                100. * batch_idx / len(train_loader), loss.item()))
        scheduler.step()
        
def test(model, device, test_loader):
    model.eval()
    
    test_loss = 0
    correct = 0
    with torch.no_grad():
        for data, target in test_loader:
            data, target = data.to(device), target.to(device)
            output = model(data)
            test_loss += F.nll_loss(output, target, reduction='sum').item()  # sum up batch loss
            output = torch.log_softmax(output, dim=1)
            _, pred = torch.max(output, dim = 1)
            correct += pred.eq(target.view_as(pred)).sum().item()

    test_loss /= len(test_loader.dataset)

    print('\nTest set: Average loss: {:.4f}, Accuracy: {}/{} ({:.0f}%)\n'.format(
        test_loss, correct, len(test_loader.dataset),
        100. * correct / len(test_loader.dataset)))

In [5]:
transform_train = transforms.Compose([
    transforms.Resize((150, 150)),
    transforms.RandomRotation(20),
    transforms.RandomHorizontalFlip(),
    transforms.ToTensor(),
    transforms.Normalize((0.4914, 0.4822, 0.4465), (0.2023, 0.1994, 0.2010)),
])

transform_test = transforms.Compose([
    transforms.Resize((150, 150)),
    transforms.ToTensor(),
    transforms.Normalize((0.4914, 0.4822, 0.4465), (0.2023, 0.1994, 0.2010)),
])

dataset_root = '/home/users/yhung7/DPS/Chest'
train_dir = os.path.join(dataset_root, 'train')
test_dir = os.path.join(dataset_root, 'val')

train_dataset = datasets.ImageFolder(root=train_dir, transform=transform_train)
test_dataset = datasets.ImageFolder(root=test_dir, transform=transform_test)

batch_size = 32

train_loader = DataLoader(train_dataset, batch_size=batch_size, shuffle=True, num_workers=2)
test_loader = DataLoader(test_dataset, batch_size=batch_size, shuffle=False, num_workers=2)

In [None]:
device = torch.device("cuda:0" if torch.cuda.is_available() else "cpu")
model = torchvision.models.densenet121(pretrained=True).to(device)

#for param in model.parameters():
    #param.requires_grad = False   
    
dg = 3; nk = 15; nm = 50; nl = 2; doutput = 2; Iteration = 100

num_ftrs = model.classifier.in_features
model.classifier = nn.Sequential(
    torch.nn.Linear(num_ftrs, nm),
    StackBS_block(BSpline_block, degree = dg, num_knots = nk, num_neurons = nm, num_blocks = nl, dropout = 0.0),
    torch.nn.Linear(nm, doutput),
).to(device)

optimizer = torch.optim.Adam(model.parameters(), lr=1e-3)
criterion = nn.CrossEntropyLoss() 
scheduler = torch.optim.lr_scheduler.StepLR(optimizer, step_size=7, gamma=0.1) # Learning Scheduler

In [68]:
for e in range(1, Iteration+1):
    train(model, device, train_loader, optimizer, Iteration)
    test(model, device, test_loader)


Test set: Average loss: -0.4572, Accuracy: 352/703 (50%)


Test set: Average loss: -0.4423, Accuracy: 360/703 (51%)


Test set: Average loss: -0.4417, Accuracy: 339/703 (48%)



KeyboardInterrupt: 

In [8]:
"""
Model setting:

`device`: running the program with cpu or gpu
`tmc`: the classifier that equip with DNN-S 
`nm` : number of neuron in DNN-S
`nk` : number of knot in DNN-S
`patientc` : (early-stop crierion) If the model didn't improve in n epoch then stop.
`patientr` : If the model didn't improve in n epoch then decrease learning rate with specific factor.

"""

# experiment setting
Iteration = 10000; bloss_list = []; tor = 1e-5; lr_tor = 1e-6
patientc = 10; patientr = 5; tpat = 0; bloss = 9999
nm = 100; nk = 15; doutput = 2
device = torch.device("cuda:0" if torch.cuda.is_available() else "cpu")

# model parameter 
tmc = TumorClassifier(Fin = 100352, dg = 3, nk = nk, nm = nm, Fout = doutput, bias = True)
learning_r = 1e-2
optimizer = torch.optim.Adam(tmc.parameters(), lr=learning_r)
criterion = nn.CrossEntropyLoss()

In [5]:
for t in range(Iteration):

    # Forward pass: Compute predicted y by passing x to the modelsp
    pyb_af = tmc(X_train)
    loss = criterion(pyb_af, y_train); bloss_list.append(loss.item())
    
    if (t > 0) and ((bloss_list[t-1]-bloss_list[t])<tor):        
        if (tpat % patientr) == 0:
            learning_r *= 0.2 
            tpat += 1
            #print('Learning rate reduce to ', learning_r)
            optimizer = torch.optim.Adam(tmc.parameters(), lr=learning_r)
            if learning_r <= lr_tor:
                print('Convergence!')
                break
        elif tpat < patientc:
            tpat += 1
            pass
        else:
            print('Convergence!')
            break
        
    else:
        if loss < bloss:
            #torch.save(tmc.state_dict(), './brainimg'+str(X_train.size()[0])+'h'+str(nm)+'k'+str(nk))
            bloss = loss.item()
            tpat = 0
        tpat += 1

    if tpat == patientc:
        print('Convergence!')
        break
    
    prediction = torch.argmax(pyb_af, axis = 1)
    acc = (torch.argmax(pyb_af, axis = 1) == y_train).sum()/len(y_train)
    
    optimizer.zero_grad()
    loss.backward()
    optimizer.step()
    
    if(t % 10 == 0):
        print('| Epoch: ',t+1,'/',str(Iteration),' | Loss: ', np.round(loss.item(), 4),' | Acc: ', acc.item())
        if(t % 50 == 0):
            with torch.no_grad():
                print((torch.argmax(tmc(X_test).detach(), axis = 1) == y_test).sum()/len(y_test))

NameError: name 'X_train' is not defined

In [None]:
"""

`ECM_epoch`: number of epoch to run the ecm tuning

"""

print("Running the ECM tunning for penalty in each layer")

ECM_epoch = 10
with torch.no_grad():
    eval_model = TumorClassifier(Fin = 100352, dg = 3, nk = nk, nm = nm, Fout = doutput, bias = True).to(device)
    eval_model.load_state_dict(torch.load('./brainimg'+str(X_train.size()[0])+'h'+str(nm)+'k'+str(nk), weights_only = True))

    WB = eval_model.classifier.sp1.control_p
    DB = diag_mat_weights(WB.size()[0], 'second').to(device)
    
    BestGCV = np.inf
    n = X_train.size()[0]
    
    for i in range(ECM_epoch):
        eval_model.train()
        MPSy = eval_model(X_train)

        # update following layer except for last layer
        LambdaB1 = ECM(model = eval_model.classifier, num_neurons = nm, num_knots = nk, L = 1)
        
        B1 = eval_model.classifier.inter['ebasic']
        By1 = eval_model.classifier.inter['basic']
        P2 = By1 @ torch.inverse(By1.T @ By1) @ By1.T

        
        size1 = B1.size()[1]
        B1 = B1.view(nk, nm, size1)

        NW1 = torch.empty((nk, nm))
        NB1 = torch.empty((nm))

        
        for i in range(nm):
            B1y = By1[:,i] - eval_model.classifier.sp1.bias.data[i]
            BB1 = B1[:,i].T

            # Update the weights and bias
            NW1[:, i] = (torch.inverse(BB1.T @ BB1 + (LambdaB1/size1) * (DB.T @ DB)) @ BB1.T @ B1y)
            NB1[i] = torch.mean(By1[:,i] - (NW1[:,i] @ BB1.T))
            
        # update the weight
        getattr(eval_model.classifier.sp1, 'control_p').data = NW1; getattr(eval_model.classifier.sp1, 'bias').data = NB1

        # update the last layer
        WholeB = torch.cat((torch.ones((n,1)), By1), dim = 1)
        NLn2W = (torch.inverse(WholeB.T @ WholeB) @ WholeB.T @ MPSy.type(torch.FloatTensor)).T
        getattr(eval_model.classifier.ln2, 'bias').data = NLn2W[:,0]; getattr(eval_model.classifier.ln2, 'weight').data = NLn2W[:,1:]
        
        eval_model.eval()
        pred_postecm = eval_model(X_train)
        CLoss = criterion(pred_postecm.detach(), y_train)
        trainGCV = CLoss/(n-torch.trace(P2))**2
        
        if trainGCV < BestGCV:
            BestLambdaB = LambdaB1
            BestGCV = trainGCV
            
        print(f"Lambda: {np.round(LambdaB1, 5)}| Training Loss: {np.round(CLoss, 5)}| Training GCV: {trainGCV.item()}")

In [None]:
"""

`fast_epoch`: number of epoch to run the fast tuning

"""

fast_epoch = 201
DPS = TumorClassifier(Fin = 100352, dg = 3, nk = nk, nm = nm, Fout = doutput, bias = True).to(device)
DPS.load_state_dict(torch.load('./brainimg'+str(X_train.size()[0])+'h'+str(nm)+'k'+str(nk), weights_only = True))
lr_ft = 1e-2
optimizer = torch.optim.Adam(DPS.parameters(), lr=lr_ft)

for t in range(1, fast_epoch):

    # Forward pass: Compute predicted y by passing x to the modelsp
    pyb_af = DPS(X_train)

    WB1 = DPS.classifier.sp1.control_p.data; DB1 = diag_mat_weights(WB1.size()[0]).to(device)

    loss = criterion(pyb_af, y_train) + (BestLambdaB/n) * torch.norm(DB1 @ WB1)
    bloss_list.append(loss.item())
    
    prediction = torch.argmax(pyb_af, axis = 1)
    acc = (torch.argmax(pyb_af, axis = 1) == y_train).sum()/len(y_train)
    
    optimizer.zero_grad()
    loss.backward()
    optimizer.step()

    if t % 10 == 0:
        print('| Epoch: ',t+1,'/',str(Iteration),' | Loss: ', loss.item(),' | Acc: ', np.round(acc.item(), 5))
        if t % 100 == 0:
            with torch.no_grad():
                print((torch.argmax(DPS(X_test).detach(), axis = 1) == y_test).sum()/len(y_test))

In [None]:
class PRODBSplineLayerMultiFeature(nn.Module):
    def __init__(self, input_dim, degree, num_knots, output_dim, num_neurons, bias = True):
        super(PRODBSplineLayerMultiFeature, self).__init__()
        self.degree = degree
        self.num_knots = num_knots
        self.input_dim = input_dim
        self.output_dim = output_dim
        self.num_neurons = num_neurons
        
        if input_dim == 2:
            self.control_p = nn.Parameter(torch.randn(self.num_knots**2, self.output_dim))
        else:
            self.control_p = nn.Parameter(torch.randn(self.num_knots, self.num_neurons))
        if bias:
            self.bias = nn.Parameter(torch.randn(self.num_neurons))
        else:
            self.register_parameter('bias', None)
            
        self.inter = {}
    
    def basis_function(self, x, i, k, t):
    
        # Base case: degree 0 spline
        if k == 0:
            return ((t[i] <= x) & (x < t[i + 1])).float()
    
        # Recursive case
        denom1 = t[i + k] - t[i]
        denom2 = t[i + k + 1] - t[i + 1]
    
        term1 = 0
        if denom1 != 0:
            term1 = (x - t[i]) / denom1 * self.basis_function(x, i, k - 1, t)
    
        term2 = 0
        if denom2 != 0:
            term2 = (t[i + k + 1] - x) / denom2 * self.basis_function(x, i + 1, k - 1, t)
    
        return term1 + term2
    
    def forward(self, x):
        batch_size, num_features = x.size()
        device = x.device
        
        # Create knot vector
        # knots = torch.linspace(0, 1, self.num_knots + self.degree + 1).to(device)
        knots = torch.cat([
                        torch.zeros(self.degree),               # Add repeated values at the start for clamping
                        torch.linspace(0, 1, self.num_knots - self.degree + 1),  # Uniform knot spacing in the middle
                        torch.ones(self.degree)                 # Add repeated values at the end for clamping
                    ]).to(device)
        # Apply B-spline basis functions for each feature
        basises = []
    
        
        for feature in range(num_features):
            # Calculate B-spline basis functions for this feature
            basis = torch.stack([self.basis_function(x[:, feature], i, self.degree, knots) 
                                 for i in range(self.num_knots)], dim=-1)
            basises.append(basis)
            
        if num_features == 1:
            tout = basises[0] @ self.control_p
            self.inter['eachbasic'] = basises[0].T
        else:
            #self.inter['basic'] = torch.reshape(torch.stack(basises, dim = 1), (batch_size, self.num_knots * self.num_neurons)).T
            self.inter['eachbasic'] = torch.reshape(torch.stack(basises, dim = 1), (batch_size, self.num_knots * self.num_neurons)).T
            
            basises = torch.stack(basises)
            tout = basises.permute(1,2,0) * self.control_p
            tout = tout.sum(dim =1)
                
        if self.bias is not None:
            tout += self.bias        

        self.inter['basicoutput'] = tout
        
        return tout
        
class DNNS2(nn.Module):
    def __init__(self, input_dim, degree, num_knots, num_neurons, output_dim, bias):
        super(DNNS2, self).__init__()
        self.num_neurons = num_neurons
        self.num_knots = num_knots
        self.ln1 = nn.Linear(input_dim, num_neurons)
        self.nm1 = NormLayer() 
        self.sp1 = PRODBSplineLayerMultiFeature(input_dim = 1, degree = degree, num_knots = num_knots, num_neurons = num_neurons, output_dim= output_dim, bias = True)
        self.nm2 = NormLayer() 
        self.sp2 = PRODBSplineLayerMultiFeature(input_dim = 1, degree = degree, num_knots = num_knots, num_neurons = num_neurons, output_dim= output_dim, bias = True)
        self.ln2 = nn.Linear(num_neurons, output_dim)
        self.inter = {}
        
    def forward(self, x):
        ln1out = self.ln1(x)
        ln1out = self.nm1(ln1out)
        
        device = ln1out.device
        batch_size, _ = x.size()
        
        # # # # # # # # # # # # # #
        #         SPLINE          #
        # # # # # # # # # # # # # #
        
        sp1out = self.sp1(ln1out)
        sp1out = self.nm2(sp1out)

        # # # # # # # # # # # # # #
        #         SPLINE 2        #
        # # # # # # # # # # # # # #
        
        sp2out = self.sp2(sp1out)
        ln2out = self.ln2(sp2out)
        
        return ln2out

In [None]:
class TumorClassifier(nn.Module):
    def __init__(self, Fin, dg, nk, nm, Fout, bias):
        super(TumorClassifier, self).__init__()
        self.features = nn.Sequential(
            nn.Conv2d(3, 16, kernel_size=3, padding=1),
            nn.ReLU(inplace=True),
            nn.MaxPool2d(kernel_size=2, stride=2),
            nn.Conv2d(16, 32, kernel_size=3, padding=1),
            nn.ReLU(inplace=True),
            nn.MaxPool2d(kernel_size=2, stride=2),
        )
        self.gap = nn.Flatten()
        self.classifier = DNNS2(input_dim = 32*56*56, degree = dg, num_knots = nk, num_neurons = nm, output_dim = Fout, bias = True).to(device)
        self.sm = nn.Softmax(dim = 1)
    def forward(self, x):
        x = self.features(x)
        x = self.gap(x)
        x = self.classifier(x)
        x = self.sm(x)

        return x

class EarlyStopper:
    def __init__(self, patience=1, min_delta=0):
        self.patience = patience
        self.min_delta = min_delta
        self.counter = 0
        self.min_validation_loss = float('inf')

    def early_stop(self, validation_loss):
        if validation_loss < self.min_validation_loss:
            self.min_validation_loss = validation_loss
            self.counter = 0
        elif validation_loss > (self.min_validation_loss + self.min_delta):
            self.counter += 1
            if self.counter >= self.patience:
                return True
        return False

In [232]:
device = torch.device("cuda:0" if torch.cuda.is_available() else "cpu")

"""
Model setting:

`device`: running the program with cpu or gpu
`tmc`: the classifier that equip with DNN-S 
`nm` : number of neuron in DNN-S
`nk` : number of knot in DNN-S
`patientc` : (early-stop crierion) If the model didn't improve in n epoch then stop.
`patientr` : If the model didn't improve in n epoch then decrease learning rate with specific factor.

"""

# experiment setting
Iteration = 10000; bloss_list = []; tor = 1e-5; lr_tor = 1e-6
patientc = 10; patientr = 5; tpat = 0; bloss = 9999
nm = 100; nk = 15; doutput = 2
device = torch.device("cuda:0" if torch.cuda.is_available() else "cpu")

# model parameter 
tmc = TumorClassifier(Fin = 100352, dg = 3, nk = nk, nm = nm, Fout = doutput, bias = True)
learning_r = 1e-2
optimizer = torch.optim.Adam(tmc.parameters(), lr=learning_r)
criterion = nn.CrossEntropyLoss()

In [233]:
for t in range(Iteration):

    # Forward pass: Compute predicted y by passing x to the modelsp
    pyb_af = tmc(X_train)
    loss = criterion(pyb_af, y_train); bloss_list.append(loss.item())
    
    if (t > 0) and ((bloss_list[t-1]-bloss_list[t])<tor):        
        if (tpat % patientr) == 0:
            learning_r *= 0.2 
            tpat += 1
            #print('Learning rate reduce to ', learning_r)
            optimizer = torch.optim.Adam(tmc.parameters(), lr=learning_r)
            if learning_r <= lr_tor:
                print('Convergence!')
                break
        elif tpat < patientc:
            tpat += 1
            pass
        else:
            print('Convergence!')
            break
        
    else:
        if loss < bloss:
            #torch.save(tmc.state_dict(), './brainimg'+str(X_train.size()[0])+'h'+str(nm)+'k'+str(nk))
            bloss = loss.item()
            tpat = 0
        tpat += 1

    if tpat == patientc:
        print('Convergence!')
        break
    
    prediction = torch.argmax(pyb_af, axis = 1)
    acc = (torch.argmax(pyb_af, axis = 1) == y_train).sum()/len(y_train)
    
    optimizer.zero_grad()
    loss.backward()
    optimizer.step()
    
    if(t % 10 == 0):
        print('| Epoch: ',t+1,'/',str(Iteration),' | Loss: ', np.round(loss.item(), 4),' | Acc: ', acc.item())
        if(t % 100 == 0):
            with torch.no_grad():
                print((torch.argmax(tmc(X_test).detach(), axis = 1) == y_test).sum()/len(y_test))

| Epoch:  1 / 100  | Loss:  0.6802  | Acc:  0.593999981880188
tensor(0.5733)
| Epoch:  11 / 100  | Loss:  0.6035  | Acc:  0.722000002861023
| Epoch:  21 / 100  | Loss:  0.5478  | Acc:  0.7919999957084656
| Epoch:  31 / 100  | Loss:  0.5064  | Acc:  0.8199999928474426
| Epoch:  41 / 100  | Loss:  0.4776  | Acc:  0.8560000061988831
| Epoch:  51 / 100  | Loss:  0.4547  | Acc:  0.8899999856948853
tensor(0.7667)
| Epoch:  61 / 100  | Loss:  0.4357  | Acc:  0.8939999938011169
| Epoch:  71 / 100  | Loss:  0.4198  | Acc:  0.9139999747276306
| Epoch:  81 / 100  | Loss:  0.4061  | Acc:  0.9259999990463257
| Epoch:  91 / 100  | Loss:  0.3934  | Acc:  0.9419999718666077


In [260]:
with torch.no_grad():
    eval_model = TumorClassifier(Fin = 100352, dg = 3, nk = nk, nm = nm, Fout = doutput, bias = True).to(device)
    eval_model.load_state_dict(torch.load('./2brainimg'+str(X_train.size()[0])+'h'+str(nm)+'k'+str(nk), weights_only = True))
    pred_postecm = eval_model(X_train)
    CLoss = criterion(pred_postecm.detach(), y_train)

In [238]:
def ECM(model, num_neurons, num_knots, initial_xi = 1, initial_sigma = 1, initial_lambda = 1e-4):
    lambdab = initial_lambda
    sigma = initial_sigma
    xi = initial_xi

    B = model.inter['eachbasic']
    By = model.inter['basicoutput']
    WB = model.control_p
        
    DB = diag_mat_weights(WB.size()[0]).to(device)
    size = B.size()[1]
    S = DB.T @ DB
    Cov_a = (xi**2)* torch.linalg.pinv(S)
    Cov_e = torch.eye(size*num_neurons)* sigma
    
    block_y = torch.reshape(By, (-1,1))
    flatB = B.view(num_neurons, num_knots, size)
        
    sqr_xi= 0
    sqr_sig = 0
    
    for i in range(num_neurons):
        Ncov = (Cov_a -(Cov_a @ flatB[i]) @ (torch.linalg.pinv(flatB[i].T @ Cov_a @ flatB[i] + Cov_e[size*i:size*(i+1),size*i:size*(i+1)]) @ flatB[i].T @ Cov_a))
        Nmu = (Cov_a @ flatB[i]) @ (torch.linalg.pinv(flatB[i].T @ Cov_a @ flatB[i] + Cov_e[size*i:size*(i+1),size*i:size*(i+1)])) @ By[:,i].reshape(-1,1)
        
        first_xi = S @ Ncov
        second_xi = (Nmu.T @ S @ Nmu)
        sqr_xi += torch.trace(first_xi) + second_xi
            
        first_sig = torch.norm(By[:,i])
        second_sig = 2 * (By[:,i] @ flatB[i].T) @ Nmu 
        third_sig = torch.trace((flatB[i] @ flatB[i].T) @ Ncov)
        four_sig = (Nmu.T @ flatB[i] @ flatB[i].T @ Nmu)
        
        sqr_sig += (first_sig + second_sig + third_sig + four_sig)
    
    sqr_xi /= num_neurons
    sqr_sig /= (num_neurons*size)
    
    Lambda = sqr_sig/sqr_xi
    
    return Lambda.item()

In [257]:
"""

`ECM_epoch`: number of epoch to run the ecm tuning

"""

LoP = 2
PSname = ['sp'+str(i+1) for i in range(LoP)]
ECM_epoch = 10

print("Running the ECM tunning for penalty in each layer")

with torch.no_grad():
    eval_model = TumorClassifier(Fin = 100352, dg = 3, nk = nk, nm = nm, Fout = doutput, bias = True).to(device)
    eval_model.load_state_dict(torch.load('./2brainimg'+str(X_train.size()[0])+'h'+str(nm)+'k'+str(nk), weights_only = True))

    BestGCV = np.inf
    n = X_train.size()[0]
    
    for i in range(ECM_epoch):
        eval_model.train()
        MPSy = eval_model(X_train)
    
        # update following layer except for last layer
        LambdaL = []
        for layer in PSname:
            splayer = getattr(eval_model.classifier, layer)
            WB = getattr(splayer, 'control_p')
            DB = diag_mat_weights(WB.size()[0], 'second').to(device)
            LambdaB = ECM(model = getattr(eval_model.classifier, layer), num_neurons = nm, num_knots = nk)
            LambdaL.append(LambdaB)
            
            B1 = getattr(eval_model.classifier, layer).inter['eachbasic']
            By1 = getattr(eval_model.classifier, layer).inter['basicoutput']
            P2 = By1 @ torch.inverse(By1.T @ By1) @ By1.T
    
            size1 = B1.size()[1]
            B1 = B1.view(nk, nm, size1)
    
            NW1 = torch.empty((nk, nm))
            NB1 = torch.empty((nm))
    
        
            for i in range(nm):
                B1y = By1[:,i] - getattr(eval_model.classifier, layer).bias.data[i]
                BB1 = B1[:,i].T
        
                # Update the weights and bias
                NW1[:, i] = (torch.inverse(BB1.T @ BB1 + (LambdaB/size1) * (DB.T @ DB)) @ BB1.T @ B1y)
                NB1[i] = torch.mean(By1[:,i] - (NW1[:,i] @ BB1.T))
                
            # update the weight
            getattr(splayer, 'control_p').data = NW1; getattr(splayer, 'bias').data = NB1
    
        # update the last layer
        WholeB = torch.cat((torch.ones((n,1)), By1), dim = 1)
        NLn2W = (torch.inverse(WholeB.T @ WholeB) @ WholeB.T @ MPSy.type(torch.FloatTensor)).T
        
        getattr(eval_model.classifier.ln2, 'bias').data = NLn2W[:,0]; getattr(eval_model.classifier.ln2, 'weight').data = NLn2W[:,1:]
        
        eval_model.eval()
        pred_postecm = eval_model(X_train)
        CLoss = criterion(pred_postecm.detach(), y_train)
        trainGCV = CLoss/(n-torch.trace(P2))**2
        
        if trainGCV < BestGCV:
            BestLambdaB = LambdaB1
            BestGCV = trainGCV
            
        print(f"Lambda: {LambdaL}| Training Loss: {np.round(CLoss, 5)}| Training GCV: {trainGCV.item()}")

Running the ECM tunning for penalty in each layer
Lambda: [0.30183088779449463, 0.30312493443489075]| Training Loss: 0.7657999992370605| Training GCV: 4.786255885846913e-06
Lambda: [0.973455011844635, 0.6522708535194397]| Training Loss: 0.8041099905967712| Training GCV: 4.120976882404648e-06
Lambda: [1.3706945180892944, 0.3747212588787079]| Training Loss: 0.7974600195884705| Training GCV: 5.1150018407497555e-06
Lambda: [1.8239096403121948, 0.3827195465564728]| Training Loss: 0.8612599968910217| Training GCV: 4.769452061736956e-06
Lambda: [1.5904974937438965, 1.7096126079559326]| Training Loss: 0.7672600150108337| Training GCV: 3.7891347801632946e-06
Lambda: [1.670275092124939, 1.7180237770080566]| Training Loss: 0.7532600164413452| Training GCV: 3.5555015074351104e-06
Lambda: [1.777961254119873, 0.859359860420227]| Training Loss: 0.907260000705719| Training GCV: 4.15523027186282e-06
Lambda: [1.9107773303985596, 1.430779218673706]| Training Loss: 0.7932599782943726| Training GCV: 3.4246

In [264]:
"""

`fast_epoch`: number of epoch to run the fast tuning

"""

fast_epoch = 201
D2PS = TumorClassifier(Fin = 100352, dg = 3, nk = nk, nm = nm, Fout = doutput, bias = True).to(device)
D2PS.load_state_dict(torch.load('./2brainimg'+str(X_train.size()[0])+'h'+str(nm)+'k'+str(nk), weights_only = True))
lr_ft = 1e-2
optimizer = torch.optim.Adam(D2PS.parameters(), lr=lr_ft)

In [None]:
for t in range(1, fast_epoch):

    # Forward pass: Compute predicted y by passing x to the modelsp
    pyb_af = D2PS(X_train)
    loss = criterion(pyb_af, y_train)
    
    for l in range(len(PSname)):
        WB = getattr(D2PS.classifier, PSname[l]).control_p.data; DB = diag_mat_weights(WB.size()[0]).to(device)
        loss += (LambdaL[l]/n) * torch.norm(DB @ WB)
            
    prediction = torch.argmax(pyb_af, axis = 1)
    acc = (torch.argmax(pyb_af, axis = 1) == y_train).sum()/len(y_train)
    
    optimizer.zero_grad()
    loss.backward()
    optimizer.step()

    if t % 10 == 0:
        print('| Epoch: ',t+1,'/',str(Iteration),' | Loss: ', loss.item(),' | Acc: ', np.round(acc.item(), 5))
        if t % 100 == 0:
            with torch.no_grad():
                print((torch.argmax(D2PS(X_test).detach(), axis = 1) == y_test).sum()/len(y_test))

In [17]:
def ECM(par, initial_xi = 1, initial_sigma = 1, initial_lambda = 1e-4):
    lambdab = initial_lambda
    sigma = initial_sigma
    xi = initial_xi
    
    n_block, num_knots, num_neurons = par['wbasic'].size()
    ls_lambda = torch.empty(n_block)
    
    for l in range(n_block):
        B = par['ebasic'][l]
        By = par['basic'][l]
        WB = par['wbasic'][l]
        
        DB = diag_mat_weights(WB.size()[0]).to(device)
        size = B.size()[1]
        S = DB.T @ DB
        Cov_a = (xi**2)* torch.linalg.pinv(S)
        Cov_a.to(device)
        Cov_e = (torch.eye(size*num_neurons)* sigma).to(device)
        
        block_y = torch.reshape(By, (-1,1))
        flatB = B.view(num_neurons, num_knots, size)
            
        sqr_xi= 0
        sqr_sig = 0

        for i in range(num_neurons):
            Ncov = (Cov_a -(Cov_a @ flatB[i]) @ (torch.linalg.inv(flatB[i].T @ Cov_a @ flatB[i] + Cov_e[size*i:size*(i+1),size*i:size*(i+1)]) @ flatB[i].T @ Cov_a))
            Nmu = (Cov_a @ flatB[i]) @ (torch.linalg.inv(flatB[i].T @ Cov_a @ flatB[i] + Cov_e[size*i:size*(i+1),size*i:size*(i+1)])) @ By[:,i].reshape(-1,1)
            
            first_xi = S @ Ncov
            second_xi = (Nmu.T @ S @ Nmu)
            sqr_xi += torch.trace(first_xi) + second_xi
                
            first_sig = torch.norm(By[:,i])
            second_sig = 2 * (By[:,i] @ flatB[i].T) @ Nmu 
            third_sig = torch.trace((flatB[i] @ flatB[i].T) @ Ncov)
            four_sig = (Nmu.T @ flatB[i] @ flatB[i].T @ Nmu)
            
            sqr_sig += (first_sig + second_sig + third_sig + four_sig)
            
            del first_xi, second_xi, first_sig, second_sig, third_sig, four_sig

        sqr_xi /= num_neurons
        sqr_sig /= (num_neurons*size)

        ls_lambda[l] = (sqr_sig/sqr_xi).item()
        
        del Cov_a, Cov_e, flatB
    
    return ls_lambda
    
def ECM_layersise_update(model, par, Lambda, x, y):

    model.eval()
    '''
    with torch.no_grad():
        DSy = model(x)
        print('Training Error: ', np.round(criterion(y, DSy.detach()).item(), 5))
    '''
    
    device = x.device
    
    B_out, B_in, B_w, B_b = par['basic'], par['ebasic'], par['wbasic'], par['bbasic']
    n_layer, nk, nm = B_w.size()
    DB = diag_mat_weights(B_w[0].size()[0], 'second').to(device)

    Project_matrix = (torch.linalg.pinv(B_in[-1].T @ B_in[-1]) @ B_in[-1].T @ B_in[-1])
    Size = [b.size()[1] for b in B_in]
    B_in = B_in.view(n_layer, nm, nk, Size[0])
    
    for l in range(n_layer):    
        NW = torch.empty((nk, nm)).to(device)
        NB = torch.empty((nm)).to(device)
        
        for i in range(nm):
            B1y = B_out[l][:,i] - B_b[l][i]
            BB = B_in[l][i].T
    
            # Update the weights and bias
            NW[:, i] = (torch.linalg.inv(BB.T @ BB + (Lambda[l]/Size[l]) * (DB.T @ DB)) @ BB.T @ B1y)
            NB[i] = torch.mean(B_out[l][:,i] - (NW[:,i] @ BB.T))
                
        # update the weight
        block = getattr(model.Spline_block.model, f'block_{l}')
        getattr(block.block.BSL, 'control_p').data = NW
        getattr(block.block.BSL, 'bias').data = NB

    
    with torch.no_grad():
        DPSy = model(x)
        #Update_Train_Loss = np.round(criterion(y, DPSy.detach()).item(), 5)
        GCV = np.round((torch.sum((y - DPSy)**2)/(x.size()[0]-torch.trace(Project_matrix))**2).item(), 5)
        
    return model, GCV

def ECM_update(model, max_iter, x, y):
    BestGCV = 9999
    patient = 10
    pcount = 0
    for i in range(max_iter):
        _ = model(x)
        ECM_para = model.get_para_ecm(x)
        ECM_Lambda = ECM(ECM_para, initial_xi = 1, initial_sigma = 1, initial_lambda = 1e-4)

        print('Lambda: ', ECM_Lambda)
        model, GCV = ECM_layersise_update(model, ECM_para, ECM_Lambda, x, y)
        print(GCV)
        if GCV < BestGCV:
            
            BestLambda = ECM_Lambda
            BestGCV = GCV
            pcount = 0
        else:
            pcount += 1

        if pcount == patient:
            break

        del ECM_para, ECM_Lambda
    
    return BestLambda

class BSL(nn.Module):
    def __init__(self, degree, num_knots, num_neurons, bias = True):
        super(BSL, self).__init__()
        self.degree = degree
        self.num_knots = num_knots
        self.num_neurons = num_neurons
        self.control_p = nn.Parameter(torch.randn(self.num_knots, self.num_neurons))
        
        if bias:
            self.bias = nn.Parameter(torch.randn(self.num_neurons))
        else:
            self.register_parameter('bias', None)
            
        self.inter = {}
    
    def basis_function(self, x, i, k, t):
    
        # Base case: degree 0 spline
        if k == 0:
            return ((t[i] <= x) & (x < t[i + 1])).float()
    
        # Recursive case
        denom1 = t[i + k] - t[i]
        denom2 = t[i + k + 1] - t[i + 1]
    
        term1 = 0
        if denom1 != 0:
            term1 = (x - t[i]) / denom1 * self.basis_function(x, i, k - 1, t)
    
        term2 = 0
        if denom2 != 0:
            term2 = (t[i + k + 1] - x) / denom2 * self.basis_function(x, i + 1, k - 1, t)
    
        return term1 + term2

    def knots_distribution(self, dg, nk):

        knots = torch.cat([torch.linspace(-0.002, -0.001, steps=dg),            # Add repeated values at the start for clamping
            torch.linspace(0, 1, nk-2*dg-2),  # Uniform knot spacing in the middle
            torch.linspace(1.001, 1.002, steps=dg)           # Add repeated values at the end for clamping
            ]).view(-1,1)
        
        knots = torch.cat([torch.linspace(0, 1, nk-2)          # Add repeated values at the end for clamping
            ]).view(-1,1)

        return knots
    
    def basis_function2(self, x, spl):
        basis_output = spl.fit_transform(x.cpu().numpy())
        return basis_output
            
    def forward(self, x):
        batch_size, num_features = x.size()
        device = x.device
        
        # Create knot vector and apply B-spline basis functions for each feature

        '''
        knots = torch.cat([
                        torch.zeros(self.degree),               # Add repeated values at the start for clamping
                        torch.linspace(0, 1, self.num_knots - self.degree + 1),  # Uniform knot spacing in the middle
                        torch.ones(self.degree)                 # Add repeated values at the end for clamping
                    ]).to(device)

        # Apply B-spline basis functions for each feature

        basises = []
        
        
        for feature in range(num_features):
            # Calculate B-spline basis functions for this feature
            basis = torch.stack([self.basis_function(x[:, feature], i, self.degree, knots) 
                                 for i in range(self.num_knots)], dim=-1)
            basises.append(basis)
            
        '''
    
        basises = []
        knots = self.knots_distribution(self.degree, self.num_knots)
        #knots = knots.to(device)
        spl = SplineTransformer(n_knots=self.num_knots, degree=self.degree, knots = knots)

        
        for feature in range(num_features):
            # Calculate B-spline basis functions for this feature
            
            basis = self.basis_function2(x[:, feature].reshape(-1,1), spl)
            basis = torch.Tensor(basis).to(device)
            basises.append(basis)
        
        if num_features == 1:
            tout = basises[0] @ self.control_p
            self.inter['basic'] = basises[0].T
        else:
            self.inter['basic'] = torch.reshape(torch.stack(basises, dim = 1), (batch_size, self.num_knots * self.num_neurons)).T
            basises = torch.stack(basises)
            tout = basises.permute(1,2,0) * self.control_p
            tout = tout.sum(dim =1)
                
        if self.bias is not None:
            tout += self.bias        
            
        return tout


class NormLayer(nn.Module):
    def __init__(self):
        super(NormLayer, self).__init__()

    def forward(self, x):
        min_val = torch.min(x, axis = 1).values.reshape(-1,1)
        max_val = torch.max(x, axis = 1).values.reshape(-1,1)

        x = (x - min_val)/(max_val - min_val)  # Rescale to [0, 1]
        return x.detach()
    
class BSpline_block(nn.Module):
    def __init__(self, degree, num_knots, num_neurons, dropout = 0.0, bias = True):
        super(BSpline_block, self).__init__()

        self.block = nn.Sequential(OrderedDict([
            ('norm', NormLayer()),
            ('BSL', BSL(degree = degree, num_knots = num_knots, num_neurons = num_neurons, bias = bias)),
            ('drop', nn.Dropout(dropout)),
        ]))
        
    def forward(self, x):
        return self.block(x)
        
class StackBS_block(nn.Module):
    def __init__(self, block, degree, num_knots, num_neurons, num_blocks, dropout = 0.0, bias = True):
        super().__init__()
        self.model = nn.ModuleDict({
            f'block_{i}': block(degree = degree, num_knots = num_knots, num_neurons = num_neurons)
            for i in range(num_blocks)
        })

    def forward(self, x):
        for name, block in self.model.items():
            x = block(x)
        return x

In [188]:
import torchvision
transform_train = transforms.Compose([
    transforms.RandomCrop(32, padding=4),
    transforms.RandomHorizontalFlip(),
    transforms.ToTensor(),
    transforms.Normalize((0.4914, 0.4822, 0.4465), (0.2023, 0.1994, 0.2010)),
])

transform_test = transforms.Compose([
    transforms.ToTensor(),
    transforms.Normalize((0.4914, 0.4822, 0.4465), (0.2023, 0.1994, 0.2010)),
])

trainset = torchvision.datasets.CIFAR10(
    root='./data', train=True, download=True, transform=transform_train)
#trainset = torch.utils.data.Subset(trainset, list(range(10000)))

trainloader = torch.utils.data.DataLoader(
    trainset, batch_size=32, shuffle=True, num_workers=2)

testset = torchvision.datasets.CIFAR10(
    root='./data', train=False, download=True, transform=transform_test)
#testset = torch.utils.data.Subset(testset, list(range(1000)))
testloader = torch.utils.data.DataLoader(
    testset, batch_size=100, shuffle=False, num_workers=2)


classes = ('plane', 'car', 'bird', 'cat', 'deer',
           'dog', 'frog', 'horse', 'ship', 'truck')

Files already downloaded and verified
Files already downloaded and verified


In [189]:
def train(model, device, train_loader, optimizer, epoch):
    model.train()
    for batch_idx, (data, target) in enumerate(train_loader):
        data, target = data.to(device), target.to(device)
        optimizer.zero_grad()
        output = model(data)
        loss = criterion(output, target)
        loss.backward()
        optimizer.step()
        if batch_idx % 20 == 0:
            print('Train Epoch: {} [{}/{} ({:.0f}%)]\tLoss: {:.6f}'.format(
                epoch, batch_idx * len(data), len(train_loader.dataset),
                100. * batch_idx / len(train_loader), loss.item()))
            
def test(model, device, test_loader):
    model.eval()
    test_loss = 0
    correct = 0
    with torch.no_grad():
        for data, target in test_loader:
            data, target = data.to(device), target.to(device)
            output = model(data)
            test_loss += F.nll_loss(output, target, reduction='sum').item()  # sum up batch loss
            pred = output.argmax(dim=1, keepdim=True)  # get the index of the max log-probability
            correct += pred.eq(target.view_as(pred)).sum().item()

    test_loss /= len(test_loader.dataset)

    print('\nTest set: Average loss: {:.4f}, Accuracy: {}/{} ({:.0f}%)\n'.format(
        test_loss, correct, len(test_loader.dataset),
        100. * correct / len(test_loader.dataset)))
    
class DPS_Image(nn.Module):
    def __init__(self, dg, nk, nm, nbl, dropout, Fout, bias):
        super(DPS_Image, self).__init__()
        self.features = nn.Sequential(
            nn.Conv2d(3, 16, kernel_size=3, padding=1),
            nn.ReLU(),
            nn.MaxPool2d(kernel_size=2, stride=2),
            nn.Conv2d(16, 32, kernel_size=3, padding=1),
            nn.ReLU(),
            nn.MaxPool2d(kernel_size=2, stride=2),
            nn.Dropout(0.2)
        )
        self.gap = nn.Flatten()
        self.ln1 = nn.Linear(2048, nm)
        self.Spline_block = StackBS_block(BSpline_block, degree = dg, num_knots = nk, num_neurons = nm, num_blocks = nbl, dropout = dropout)
        self.ln3 = nn.Linear(nm, Fout)
        
    def forward(self, x):
        x = self.features(x)
        x = self.gap(x)
        x = F.relu(self.ln1(x))
        #x = self.Spline_block(x)
        x = self.ln3(x)
        #x = self.Spline_block(x)
        output = F.log_softmax(x, dim=1)
        
        return output

'''
train_kwargs = {'batch_size': 32}
test_kwargs = {'batch_size': 32}

transform=transforms.Compose([
        transforms.ToTensor(),
        transforms.Normalize((0.1307,), (0.3081,))
        ])
dataset1 = datasets.MNIST('../data', train=True, download=True,
                   transform=transform)
dataset2 = datasets.MNIST('../data', train=False,
                   transform=transform)

mnist_subset_train = torch.utils.data.Subset(dataset1, list(range(10000)))
mnist_subset_test = torch.utils.data.Subset(dataset2, list(range(1000)))


# Create a DataLoader for the subset
train_loader = torch.utils.data.DataLoader(mnist_subset_train,**train_kwargs)
test_loader = torch.utils.data.DataLoader(mnist_subset_test, **test_kwargs)
'''

# experiment setting
Iteration = 10000; dg = 3; nm = 20; nk = 8; doutput = 10; nl = 1
device = torch.device("cuda:0" if torch.cuda.is_available() else "cpu")

# model parameter 
tmc = DPS_Image(dg = dg, nk = nk, nm = nm, nbl = nl, dropout = 0.0, Fout = doutput, bias = True).to(device)
learning_r = 3e-3; gamma = 0.5
optimizer = torch.optim.Adam(tmc.parameters(), lr=learning_r)

In [193]:
model = torchvision.models.resnet50(pretrained=True).to(device)
    
for param in model.parameters():
    param.requires_grad = False   
  
'''
model.fc = nn.Sequential(
               nn.Linear(2048, nm),
                #StackBS_block(BSpline_block, degree = dg, num_knots = nk, num_neurons = nm, num_blocks = nl, dropout = 0.0),
                nn.Linear(nm, 10)).to(device)
'''

num_ftrs = model.fc.in_features
model.fc = torch.nn.Linear(num_ftrs, 1024)
model.fc = nn.Sequential(
    torch.nn.Dropout(0.5),
    torch.nn.Linear(num_ftrs, 1024),
    torch.nn.Dropout(0.2),
    torch.nn.Linear(1024, 512),
    torch.nn.Dropout(0.2),
    torch.nn.Linear(512, 256),
    torch.nn.Dropout(0.2),
    torch.nn.Linear(256, nm),
    torch.nn.Dropout(0.2),
    #StackBS_block(BSpline_block, degree = dg, num_knots = nk, num_neurons = nm, num_blocks = nl, dropout = 0.0),
    torch.nn.Linear(nm, doutput),
).to(device)

In [None]:
criterion = nn.CrossEntropyLoss()
optimizer = torch.optim.SGD(model.parameters(), lr=learning_r, momentum=0.9, weight_decay=5e-4)
scheduler = torch.optim.lr_scheduler.StepLR(optimizer, step_size=10, gamma=0.1)

for epoch in range(1, Iteration + 1):
    train(model, device, trainloader, optimizer, Iteration)
    test(model, device, testloader)
    scheduler.step()

In [87]:
class Net(nn.Module):
    def __init__(self, dg, nk, nm, nbl, dropout, Fout, bias):
        super(Net, self).__init__()
        self.conv1 = nn.Conv2d(1, 16, kernel_size=3)
        self.conv2 = nn.Conv2d(16, 32, kernel_size=3)
        self.conv2_drop = nn.Dropout2d()
        self.flatten = nn.Flatten()
        self.fc1 = nn.Linear(800, nm)
        self.Spline_block = StackBS_block(BSpline_block, degree = dg, num_knots = nk, num_neurons = nm, num_blocks = nbl, dropout = dropout)
        self.fc2 = nn.Linear(nm, Fout)

    def forward(self, x):
        x = F.relu(F.max_pool2d(self.conv1(x), 2))
        x = F.relu(F.max_pool2d(self.conv2_drop(self.conv2(x)), 2))
        x = self.flatten(x)
        x = F.relu(self.fc1(x))
        x = self.Spline_block(x)
        #x = F.dropout(x, training=self.training)
        x = self.fc2(x)
        return F.log_softmax(x, dim=1)

Iteration = 10000; dg = 3; nm = 10; nk = 50; doutput = 10; nl = 1; learning_r = 5e-2
device = torch.device("cuda:0" if torch.cuda.is_available() else "cpu")

network = Net(dg = dg, nk = nk, nm = nm, nbl = nl, dropout = 0.0, Fout = doutput, bias = True).to(device)
optimizer = torch.optim.Adam(network.parameters(), lr=learning_r)

In [88]:
scheduler = StepLR(optimizer, step_size=1, gamma= gamma)
for epoch in range(1, Iteration + 1):
    train(network, device, train_loader, optimizer, Iteration)
    test(network, device, test_loader)
    scheduler.step()



ValueError: Input X contains NaN.
SplineTransformer does not accept missing values encoded as NaN natively. For supervised learning, you might want to consider sklearn.ensemble.HistGradientBoostingClassifier and Regressor which accept missing values encoded as NaNs natively. Alternatively, it is possible to preprocess the data, for instance by using an imputer transformer in a pipeline or drop samples with missing values. See https://scikit-learn.org/stable/modules/impute.html You can find a list of all estimators that handle NaN values at the following page: https://scikit-learn.org/stable/modules/impute.html#estimators-that-handle-nan-values