In [1]:
import torch
import torch.nn as nn
from torch.utils.data import DataLoader
from torchvision import datasets
import numpy as np
import torch.nn.functional as F
import count_sparsity
from data_generator1 import simulate_data
from SGHMC_SPL_FC import SGHMC_SPL

In [2]:
# Run on the first CUDA device when one is available; otherwise fall back to the CPU.
_device_name = "cuda:0" if torch.cuda.is_available() else "cpu"
device1 = torch.device(_device_name)

In [3]:
# Draw independent simulated training and test sets on the selected device.
# NOTE(review): N and P are presumably sample count and feature count —
# confirm against data_generator1.simulate_data.
training_data = simulate_data(N=10240, P=1000, device=device1)
testing_data = simulate_data(N=1024, P=1000, device=device1)

# Training batches are reshuffled every epoch; evaluation keeps a fixed order.
train_dataloader = DataLoader(dataset=training_data, batch_size=1024, shuffle=True)
test_dataloader = DataLoader(dataset=testing_data, batch_size=512, shuffle=False)

In [4]:
class FCNetwork(nn.Module):
    """Three-layer fully connected network: input_dim -> 5 -> 3 -> output_dim.

    ReLU is applied after the first two linear layers; the last layer is
    left linear. Inputs are flattened (all dimensions after the batch
    dimension) before the first layer.
    """

    def __init__(self, input_dim, output_dim):
        """Create the layers.

        Args:
            input_dim: Number of features per sample after flattening.
            output_dim: Number of outputs produced per sample.
        """
        super().__init__()
        first_width, second_width = 5, 3
        self.flatten = nn.Flatten()
        self.linear1 = nn.Linear(input_dim, first_width)
        self.linear2 = nn.Linear(first_width, second_width)
        self.linear3 = nn.Linear(second_width, output_dim)

    def forward(self, x):
        """Return the network output for a batch ``x``."""
        activations = self.flatten(x)
        # Both hidden layers share the same linear -> ReLU pattern.
        for hidden_layer in (self.linear1, self.linear2):
            activations = F.relu(hidden_layer(activations))
        return self.linear3(activations)

In [5]:
# Build the regressor (1000 inputs, 1 output) and move it to the selected device.
net = FCNetwork(input_dim=1000, output_dim=1)
net = net.to(device1)

# Mean-squared-error objective.
criterion = nn.MSELoss()

# Project-local SGHMC_SPL optimizer; N is the number of training samples.
# The remaining hyperparameters are passed through unchanged — see
# SGHMC_SPL_FC for their meaning.
optimizer = SGHMC_SPL(
    net.parameters(),
    N=len(train_dataloader.dataset),
    weight_decay_1=2e-4,
    weight_decay_0=1e-2,
    soft_threshold=1e-2,
    hard_threshold=1e-3,
    warm_up=50,
)

In [6]:
epochs=3000
# Number of mini-batches the training loader actually yields per epoch.
# Taken from the loader itself rather than `len(dataset)/1024+1`, which
# produced a float and over-counted by one whenever the dataset size is an
# exact multiple of the batch size (11.0 instead of 10 here), leaving a gap
# in the schedule's iteration counter at every epoch boundary.
num_batch = len(train_dataloader)
C2_0 = 0.01 # initial step size
M = 6 # number of cycles
T = epochs*num_batch # total number of iterations

In [7]:
def adjust_learning_rate(epoch, batch_idx):
    """Cosine cyclical step size for a given position in training.

    The global iteration index ``epoch*num_batch + batch_idx`` is reduced
    modulo the cycle length ``T // M``. Within a cycle the value starts at
    ``C2_0`` and follows half a cosine wave down toward 0.

    Reads the module-level ``num_batch``, ``T``, ``M`` and ``C2_0``.

    Args:
        epoch: Epoch index used to compute the iteration counter.
        batch_idx: Index of the batch inside the epoch.

    Returns:
        The step size for this iteration.
    """
    cycle_length = T // M
    iteration = epoch*num_batch+batch_idx
    # Phase grows from 0 toward pi across one cycle.
    phase = np.pi * (iteration % cycle_length) / cycle_length
    return 0.5*(np.cos(phase) + 1)*C2_0

def train_loop(epoch,dataloader, model, loss_fn, optimizer):
    """Run one training epoch and return the mean batch loss.

    For every batch: move the data to ``device1``, compute the loss,
    back-propagate, and take one optimizer step using the square root of the
    cyclical step size from ``adjust_learning_rate``.

    Args:
        epoch: Zero-based epoch index (printed as ``epoch+1``).
        dataloader: Iterable of ``(X, Y)`` batches exposing ``.dataset``.
        model: Network to train; switched to training mode.
        loss_fn: Callable ``loss_fn(pred, Y)`` returning a scalar tensor.
        optimizer: Optimizer whose ``step`` accepts
            ``(C, epoch=..., batch=..., T=...)``.

    Returns:
        Mean of the per-batch losses over the epoch, or ``0.0`` if the
        dataloader produced no batches.
    """
    size = len(dataloader.dataset)
    print('\nEpoch: %d' % (epoch+1))

    # Value forwarded to the optimizer as ``T``. It depends only on the dataset
    # size, so it is computed once per epoch. It is kept under a different
    # local name so it is not confused with the module-level ``T`` (total
    # iterations) that the schedule uses.
    # NOTE(review): presumably the sampling temperature — confirm against
    # SGHMC_SPL.step.
    step_T = (1/size)**0.5 if size else 0.0

    running_loss = 0.0
    batches_seen = 0

    model.train()

    for batch, (X, Y) in enumerate(dataloader):

        # Compute prediction and loss
        X=X.to(device1)
        Y=Y.to(device1)
        pred = model(X)
        loss = loss_fn(pred, Y)

        # The schedule is queried with a one-based epoch; the optimizer
        # receives the square root of the scheduled step size.
        C = adjust_learning_rate(epoch+1,batch)**0.5

        optimizer.zero_grad()
        loss.backward()
        optimizer.step(C,epoch=epoch,batch=batch,T=step_T)

        running_loss += loss.item()
        batches_seen += 1

    return running_loss / batches_seen if batches_seen else 0.0


def test_loop(epoch,dataloader, model, loss_fn):
    """Evaluate ``model`` on ``dataloader`` and print the mean batch loss.

    Args:
        epoch: Accepted for symmetry with ``train_loop``; not used.
        dataloader: Sized iterable of ``(X, Y)`` batches.
        model: Network to evaluate; switched to evaluation mode.
        loss_fn: Callable ``loss_fn(pred, Y)`` returning a scalar tensor.
    """
    batch_count = len(dataloader)
    model.eval()

    per_batch_losses = []
    # Gradients are not needed for evaluation.
    with torch.no_grad():
        for inputs, targets in dataloader:
            inputs = inputs.to(device1)
            targets = targets.to(device1)
            per_batch_losses.append(loss_fn(model(inputs), targets).item())

    # Unweighted average over batches.
    test_loss = sum(per_batch_losses) / batch_count
    print(f"Avg loss: {test_loss:>8f} \n")
  

In [8]:
# Alternate one training epoch with one evaluation pass, then report the
# sparsity measure computed by count_sparsity.sparsity on the network.
# NOTE(review): the exclusive bound start_epoch+epochs-1 yields epochs-1
# iterations; together with the epoch+1 passed to the schedule inside
# train_loop this keeps the iteration counter below T — confirm it is intended.
start_epoch = 0
stop_epoch = start_epoch + epochs - 1
for epoch in range(start_epoch, stop_epoch):
    train_loop(epoch, train_dataloader, net, criterion, optimizer)
    test_loop(epoch, test_dataloader, net, criterion)

    sparsity_hard = count_sparsity.sparsity(net)
    print(sparsity_hard)
  


Epoch: 1
Avg loss: 3.485959 

tensor(0.)

Epoch: 2
Avg loss: 3.433026 

tensor(0.)

Epoch: 3
Avg loss: 3.413862 

tensor(0.)

Epoch: 4
Avg loss: 3.398223 

tensor(0.)

Epoch: 5
Avg loss: 3.385856 

tensor(0.)

Epoch: 6
Avg loss: 3.373995 

tensor(0.)

Epoch: 7
Avg loss: 3.363832 

tensor(0.)

Epoch: 8
Avg loss: 3.354386 

tensor(0.)

Epoch: 9
Avg loss: 3.341294 

tensor(0.)

Epoch: 10
Avg loss: 3.321783 

tensor(0.)

Epoch: 11
Avg loss: 3.296381 

tensor(0.)

Epoch: 12
Avg loss: 3.269152 

tensor(0.)

Epoch: 13
Avg loss: 3.246793 

tensor(0.)

Epoch: 14
Avg loss: 3.235861 

tensor(0.)

Epoch: 15
Avg loss: 3.227586 

tensor(0.)

Epoch: 16
Avg loss: 3.219754 

tensor(0.)

Epoch: 17
Avg loss: 3.213249 

tensor(0.)

Epoch: 18
Avg loss: 3.207367 

tensor(0.)

Epoch: 19
Avg loss: 3.200844 

tensor(0.)

Epoch: 20
Avg loss: 3.193749 

tensor(0.)

Epoch: 21
Avg loss: 3.188567 

tensor(0.)

Epoch: 22
Avg loss: 3.182839 

tensor(0.)

Epoch: 23
Avg loss: 3.177600 

tensor(0.)

Epoch: 24
Avg loss:

In [9]:
# Mean absolute first-layer weight per input (averaged over the hidden units),
# for the first five inputs.
net.linear1.weight.abs().mean(dim=0)[:5]

tensor([0.2807, 0.1444, 0.2042, 0.4275, 0.2266], grad_fn=<SliceBackward0>)

In [10]:
# Same per-input statistic for the remaining inputs (indices 5 through 999).
net.linear1.weight.abs().mean(dim=0)[5:1000]

tensor([0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0.,
        0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0.,
        0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0.,
        0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0.,
        0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0.,
        0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0.,
        0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0.,
        0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0.,
        0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0.,
        0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 