In [None]:
import torch 
import torch.optim as optim 
import torch.nn as nn 
import torch.nn.functional as F
from torchvision.datasets import MNIST
from torchvision.transforms import Compose, ToTensor, Normalize
from torch.utils.data import DataLoader
import numpy as np
import matplotlib.pyplot as plt
from tqdm import trange

from models import NN
from compute_score import compute_score

# Device on which the models and the per-class data tensors are placed (via `.to(DEVICE)`).
DEVICE="cpu"

In [5]:
# Preprocessing: convert images to tensors, then normalise with the constants
# 0.1307 / 0.3081 (commonly quoted as the MNIST mean / std).
transform = Compose([ToTensor(), Normalize((0.1307,), (0.3081,))])

# The dataset is expected to already be on disk (download=False).
trainset = MNIST(
    root="../../../coding/Dataset/",
    train=True,
    download=False,
    transform=transform,
)
trainloader = DataLoader(trainset, batch_size=256, shuffle=True)

# Bucket every (transformed) training image by its label ...
samples_by_class = {digit: [] for digit in range(10)}
for image, digit in trainset:
    samples_by_class[digit].append(image)

# ... then collapse each bucket into a single tensor living on DEVICE, so a
# whole class can be pushed through a model in one forward pass.
class_to_data = {
    digit: torch.stack(images).to(DEVICE)
    for digit, images in samples_by_class.items()
}

# Hidden Layer of Size 10

In [None]:
# Experiment: train 10 independently initialised networks for 10 epochs each.
# After initialisation and after every epoch we record
#   * the empirical score  mean_x (1 - softmax(f(x))[true class])^2,
#   * the bound  exp(-lambda_min(t)   * t) * score(0)   ("bound lambda_t"),
#   * the bound  exp(-lambda_min(0)   * t) * score(0)   ("bound lambda_0"),
# where lambda_min is the first value returned by `compute_score`.
# The curves are averaged over the runs and plotted.
hidden_dim = 10

all_losses_10 = []
all_borne_infinie_10 = []
all_borne_finie_10 = []

for run in range(10):
    print(f"**** model {run} ****")

    model = NN(hidden_dim=hidden_dim).to(DEVICE)

    criterion = nn.CrossEntropyLoss()
    optimizer = optim.Adam(model.parameters(), lr=1e-5)

    records_loss = []
    records_borne_timesteps = []
    records_borne_lambda0 = []
    records_loss_exp = []

    # Measure the initial state in eval mode, like the per-epoch measurements below.
    model.eval()
    lambda_min_init, classes = compute_score(model, trainset, device=DEVICE)

    # No gradients are needed to evaluate the score; accumulate plain floats.
    with torch.no_grad():
        full_score_init = sum(
            torch.sum((1 - F.softmax(model(class_to_data[c]), dim=1)[:, c]) ** 2).item()
            for c in classes
        ) / len(trainset)

    # At t = 0 both bounds coincide with the measured score.
    records_borne_timesteps.append(full_score_init)
    records_borne_lambda0.append(full_score_init)
    records_loss_exp.append(full_score_init)

    pbar = trange(10)
    for epoch in pbar:
        model.train()

        running_loss = 0
        for inputs, targets in trainloader:
            # Keep the batch on the same device as the model.
            inputs, targets = inputs.to(DEVICE), targets.to(DEVICE)
            preds = model(inputs)
            loss = criterion(preds, targets)
            optimizer.zero_grad()
            loss.backward()
            optimizer.step()
            running_loss+=loss.item()
        pbar.set_description(f"Epoch : {epoch+1} : loss {running_loss}")

        model.eval()
        lambda_min, classes = compute_score(model, trainset, device=DEVICE)

        with torch.no_grad():
            full_score = sum(
                torch.sum((1 - F.softmax(model(class_to_data[c]), dim=1)[:, c]) ** 2).item()
                for c in classes
            ) / len(trainset)

        # Store the bounds as Python floats so every record list is homogeneous
        # and np.mean below works on a plain numeric array.
        # NOTE(review): assumes compute_score returns a scalar-like lambda_min
        # (float / NumPy scalar / 0-d tensor) -- confirm.
        borne_timesteps = float(np.exp(-float(lambda_min) * (epoch + 1))) * full_score_init
        borne_lambda0 = float(np.exp(-float(lambda_min_init) * (epoch + 1))) * full_score_init

        records_borne_timesteps.append(borne_timesteps)
        records_borne_lambda0.append(borne_lambda0)
        records_loss_exp.append(full_score)
        records_loss.append(running_loss)

    all_losses_10.append(records_loss_exp)
    all_borne_finie_10.append(records_borne_timesteps)
    all_borne_infinie_10.append(records_borne_lambda0)

# Raw strings: "\l" is not a valid escape sequence in a normal string literal.
fig, ax = plt.subplots()
ax.plot(np.mean(all_losses_10, axis=0), c="red", label="train loss")
ax.plot(np.mean(all_borne_finie_10, axis=0), c="blue", label=r"bound $\lambda_t$")
ax.plot(np.mean(all_borne_infinie_10, axis=0), c="green", label=r"bound $\lambda_0$")
ax.set_xlabel("epoch")
ax.set_title(f"Hidden layer size : {hidden_dim}")
ax.legend();

# Hidden Layer of Size 100

In [None]:
# Experiment: train 10 independently initialised networks for 10 epochs each.
# After initialisation and after every epoch we record
#   * the empirical score  mean_x (1 - softmax(f(x))[true class])^2,
#   * the bound  exp(-lambda_min(t)   * t) * score(0)   ("bound lambda_t"),
#   * the bound  exp(-lambda_min(0)   * t) * score(0)   ("bound lambda_0"),
# where lambda_min is the first value returned by `compute_score`.
# The curves are averaged over the runs and plotted.
hidden_dim = 100

all_losses_100 = []
all_borne_infinie_100 = []
all_borne_finie_100 = []

for run in range(10):
    print(f"**** model {run} ****")

    model = NN(hidden_dim=hidden_dim).to(DEVICE)

    criterion = nn.CrossEntropyLoss()
    optimizer = optim.Adam(model.parameters(), lr=1e-5)

    records_loss = []
    records_borne_timesteps = []
    records_borne_lambda0 = []
    records_loss_exp = []

    # Measure the initial state in eval mode, like the per-epoch measurements below.
    model.eval()
    lambda_min_init, classes = compute_score(model, trainset, device=DEVICE)

    # No gradients are needed to evaluate the score; accumulate plain floats.
    with torch.no_grad():
        full_score_init = sum(
            torch.sum((1 - F.softmax(model(class_to_data[c]), dim=1)[:, c]) ** 2).item()
            for c in classes
        ) / len(trainset)

    # At t = 0 both bounds coincide with the measured score.
    records_borne_timesteps.append(full_score_init)
    records_borne_lambda0.append(full_score_init)
    records_loss_exp.append(full_score_init)

    pbar = trange(10)
    for epoch in pbar:
        model.train()

        running_loss = 0
        for inputs, targets in trainloader:
            # Keep the batch on the same device as the model.
            inputs, targets = inputs.to(DEVICE), targets.to(DEVICE)
            preds = model(inputs)
            loss = criterion(preds, targets)
            optimizer.zero_grad()
            loss.backward()
            optimizer.step()
            running_loss+=loss.item()
        pbar.set_description(f"Epoch : {epoch+1} : loss {running_loss}")

        model.eval()
        lambda_min, classes = compute_score(model, trainset, device=DEVICE)

        with torch.no_grad():
            full_score = sum(
                torch.sum((1 - F.softmax(model(class_to_data[c]), dim=1)[:, c]) ** 2).item()
                for c in classes
            ) / len(trainset)

        # Store the bounds as Python floats so every record list is homogeneous
        # and np.mean below works on a plain numeric array.
        # NOTE(review): assumes compute_score returns a scalar-like lambda_min
        # (float / NumPy scalar / 0-d tensor) -- confirm.
        borne_timesteps = float(np.exp(-float(lambda_min) * (epoch + 1))) * full_score_init
        borne_lambda0 = float(np.exp(-float(lambda_min_init) * (epoch + 1))) * full_score_init

        records_borne_timesteps.append(borne_timesteps)
        records_borne_lambda0.append(borne_lambda0)
        records_loss_exp.append(full_score)
        records_loss.append(running_loss)

    all_losses_100.append(records_loss_exp)
    all_borne_finie_100.append(records_borne_timesteps)
    all_borne_infinie_100.append(records_borne_lambda0)

# Raw strings: "\l" is not a valid escape sequence in a normal string literal.
fig, ax = plt.subplots()
ax.plot(np.mean(all_losses_100, axis=0), c="red", label="train loss")
ax.plot(np.mean(all_borne_finie_100, axis=0), c="blue", label=r"bound $\lambda_t$")
ax.plot(np.mean(all_borne_infinie_100, axis=0), c="green", label=r"bound $\lambda_0$")
ax.set_xlabel("epoch")
ax.set_title(f"Hidden layer size : {hidden_dim}")
ax.legend();

# Hidden Layer of Size 1000

In [None]:
# Experiment: train 10 independently initialised networks for 10 epochs each.
# After initialisation and after every epoch we record
#   * the empirical score  mean_x (1 - softmax(f(x))[true class])^2,
#   * the bound  exp(-lambda_min(t)   * t) * score(0)   ("bound lambda_t"),
#   * the bound  exp(-lambda_min(0)   * t) * score(0)   ("bound lambda_0"),
# where lambda_min is the first value returned by `compute_score`.
# The curves are averaged over the runs and plotted.
hidden_dim = 1000

all_losses_1000 = []
all_borne_infinie_1000 = []
all_borne_finie_1000 = []

for run in range(10):
    print(f"**** model {run} ****")

    model = NN(hidden_dim=hidden_dim).to(DEVICE)

    criterion = nn.CrossEntropyLoss()
    optimizer = optim.Adam(model.parameters(), lr=1e-5)

    records_loss = []
    records_borne_timesteps = []
    records_borne_lambda0 = []
    records_loss_exp = []

    # Measure the initial state in eval mode, like the per-epoch measurements below.
    model.eval()
    lambda_min_init, classes = compute_score(model, trainset, device=DEVICE)

    # No gradients are needed to evaluate the score; accumulate plain floats.
    with torch.no_grad():
        full_score_init = sum(
            torch.sum((1 - F.softmax(model(class_to_data[c]), dim=1)[:, c]) ** 2).item()
            for c in classes
        ) / len(trainset)

    # At t = 0 both bounds coincide with the measured score.
    records_borne_timesteps.append(full_score_init)
    records_borne_lambda0.append(full_score_init)
    records_loss_exp.append(full_score_init)

    pbar = trange(10)
    for epoch in pbar:
        model.train()

        running_loss = 0
        for inputs, targets in trainloader:
            # Keep the batch on the same device as the model.
            inputs, targets = inputs.to(DEVICE), targets.to(DEVICE)
            preds = model(inputs)
            loss = criterion(preds, targets)
            optimizer.zero_grad()
            loss.backward()
            optimizer.step()
            running_loss+=loss.item()
        pbar.set_description(f"Epoch : {epoch+1} : loss {running_loss}")

        model.eval()
        lambda_min, classes = compute_score(model, trainset, device=DEVICE)

        with torch.no_grad():
            full_score = sum(
                torch.sum((1 - F.softmax(model(class_to_data[c]), dim=1)[:, c]) ** 2).item()
                for c in classes
            ) / len(trainset)

        # Store the bounds as Python floats so every record list is homogeneous
        # and np.mean below works on a plain numeric array.
        # NOTE(review): assumes compute_score returns a scalar-like lambda_min
        # (float / NumPy scalar / 0-d tensor) -- confirm.
        borne_timesteps = float(np.exp(-float(lambda_min) * (epoch + 1))) * full_score_init
        borne_lambda0 = float(np.exp(-float(lambda_min_init) * (epoch + 1))) * full_score_init

        records_borne_timesteps.append(borne_timesteps)
        records_borne_lambda0.append(borne_lambda0)
        records_loss_exp.append(full_score)
        records_loss.append(running_loss)

    all_losses_1000.append(records_loss_exp)
    all_borne_finie_1000.append(records_borne_timesteps)
    all_borne_infinie_1000.append(records_borne_lambda0)

# Raw strings: "\l" is not a valid escape sequence in a normal string literal.
fig, ax = plt.subplots()
ax.plot(np.mean(all_losses_1000, axis=0), c="red", label="train loss")
ax.plot(np.mean(all_borne_finie_1000, axis=0), c="blue", label=r"bound $\lambda_t$")
ax.plot(np.mean(all_borne_infinie_1000, axis=0), c="green", label=r"bound $\lambda_0$")
ax.set_xlabel("epoch")
ax.set_title(f"Hidden layer size : {hidden_dim}")
ax.legend();