In [1]:
# Change the kernel's working directory to the `hypernet` sub-directory.
# The path is relative, so re-running this cell in the same kernel tries to
# descend one level further (hypernet/hypernet).
%cd hypernet

/home/z1157095/hypernet-cnn/hypernet


In [2]:
from dotenv import load_dotenv
load_dotenv()

import random
import os

In [3]:
from comet_ml import Experiment, Optimizer

import torch
import torchvision.transforms as transforms
import torchvision.datasets as datasets
import matplotlib.pyplot as plt
import numpy as np
import torch.nn.functional as F
import torch.utils.data as data_utils
import pandas as pd
from collections import defaultdict

torch.set_default_dtype(torch.float32)

In [4]:
from torchsummary import summary
import matplotlib.pyplot as plt
from tqdm import tqdm, trange

In [5]:
import tabular_hypernet as hp
from tabular_hypernet.mask_design import feature_variances

In [6]:
# The Comet API key is a secret: it has to be supplied through the environment
# (for example the .env file read by load_dotenv() at the top of the notebook)
# and must never be hard-coded in, or echoed by, a notebook cell.
# NOTE(review): a key that was ever committed in this cell should be treated
# as leaked and rotated.
comet_key_configured = bool(os.environ.get("COMET_KEY"))

# Show only whether the key is available, never its value.
comet_key_configured

In [7]:
# TODO

# TabSSLCrossEntropyLoss: if the confidence is high for both masks, add the self-supervised loss.

# Verify whether the masks are always divided into pairs in the same way.

In [8]:
class SSLCELossWithThreshold(torch.nn.Module):
    """Two-view cross-entropy loss with an optional pseudo-label threshold.

    The value returned by ``forward`` is
    ``supervised_loss + beta * self_supervised_loss`` where

    * ``supervised_loss`` is the sum of the cross-entropies of both views'
      logits against the ground-truth labels;
    * ``self_supervised_loss`` lets each view act as the target of the other
      one on the unlabelled batch.  With a truthy ``threshold`` only the rows
      whose softmax contains a probability ``>= threshold`` are used and the
      target is the arg-max class of those rows.  Otherwise the target is
      ``unsup_target_wrapper(logits, dim=1)`` of the other view.

    Both components are stored on the module after every ``forward`` call.

    Args:
        beta: weight of the self-supervised term.  A falsy value (e.g. ``0``)
            skips the unlabelled branch altogether.
        unsup_target_wrapper: callable applied as ``wrapper(logits, dim=1)``
            to build the targets when no threshold is active.
        threshold: confidence cut-off for hard pseudo-labels.  ``None`` and
            ``0`` are both falsy and therefore select the wrapper-based branch.
    """

    def __init__(self, beta=0.1, unsup_target_wrapper=torch.nn.functional.softmax, threshold=None):
        super().__init__()

        # Criteria comparing each view with the ground-truth labels.
        self.y_f1 = torch.nn.CrossEntropyLoss()
        self.y_f2 = torch.nn.CrossEntropyLoss()

        # Criteria comparing one view with targets derived from the other.
        self.f1_f2 = torch.nn.CrossEntropyLoss()
        self.f2_f1 = torch.nn.CrossEntropyLoss()

        self.beta = beta
        self.unsup_target_wrapper = unsup_target_wrapper
        self.threshold = threshold

    def is_observ_above_threshold(self, data):
        """Return a boolean mask of the rows of ``data`` that contain at
        least one value ``>= self.threshold``."""
        return (data >= self.threshold).any(dim=1)

    def _hard_label_loss(self, criterion, teacher_logits, student_logits):
        """Cross-entropy of ``student_logits`` against the arg-max classes of
        the confident rows of ``teacher_logits``; ``None`` if no row passes
        the threshold."""
        teacher_probs = torch.nn.functional.softmax(teacher_logits, dim=1)
        confident = self.is_observ_above_threshold(teacher_probs)
        confident_probs = teacher_probs[confident]

        if len(confident_probs) == 0:
            return None

        pseudo_labels = torch.argmax(confident_probs, dim=1)
        return criterion(student_logits[confident], pseudo_labels)

    def forward(self, sup_input, unsup_input):
        """Compute the combined loss.

        Args:
            sup_input: ``(outputs1, outputs2, labels)`` for the labelled batch.
            unsup_input: ``(outputs1, outputs2)`` for the unlabelled batch.

        Returns:
            ``supervised_loss + beta * self_supervised_loss``.
        """
        labelled_out1, labelled_out2, labels = sup_input
        free_out1, free_out2 = unsup_input

        self.supervised_loss = self.y_f1(labelled_out1, labels) + self.y_f2(labelled_out2, labels)
        # Stays the plain integer 0 when the unlabelled branch adds nothing.
        self.self_supervised_loss = 0

        if self.beta and self.threshold:
            # View 1 supervises view 2 first, then the other way round.
            pairings = (
                (self.f2_f1, free_out1, free_out2),
                (self.f1_f2, free_out2, free_out1),
            )
            for criterion, teacher, student in pairings:
                term = self._hard_label_loss(criterion, teacher, student)
                if term is not None:
                    self.self_supervised_loss += term
        elif self.beta:
            view1_vs_view2 = self.f1_f2(free_out1, self.unsup_target_wrapper(free_out2, dim=1))
            view2_vs_view1 = self.f2_f1(free_out2, self.unsup_target_wrapper(free_out1, dim=1))
            self.self_supervised_loss = view1_vs_view2 + view2_vs_view1

        return self.supervised_loss + self.beta * self.self_supervised_loss


### Exploration of dataset variances over features and analysis of the best temperature schedule

In [12]:
# Result of feature_variances for the dataset; divided by the temperature and
# fed to a softmax in the exploration loop below.
# NOTE(review): `dataset` is only assigned in the "Setup for training" section
# further down, so this cell fails on a fresh kernel run top-to-bottom.
fv = feature_variances(dataset)


KeyboardInterrupt



In [None]:
# Scheduler used by the exploration loop below (max_temp=1).
# NOTE(review): IncByOneTempScheduler is neither defined nor imported in the
# visible cells, and mask_size is assigned further down — confirm both exist
# before this cell runs.
temp_scheduler = IncByOneTempScheduler(1, n=mask_size, max_temp=1)

In [None]:
# Display the scheduler's `available_temps` attribute.
temp_scheduler.available_temps

In [None]:
# For every step of the schedule, turn `fv` into a softmax distribution at the
# current temperature, print how peaked it is and plot it.
for i in range(mask_size):
    temp = temp_scheduler.update_temp(i)
    probs = F.softmax(fv / temp, dim=0, dtype=torch.float32).numpy()

    highest, lowest = np.max(probs), np.min(probs)

    print('temp', temp)
    print('max', highest)
    print('min', lowest)
    print('abs delta', highest - lowest)
    print('max bigger than min', highest / lowest, 'times')

    # Clear the current figure so curves from earlier steps do not pile up.
    plt.clf()
    plt.plot(probs)
    plt.show()

# Setup for training

In [24]:
# Seed shared by the numpy / torch / random seeding calls in this notebook and
# logged with every experiment.
seed = 5

In [25]:
# Seed numpy, torch and Python's `random` with the same value.
np.random.seed(seed)
torch.manual_seed(seed)
random.seed(seed)

In [33]:
# Passed as `n` to the temperature schedulers, as the second argument of the
# mask selector, and as the first entry of the first target_architecture tuple.
mask_size = 100

In [27]:
# Dataset object returned by tabular_hypernet's semi-supervised helpers; it is
# passed to feature_variances and get_dataloaders below.
# NOTE(review): presumably 784-feature data, given the Linear(784, ...) input
# layer used in the experiments — confirm.
dataset = hp.semisl.get_train_test_sets()

In [32]:
# Passed as the `epochs` argument of train_semisl.
epochs = 100

# Passed as test_nodes to hp.Hypernetwork and as an argument to train_semisl.
masks_no = 100


# Collects the return values of train_semisl, keyed by `size`.
results = defaultdict(list)
# Passed as `size` to get_dataloaders and train_semisl; presumably
# (labelled, unlabelled) sample counts — TODO confirm.
size = (100, 59900)


### New mask selector: Variance with temp scheduler 

### Temp scheduler 1..20

In [31]:
# Experiment: variance-based mask selection with IncByOneTempScheduler, max_temp=20.
# beta and lr are iterated over single-element lists, so the body runs once.
# NOTE(review): IncByOneTempScheduler and VarianceWithSoftmaxMasksSelector are
# neither defined nor imported in the visible cells — confirm where they come from.
for beta in [0.]:
    for lr in [3e-5]:
        
        temp_scheduler = IncByOneTempScheduler(1, n=mask_size, max_temp=20)
        # feature_variances(dataset) is recomputed on every run of this cell.
        mask_selector = VarianceWithSoftmaxMasksSelector(feature_variances(dataset), mask_size, temp_scheduler)

        # threshold=None selects the soft-target branch of the loss; with beta=0
        # the self-supervised term is skipped entirely.
        criterion = SSLCELossWithThreshold(beta=beta, threshold=None)

        # Re-seed all three RNGs right before the model is built.
        np.random.seed(seed)
        torch.manual_seed(seed)
        random.seed(seed)

        # model
        # target_architecture is [(mask_size, 20), (20, 10)] in this cell, whereas
        # the other experiment cells in this notebook use [(mask_size, 10), (10, 10)].
        hypernet = hp.Hypernetwork(
            architecture=torch.nn.Sequential(
                torch.nn.Linear(784, 64), 
                torch.nn.ReLU(),
                torch.nn.Linear(64, 256),
                torch.nn.ReLU(),
                torch.nn.Linear(256, 128),
            ),
            target_architecture=[(mask_size, 20), (20, 10)],
            test_nodes=masks_no,
        ).cuda()
        
        # Null out `_create_mask` and install the selector's output as `test_mask`.
        # NOTE(review): presumably this makes the hypernetwork use the precomputed
        # masks instead of creating its own — verify in hp.Hypernetwork.
        hypernet._create_mask = None
        hypernet.test_mask = mask_selector(hypernet, hypernet.test_nodes)
        

        hypernet = hypernet.train()



        optimizer = torch.optim.Adam(hypernet.parameters(), lr=lr)

        # loaders
        sup_trainloader, unsup_trainloader, testloader = hp.semisl.get_dataloaders(dataset=dataset, size=size, batch_size=32, test_batch_size=64)
        trainloader = hp.semisl.TrainDataLoaderSemi(sup_trainloader, unsup_trainloader)

        # Train and append whatever train_semisl returns to results[size].
        # log_to_comet=False here (the other experiment cells pass True).
        results[size].append(hp.semisl.train_semisl(hypernet,
                                          optimizer,
                                          criterion,
                                          (trainloader, testloader), 
                                          size,
                                          epochs,
                                          masks_no,
                                          changing_beta=None,
                                          log_to_comet=False,
                                          project_name="mask-selection",
                                          tags=['best hyperparam'],
                                          description="""
                                          masks are selected based on feature variance with softmax and temp
                                          """,
                                        log_params={'seed': seed, 'temp scheduler': 'inc by one 1..20', 'max_temp' : 20}
                                        ))


KeyboardInterrupt: 

### Temp scheduler 1..11

In [None]:
# Experiment: variance-based mask selection with IncByOneTempScheduler, max_temp=11.
# beta and lr are iterated over single-element lists, so the body runs once.
# NOTE(review): IncByOneTempScheduler and VarianceWithSoftmaxMasksSelector are
# neither defined nor imported in the visible cells — confirm where they come from.
for beta in [0.]:
    for lr in [3e-5]:
        
        temp_scheduler = IncByOneTempScheduler(1, n=mask_size, max_temp=11)
        # feature_variances(dataset) is recomputed on every run of this cell.
        mask_selector = VarianceWithSoftmaxMasksSelector(feature_variances(dataset), mask_size, temp_scheduler)

        # threshold=None selects the soft-target branch of the loss; with beta=0
        # the self-supervised term is skipped entirely.
        criterion = SSLCELossWithThreshold(beta=beta, threshold=None)

        # Re-seed all three RNGs right before the model is built.
        np.random.seed(seed)
        torch.manual_seed(seed)
        random.seed(seed)

        # model


        hypernet = hp.Hypernetwork(
            architecture=torch.nn.Sequential(
                torch.nn.Linear(784, 64), 
                torch.nn.ReLU(),
                torch.nn.Linear(64, 256),
                torch.nn.ReLU(),
                torch.nn.Linear(256, 128),
            ),
            target_architecture=[(mask_size, 10), (10, 10)],
            test_nodes=masks_no,
        ).cuda()
        # Null out `_create_mask` and install the selector's output as `test_mask`.
        # NOTE(review): presumably this makes the hypernetwork use the precomputed
        # masks instead of creating its own — verify in hp.Hypernetwork.
        hypernet._create_mask = None
        hypernet.test_mask = mask_selector(hypernet, hypernet.test_nodes)

        hypernet = hypernet.train()



        optimizer = torch.optim.Adam(hypernet.parameters(), lr=lr)

        # loaders
        sup_trainloader, unsup_trainloader, testloader = hp.semisl.get_dataloaders(dataset=dataset, size=size, batch_size=32, test_batch_size=64)
        trainloader = hp.semisl.TrainDataLoaderSemi(sup_trainloader, unsup_trainloader)

        # Train and append whatever train_semisl returns to results[size].
        results[size].append(hp.semisl.train_semisl(hypernet,
                                          optimizer,
                                          criterion,
                                          (trainloader, testloader), 
                                          size,
                                          epochs,
                                          masks_no,
                                          changing_beta=None,
                                          log_to_comet=True,
                                          project_name="mask-selection",
                                          tags=['best hyperparam'],
                                          description="""
                                          masks are selected based on feature variance with softmax and temp
                                          """,
                                        log_params={'seed': seed, 'temp scheduler': 'inc by one 1..11', 'max_temp' : 11}
                                        ))


### Temp scheduler 1..1

In [None]:
# Experiment: variance-based mask selection with IncByOneTempScheduler, max_temp=1.
# beta and lr are iterated over single-element lists, so the body runs once.
# NOTE(review): IncByOneTempScheduler and VarianceWithSoftmaxMasksSelector are
# neither defined nor imported in the visible cells — confirm where they come from.
for beta in [0.]:
    for lr in [3e-5]:
        
        temp_scheduler = IncByOneTempScheduler(1, n=mask_size, max_temp=1)
        # feature_variances(dataset) is recomputed on every run of this cell.
        mask_selector = VarianceWithSoftmaxMasksSelector(feature_variances(dataset), mask_size, temp_scheduler)

        # threshold=None selects the soft-target branch of the loss; with beta=0
        # the self-supervised term is skipped entirely.
        criterion = SSLCELossWithThreshold(beta=beta, threshold=None)

        # Re-seed all three RNGs right before the model is built.
        np.random.seed(seed)
        torch.manual_seed(seed)
        random.seed(seed)

        # model


        hypernet = hp.Hypernetwork(
            architecture=torch.nn.Sequential(
                torch.nn.Linear(784, 64), 
                torch.nn.ReLU(),
                torch.nn.Linear(64, 256),
                torch.nn.ReLU(),
                torch.nn.Linear(256, 128),
            ),
            target_architecture=[(mask_size, 10), (10, 10)],
            test_nodes=masks_no,
        ).cuda()
        # Null out `_create_mask` and install the selector's output as `test_mask`.
        # NOTE(review): presumably this makes the hypernetwork use the precomputed
        # masks instead of creating its own — verify in hp.Hypernetwork.
        hypernet._create_mask = None
        hypernet.test_mask = mask_selector(hypernet, hypernet.test_nodes)

        hypernet = hypernet.train()



        optimizer = torch.optim.Adam(hypernet.parameters(), lr=lr)

        # loaders
        sup_trainloader, unsup_trainloader, testloader = hp.semisl.get_dataloaders(dataset=dataset, size=size, batch_size=32, test_batch_size=64)
        trainloader = hp.semisl.TrainDataLoaderSemi(sup_trainloader, unsup_trainloader)

        # Train and append whatever train_semisl returns to results[size].
        results[size].append(hp.semisl.train_semisl(hypernet,
                                          optimizer,
                                          criterion,
                                          (trainloader, testloader), 
                                          size,
                                          epochs,
                                          masks_no,
                                          changing_beta=None,
                                          log_to_comet=True,
                                          project_name="mask-selection",
                                          tags=['best hyperparam'],
                                          description="""
                                          masks are selected based on feature variance with softmax and temp
                                          """,
                                        log_params={'seed': seed, 'temp scheduler': '1..1', 'max_temp' : 1}
                                        ))


### Half half temp scheduler

In [None]:
# Experiment: variance-based mask selection with HalfHalfTempScheduler, max_temp=11.
# beta and lr are iterated over single-element lists, so the body runs once.
# NOTE(review): HalfHalfTempScheduler and VarianceWithSoftmaxMasksSelector are
# neither defined nor imported in the visible cells — confirm where they come from.
for beta in [0.]:
    for lr in [3e-5]:
        
        temp_scheduler = HalfHalfTempScheduler(1, n=mask_size, max_temp=11)
        # feature_variances(dataset) is recomputed on every run of this cell.
        mask_selector = VarianceWithSoftmaxMasksSelector(feature_variances(dataset), mask_size, temp_scheduler)

        # threshold=None selects the soft-target branch of the loss; with beta=0
        # the self-supervised term is skipped entirely.
        criterion = SSLCELossWithThreshold(beta=beta, threshold=None)

        # Re-seed all three RNGs right before the model is built.
        np.random.seed(seed)
        torch.manual_seed(seed)
        random.seed(seed)

        # model


        hypernet = hp.Hypernetwork(
            architecture=torch.nn.Sequential(
                torch.nn.Linear(784, 64), 
                torch.nn.ReLU(),
                torch.nn.Linear(64, 256),
                torch.nn.ReLU(),
                torch.nn.Linear(256, 128),
            ),
            target_architecture=[(mask_size, 10), (10, 10)],
            test_nodes=masks_no,
        ).cuda()
        # Null out `_create_mask` and install the selector's output as `test_mask`.
        # NOTE(review): presumably this makes the hypernetwork use the precomputed
        # masks instead of creating its own — verify in hp.Hypernetwork.
        hypernet._create_mask = None
        hypernet.test_mask = mask_selector(hypernet, hypernet.test_nodes)

        hypernet = hypernet.train()



        optimizer = torch.optim.Adam(hypernet.parameters(), lr=lr)

        # loaders
        sup_trainloader, unsup_trainloader, testloader = hp.semisl.get_dataloaders(dataset=dataset, size=size, batch_size=32, test_batch_size=64)
        trainloader = hp.semisl.TrainDataLoaderSemi(sup_trainloader, unsup_trainloader)

        # Train and append whatever train_semisl returns to results[size].
        results[size].append(hp.semisl.train_semisl(hypernet,
                                          optimizer,
                                          criterion,
                                          (trainloader, testloader), 
                                          size,
                                          epochs,
                                          masks_no,
                                          changing_beta=None,
                                          log_to_comet=True,
                                          project_name="mask-selection",
                                          tags=['best hyperparam'],
                                          description="""
                                          masks are selected based on feature variance with softmax and temp
                                          """,
                                        log_params={'seed': seed, 'temp scheduler': 'half half', 'max_temp' : 11}
                                        ))


In [None]:
#68.6   1.78

### No feature selection

In [None]:
# Baseline experiment: no mask selector is built and the hypernetwork's masks
# are left untouched (neither `_create_mask` nor `test_mask` is overridden).
# beta and lr are iterated over single-element lists, so the body runs once.
for beta in [0.]:

    for lr in [3e-5]:


        # threshold=None selects the soft-target branch of the loss; with beta=0
        # the self-supervised term is skipped entirely.
        criterion = SSLCELossWithThreshold(beta=beta, threshold=None)

        # Re-seed all three RNGs right before the model is built.
        np.random.seed(seed)
        torch.manual_seed(seed)
        random.seed(seed)

        # model


        hypernet = hp.Hypernetwork(
            architecture=torch.nn.Sequential(
                torch.nn.Linear(784, 64), 
                torch.nn.ReLU(),
                torch.nn.Linear(64, 256),
                torch.nn.ReLU(),
                torch.nn.Linear(256, 128),
            ),
            target_architecture=[(mask_size, 10), (10, 10)],
            test_nodes=masks_no,
        ).cuda()
        
        hypernet = hypernet.train()



        optimizer = torch.optim.Adam(hypernet.parameters(), lr=lr)

        # loaders
        sup_trainloader, unsup_trainloader, testloader = hp.semisl.get_dataloaders(dataset=dataset, size=size, batch_size=32, test_batch_size=64)
        trainloader = hp.semisl.TrainDataLoaderSemi(sup_trainloader, unsup_trainloader)

        # Train and append whatever train_semisl returns to results[size].
        # The description states that no mask selection is used, in line with
        # log_params; the text copied from the variance-based cells claimed the
        # opposite and would mislabel this run.
        results[size].append(hp.semisl.train_semisl(hypernet,
                                          optimizer,
                                          criterion,
                                          (trainloader, testloader), 
                                          size,
                                          epochs,
                                          masks_no,
                                          changing_beta=None,
                                          log_to_comet=True,
                                          project_name="mask-selection",
                                          tags=['best hyperparam'],
                                          description="""
                                          baseline without mask selection: the hypernetwork's own masks are used
                                          """,
                                        log_params={'seed': seed, 'temp scheduler': 'none', 'mask selection': 'none'}
                                        ))
