In [1]:
# Bridge to R through rpy2 so that R packages can be installed and used from Python.
import rpy2.robjects.packages as rpackages

# import R's utility package (it provides the CRAN mirror / package-install helpers)
utils = rpackages.importr('utils')

# select a mirror for R packages
utils.chooseCRANmirror(ind=1) # ind=1 selects the first mirror in the list

<rpy2.rinterface_lib.sexp.NULLType object at 0x7f3c9c327c40> [RTYPES.NILSXP]

In [47]:
# utils.install_packages('logisticPCA')

In [2]:
%cd hypernet

/home/z1157095/hypernet-cnn/hypernet


In [3]:
from dotenv import load_dotenv
# Load variables from a .env file into the process environment.
# NOTE(review): presumably this is where secrets such as COMET_KEY should come from - confirm.
load_dotenv()

import random
import os

In [4]:
from comet_ml import Experiment, Optimizer

import torch
import torchvision.transforms as transforms
import torchvision.datasets as datasets
import matplotlib.pyplot as plt
import numpy as np
import torch.nn.functional as F
import torch.utils.data as data_utils
import pandas as pd
from collections import defaultdict

torch.set_default_dtype(torch.float32)

In [5]:
from torchsummary import summary
import matplotlib.pyplot as plt
from tqdm import tqdm, trange

In [6]:
import tabular_hypernet as hp

In [7]:
# hp.lpca

In [8]:
# hp.training_utils.get_dataset

In [9]:
# The Comet API key is a secret and must never be committed in the notebook.
# It has to be provided through the environment (an exported variable or an
# untracked .env file) under the name COMET_KEY, which train_with_pcs reads.
if not os.environ.get("COMET_KEY"):
    print("COMET_KEY is not set: export it or add it to the .env file before creating a Comet experiment.")

# Show only whether the key is configured, never its value.
bool(os.environ.get("COMET_KEY"))

'<redacted>'

In [10]:
# add shuffle for supervised


In [11]:
import torch
import numpy as np
from tabular_hypernet.modules import InsertableNet
import enum
import torch.nn.functional as F

torch.set_default_dtype(torch.float32)

class TrainingModes(enum.Enum):
    """Training strategies accepted by the ``mode`` argument of ``Hypernetwork``."""
    # Hypernetwork.forward sends this mode to the slow-step training path.
    SLOW_STEP = "slow-step"
    # Hypernetwork.forward currently sends this mode to the same slow-step path as SLOW_STEP.
    CARTHESIAN = "carth"

In [12]:
class Hypernetwork(torch.nn.Module):
    """Network that generates the weights of small target networks.

    The wrapped ``architecture`` (extended with a final linear "output_layer")
    maps a conditioning vector - a binary feature mask or, when ``pc_mask`` /
    ``self.pcs`` is used, another vector of the same width as the first layer's
    input - to a flat weight vector. That vector is turned into an
    ``InsertableNet`` with the layout given by ``target_architecture``, which is
    then applied to the input columns selected by the binary mask.
    """

    def __init__(
        self,
        architecture=None,
        target_architecture=None,
        test_nodes=100,
        mode=TrainingModes.SLOW_STEP,
        device="cuda:0",
        mask_len=None,
    ):
        """ Initialize a hypernetwork.
        Args:
            architecture - torch.nn.Sequential whose first parameter is a 2-D weight
                (e.g. a Linear layer). An extra "output_layer" is appended to it
                IN PLACE. When None, a fresh 784-128-64-64 MLP is built.
            target_architecture - list of (in_features, out_features) pairs describing
                the target network. Defaults to [(20, 10), (10, 10)].
            test_nodes - number of masks (and therefore target networks) in the test ensemble
            mode - a TrainingModes member
            device - device the model and masks are placed on
            mask_len - length of a binary mask (number of input features).
                When falsy, the input width of ``architecture`` is used.
        """
        super().__init__()
        # Build the defaults per instance. A module used as a default argument is
        # created once and shared, and the add_module call below would keep
        # mutating that shared object for every new Hypernetwork.
        if architecture is None:
            architecture = torch.nn.Sequential(
                torch.nn.Linear(784, 128),
                torch.nn.ReLU(),
                torch.nn.Linear(128, 64),
                torch.nn.ReLU(),
                torch.nn.Linear(64, 64),
            )
        if target_architecture is None:
            target_architecture = [(20, 10), (10, 10)]

        self.target_outsize = target_architecture[-1][-1]
        # Number of input features selected by every mask.
        self.mask_size = target_architecture[0][0]
        self.target_architecture = target_architecture
        self.device = device
        self.mode = mode

        # Total number of weights and biases the hypernetwork has to emit.
        self.out_size = self.calculate_outdim(target_architecture)

        self.model = architecture.to('cpu')
        first_param = next(self.model.parameters())
        self.input_size = first_param.size()[1]
        # Probe the body with a dummy sample to learn the width of its last layer.
        out_dim = self.model(torch.rand(1, self.input_size)).shape
        print(out_dim)
        output_layer = torch.nn.Linear(out_dim[1], self.out_size)
        self.model.add_module("output_layer", output_layer)
        self.model = self.model.to(device)

        # Not referenced by any method of this class.
        self.dropout = torch.nn.Dropout()
        self.relu = torch.relu

        self.template = np.zeros(mask_len if mask_len else self.input_size)
        self.test_nodes = test_nodes
        self.test_mask = self._create_mask(test_nodes)

        # The ensemble cache is rebuilt lazily after every training forward pass.
        self._retrained = True
        self._test_nets = None

    def calculate_outdim(self, architecture):
        """Return the number of parameters (weights + biases) of ``architecture``."""
        return sum(n_in * n_out + n_out for n_in, n_out in architecture)

    def to(self, device):
        """Move the hypernetwork to ``device`` and return ``self``.

        NOTE(review): the test masks are re-sampled here rather than moved, so the
        ensemble changes after calling ``to`` - confirm this is intended.
        """
        super().to(device)
        self.device = device
        self.test_mask = self._create_mask(self.test_nodes)
        self.model = self.model.to(device)
        # Cached target networks were built for the previous masks/device.
        self._retrained = True
        return self

    def _slow_step_training(self, data, mask, pc_mask=None):
        """Run the whole batch through ONE target network.

        Only the first row of ``pc_mask`` (if given) or of ``mask`` conditions the
        hypernetwork; the first row of ``mask`` selects the input columns.
        """
        # Identity check: comparing a tensor to None with != is not a reliable test.
        condition = pc_mask[:1] if pc_mask is not None else mask[:1]
        weights = self.craft_network(condition)
        selected = mask[0].to(torch.bool)
        target_net = InsertableNet(
            weights[0],
            self.target_architecture,
        )
        return target_net(data[:, selected])

    def _external_mask_training(self, data, mask):
        """Run every sample through the target network generated from its own mask.

        A target network is rebuilt only when the mask differs from the previous
        row's mask; otherwise the previous network is reused.
        """
        recalculate = [True] * len(mask)
        for i in range(1, len(mask)):
            if torch.equal(mask[i - 1], mask[i]):
                recalculate[i] = False

        weights = self.craft_network(mask)
        mask = mask.to(torch.bool)

        res = torch.zeros((len(data), self.target_outsize)).to(self.device)
        for i in range(len(data)):
            if recalculate[i]:
                target_net = InsertableNet(
                    weights[i],
                    self.target_architecture,
                )
            res[i] = target_net(data[i, mask[i]])
        return res

    def forward(self, data, mask=None, pc_mask=None):
        """Get a hypernet prediction.
        During training we use a single target network per batch (slow-step modes)
        or per sample (any other mode).
        During eval, we create a network for each test mask and average their results

        Args:
            data - prediction input
            mask - either None or a torch.tensor((data.shape[0], data.shape[1])).
                Required while training in the slow-step modes.
            pc_mask - optional conditioning vectors fed to the hypernetwork instead
                of ``mask``; only used while training in the slow-step modes.
        """
        if not self.training:
            return self._ensemble_inference(data, mask)

        self._retrained = True
        if self.mode in (TrainingModes.SLOW_STEP, TrainingModes.CARTHESIAN):
            return self._slow_step_training(data, mask, pc_mask)

        if mask is None:
            mask = self._create_mask(len(data))

        return self._external_mask_training(data, mask)

    def _ensemble_inference(self, data, mask):
        """Average the outputs of one target network per mask row.

        With ``mask`` None the stored test masks and the cached test networks are
        used. Softmax is applied to the averaged output when it has more than one column.
        """
        if mask is None:
            mask = self.test_mask
            nets = self._get_test_nets()
        else:
            nets = self.__craft_nets(mask)
        mask = mask.to(torch.bool)

        res = torch.zeros((len(data), self.target_outsize)).to(self.device)
        for i in range(len(mask)):
            res += nets[i](data[:, mask[i]])
        res /= len(mask)
        if res.shape[1] > 1:
            res = F.softmax(res, 1)
        return res

    def _get_test_nets(self):
        """Return the cached test ensemble, rebuilding it if training ran since the last build.

        The networks are generated from ``self.pcs`` when that attribute exists,
        otherwise from ``self.test_mask``.
        """
        if self._retrained:
            condition = self.pcs if hasattr(self, 'pcs') else self.test_mask
            self._test_nets = self.__craft_nets(condition)
            self._retrained = False
        return self._test_nets

    def __craft_nets(self, mask):
        """Build one ``InsertableNet`` per row of ``mask``."""
        weights = self.craft_network(mask.to(torch.float32))
        return [
            InsertableNet(
                weights[i],
                self.target_architecture,
            )
            for i in range(len(mask))
        ]

    @staticmethod
    def random_choice_noreplace2(l, n_sample, num_draw):
        '''
        l: 1-D array or list
        n_sample: sample size for each draw
        num_draw: number of draws

        Returns an array of shape (num_draw, n_sample); each row holds n_sample
        distinct elements of l.

        Intuition: Randomly generate numbers, get the index of the smallest n_sample number for each row.
        '''
        l = np.array(l)
        return l[np.argpartition(np.random.rand(num_draw,len(l)), n_sample-1,axis=-1)[:,:n_sample]]

    def _create_mask(self, count):
        """Sample ``count`` binary masks with exactly ``mask_size`` ones each.

        Returns a float32 tensor of shape (count, len(self.template)) on ``self.device``.
        """
        chosen = Hypernetwork.random_choice_noreplace2(np.arange(len(self.template)), self.mask_size, count)
        masks = np.tile(self.template, (count, 1))
        # Set the chosen positions of every row in a single vectorised assignment.
        masks[np.arange(count)[:, None], chosen] = 1
        return torch.from_numpy(masks).to(torch.float32).to(self.device)

    def craft_network(self, mask):
        """Map conditioning vectors to flat target-network weight vectors."""
        return self.model(mask)

In [13]:
def train_with_pcs(hypernet, optimizer, criterion, loaders, data_size, epochs, masks_no, 
                    experiment=None,
                    tag="lpca", 
                    device='cuda:0', 
                    project_name="hypernetwork",
                    test_every=5,
                    log_params=None,
                  ):
    """ Train hypernetwork using slow step method - use the same mask for a whole batch, change it once per iteration.

    Args:
        hypernet - model exposing ``test_mask``, ``test_nodes``, ``mask_size`` and,
            optionally, ``pcs`` (row i of ``pcs`` is passed along with row i of ``test_mask``)
        optimizer - optimizer over the hypernet parameters
        criterion - loss called as ``criterion(outputs, labels)``
        loaders - ``(trainloader, testloader)``; batches are ``(inputs, labels, ...)``
        data_size, masks_no - only logged as experiment parameters
        epochs - number of training epochs
        experiment - existing Comet experiment; a new one is created when None
        tag - tag added to a newly created experiment
        device - device the batches are moved to
        project_name - Comet project of a newly created experiment
        test_every - evaluate on the test loader every ``test_every`` epochs
        log_params - optional dict of extra parameters to log

    Returns:
        ``(best_test_accuracy, test_loss_at_that_evaluation)``. Accuracy is in percent;
        the loss is the sum of per-batch criterion values divided by the number of test samples.

    Raises:
        ValueError - if no evaluation was run (e.g. ``epochs == 0``) or the test loader is empty.
    """
    if experiment is None:
        experiment = Experiment(api_key=os.environ.get("COMET_KEY"), project_name=project_name, display_summary_level=0)
        experiment.add_tag(tag)
    experiment.log_parameter("test_nodes", hypernet.test_nodes)
    experiment.log_parameter("mask_size", hypernet.mask_size)
    experiment.log_parameter("lr", optimizer.defaults['lr'])
    experiment.log_parameter("training_size", data_size)
    experiment.log_parameter("masks_no", masks_no)
    experiment.log_parameter("max_epochs", epochs)
    experiment.log_parameter("check_val_every_n_epoch", test_every)

    for name, value in (log_params or {}).items():
        experiment.log_parameter(name, value)

    trainloader, testloader = loaders
    test_loss = []
    test_accs = []
    mask_idx = 0
    has_pcs = hasattr(hypernet, 'pcs')
    try:
        with trange(epochs) as t:
            for epoch in t:
                hypernet.train()
                for batch in trainloader:
                    # Batches may carry extra items (e.g. indices) after inputs and labels.
                    inputs, labels, *_ = batch
                    inputs = inputs.to(device)
                    labels = labels.to(device)

                    # One mask (and its principal components) is shared by the whole batch.
                    masks = hypernet.test_mask[mask_idx].repeat(len(inputs), 1)
                    pcs = hypernet.pcs[mask_idx].repeat(len(inputs), 1) if has_pcs else None

                    # Cycle through the masks, one per training iteration.
                    mask_idx = (mask_idx + 1) % len(hypernet.test_mask)

                    optimizer.zero_grad()
                    outputs = hypernet(inputs, masks, pcs)
                    loss = criterion(outputs, labels)
                    loss.backward()
                    optimizer.step()

                hypernet.eval()
                if epoch % test_every != 0:
                    continue

                total_loss = 0
                correct = 0
                denom = 0
                n_batches = 0
                # No gradients are needed for evaluation; this avoids building autograd graphs.
                with torch.no_grad():
                    for batch in testloader:
                        images, labels, *_ = batch
                        images = images.to(device)
                        labels = labels.to(device)

                        denom += len(labels)
                        n_batches += 1

                        outputs = hypernet(images)
                        _, predicted = torch.max(outputs, 1)
                        correct += (predicted == labels).sum().item()
                        total_loss += criterion(outputs, labels).item()

                if denom == 0:
                    raise ValueError("testloader yielded no samples")

                test_loss.append(total_loss / denom)
                test_accs.append(correct / denom * 100)

                # Average over the number of batches. The original divided by the last
                # batch index, which is off by one and zero for a single batch.
                t.set_postfix(test_acc=test_accs[-1], loss=total_loss / n_batches)
                experiment.log_metric("test_accuracy", test_accs[-1], step=epoch)
                experiment.log_metric("test_loss", test_loss[-1], step=epoch)
    finally:
        # Close the experiment even when training is interrupted or fails.
        experiment.end()

    if not test_accs:
        raise ValueError("no evaluation was performed; epochs must be at least 1")
    best = int(np.argmax(test_accs))
    return test_accs[best], test_loss[best]

### Setup for training

In [14]:
# Seed shared by numpy, torch and random for every experiment in this notebook.
seed = 5

In [15]:
# Seed all three random number generators used by the notebook.
np.random.seed(seed)
torch.manual_seed(seed)
random.seed(seed)

In [16]:
# Input width of the target network, i.e. the number of features each mask selects.
mask_size = 100

In [17]:
# NOTE(review): `dataset` is not referenced by the cells visible in this notebook - confirm it is still needed.
dataset = hp.semisl.get_train_test_sets()

In [18]:
# Number of training epochs per experiment.
epochs = 3000

# Number of masks, passed to Hypernetwork as test_nodes.
masks_no = 100

# Container for per-configuration results.
results = defaultdict(list)
# Training-set size passed to hp.training_utils.get_dataset.
size = 100

In [19]:

# Number of principal components; the grid-search loop below overwrites this value.
k = 100 # must be less than or equal to masks_no

# Learning rate for the Adam optimizer.
lr = 3e-5

### Test several numbers of principal components

In [20]:
# Second argument of hp.lpca.get_LPCs after the number of components.
# NOTE(review): presumably the logistic-PCA `m` parameter - confirm; hp.lpca.select_m could tune it.
m = 2

# Grid search over k, the number of logistic principal components used as hypernetwork input.
for k in [100, 50, 20, 10]:
    # The components are computed from the masks_no generated masks.
    assert k <= masks_no, "k has to be less than or equal to masks_no"

    criterion = torch.nn.CrossEntropyLoss()

    # Re-seed so every configuration starts from the same masks and initial weights.
    np.random.seed(seed)
    torch.manual_seed(seed)
    random.seed(seed)

    # The hypernetwork consumes k principal components, while masks stay 784 features long.
    hypernet = Hypernetwork(
        architecture=torch.nn.Sequential(
            torch.nn.Linear(k, 64), 
            torch.nn.ReLU(),
            torch.nn.Linear(64, 256),
            torch.nn.ReLU(),
            torch.nn.Linear(256, 128),
        ),
        target_architecture=[(mask_size, 100), (100, 10)],
        test_nodes=masks_no,
        mask_len=784
    ).cuda()

    # Freeze the sampled masks: any later attempt to re-sample them fails loudly.
    hypernet._create_mask = None
    generated_masks = np.array(hypernet.test_mask.cpu())

    print('hypernet.test_mask.shape', hypernet.test_mask.shape)
    print('generated_masks.shape', generated_masks.shape)

    # One row of components per mask; the hypernetwork is conditioned on these rows.
    pcs = hp.lpca.get_LPCs(generated_masks, k, m)

    hypernet.pcs = torch.tensor(pcs).to(hypernet.device).float()

    hypernet = hypernet.train()

    optimizer = torch.optim.Adam(hypernet.parameters(), lr=lr)

    # loaders
    trainloader, testloader = hp.training_utils.get_dataset(size, test_batch_size=32)

    res = train_with_pcs(
        hypernet,
        optimizer,
        criterion,
        (trainloader, testloader),
        size,
        epochs,
        masks_no,
        tag='lpca-init-grid-search',
        test_every=5,
        log_params={
            'k':k,
            'seed': seed,
        }
    )

    # Keep the result for every k instead of overwriting it on the next iteration.
    results[k].append(res)




torch.Size([1, 128])
hypernet.test_mask.shape torch.Size([100, 784])
generated_masks.shape (100, 784)


COMET INFO: Experiment is live on comet.ml https://www.comet.com/abulenok/hypernetwork/d096ff2f725740df8ec988ce57b360e5

 90%|███████████████████████████████████████████████████████▋      | 2695/3000 [1:10:34<03:31,  1.44it/s, loss=1.77, test_acc=71]IOPub message rate exceeded.
The notebook server will temporarily stop sending output
to the client in order to avoid crashing it.
To change this limit, set the config variable
`--NotebookApp.iopub_msg_rate_limit`.

Current values:
NotebookApp.iopub_msg_rate_limit=1000.0 (msgs/sec)
NotebookApp.rate_limit_window=3.0 (secs)

100%|████████████████████████████████████████████████████████████| 3000/3000 [1:10:54<00:00,  1.42s/it, loss=1.78, test_acc=70.5]
COMET ERROR: Error sending a notification, make sure you have opted-in for notifications
COMET INFO: Uploading 1 metrics, params and output messages


### Baseline

In [21]:
# Baseline: the hypernetwork is conditioned on the raw 784-dimensional masks, without principal components.
criterion = torch.nn.CrossEntropyLoss()

# Re-seed so the baseline starts from the same random state as the other runs.
np.random.seed(seed)
torch.manual_seed(seed)
random.seed(seed)


hypernet = Hypernetwork(
    architecture=torch.nn.Sequential(
        torch.nn.Linear(784, 64), 
        torch.nn.ReLU(),
        torch.nn.Linear(64, 256),
        torch.nn.ReLU(),
        torch.nn.Linear(256, 128),
    ),
    target_architecture=[(mask_size, 100), (100, 10)],
    test_nodes=masks_no,
).cuda()

# Freeze the sampled masks: any later attempt to re-sample them fails loudly.
hypernet._create_mask = None
generated_masks = np.array(hypernet.test_mask.cpu())

print('hypernet.test_mask.shape', hypernet.test_mask.shape)
print('generated_masks.shape', generated_masks.shape)

hypernet = hypernet.train()


optimizer = torch.optim.Adam(hypernet.parameters(), lr=lr)

# loaders
trainloader, testloader = hp.training_utils.get_dataset(size, test_batch_size=32)

res = train_with_pcs(
    hypernet,
    optimizer,
    criterion,
    (trainloader, testloader),
    size,
    epochs,
    masks_no,
    tag='lpca-init-grid-search',
    test_every=5,
    log_params={
        'k': 'none',
        'seed': seed,
    }
)

# Store the baseline result under the same 'none' label that is logged as its k parameter.
results['none'].append(res)

torch.Size([1, 128])
hypernet.test_mask.shape torch.Size([100, 784])
generated_masks.shape (100, 784)


COMET INFO: Experiment is live on comet.ml https://www.comet.com/abulenok/hypernetwork/1b8720ee74bc41b9b0a0ca17ca66d8c7

 83%|███████████████████████████████████████████████████▌          | 2496/3000 [59:05<21:45,  2.59s/it, loss=1.78, test_acc=71.2]IOPub message rate exceeded.
The notebook server will temporarily stop sending output
to the client in order to avoid crashing it.
To change this limit, set the config variable
`--NotebookApp.iopub_msg_rate_limit`.

Current values:
NotebookApp.iopub_msg_rate_limit=1000.0 (msgs/sec)
NotebookApp.rate_limit_window=3.0 (secs)

