In [1]:
import argparse
from datetime import date
import os
import numpy as np
import copy
from datetime import datetime
from progress.bar import ChargingBar as Bar
import queue
import math

import sys 
sys.path.append('..')

from utils_IS import *
from evaluate import *
from S_DeepEns2.models import *

In [2]:
# --- Experiment configuration ------------------------------------------------
# Everything a reader may need to change lives here, so the device index and
# the machine-specific root directory are each written exactly once.
seed = 0          # seeds NumPy and PyTorch, is passed to data_process, and appears in the checkpoint name
mt = 0            # index that appears in the checkpoint file name ('..._mt_<mt>.pt')
DEVICE_ID = 3     # CUDA device used for both the model and the checkpoint's map_location
DATASET = 'CIFAR10'
PROJECT_ROOT = '/home/ggong369/data/MBNN_ICML2023'  # machine-specific; edit when running elsewhere

# --- Reproducibility ---------------------------------------------------------
np.random.seed(seed)
torch.manual_seed(seed)
torch.cuda.manual_seed(seed)
torch.cuda.manual_seed_all(seed)
torch.backends.cudnn.deterministic = True

# --- Data --------------------------------------------------------------------
# NOTE(review): the trailing 100 is presumably the batch size -- confirm against
# data_process in utils_IS.
trainloader, testloader, n_train, n_test, p_data, num_classes = data_process(
    DATASET, os.path.join(PROJECT_ROOT, 'datasets'), seed, 100)

task = 'classification'
criterion = torch.nn.CrossEntropyLoss()

# --- Model -------------------------------------------------------------------
# Select the device before the model is created so that .cuda() places it there.
torch.cuda.set_device(DEVICE_ID)

model = M_ResNet18(num_classes).cuda()

checkpoint_path = os.path.join(
    PROJECT_ROOT, 'experiments', 'pretrain', DATASET, '221128', 'epoch100_lr0.001',
    'seed_' + str(seed) + '_mt_' + str(mt) + '.pt')
# NOTE: torch.load unpickles the file; only load checkpoints from a trusted source.
# Kept as the last expression so the cell still displays the key-matching report.
model.load_state_dict(torch.load(checkpoint_path, map_location='cuda:' + str(DEVICE_ID)))

Files already downloaded and verified
Files already downloaded and verified


<All keys matched successfully>

In [3]:
def train(model, task, dataloader, criterion, optimizer):
    """Run one optimisation pass over ``dataloader``.

    Args:
        model: network to optimise; it is switched to training mode.
        task: either ``'regression'`` or ``'classification'``.
        dataloader: iterable yielding ``(inputs, targets)`` batches.
        criterion: callable ``criterion(outputs, targets)`` returning the loss.
        optimizer: optimiser whose ``zero_grad``/``step`` are called per batch.

    Raises:
        ValueError: if ``task`` is not one of the two supported values
            (previously such a call silently trained nothing).
    """
    if task not in ('regression', 'classification'):
        raise ValueError(
            "task must be 'regression' or 'classification', got %r" % (task,))

    model.train()
    for inputs, targets in dataloader:
        inputs, targets = inputs.cuda(), targets.cuda()

        if task == 'classification':
            # squeeze() removes *every* length-1 axis, including the batch axis
            # of a single-sample batch, which would hand the criterion a target
            # without a batch dimension. Remember that case and restore the axis.
            single_sample = targets.dim() > 0 and targets.size(0) == 1
            targets = targets.squeeze().long()
            if single_sample:
                targets = targets.unsqueeze(0)

        outputs = model(inputs)
        optimizer.zero_grad()
        loss = criterion(outputs, targets)
        loss.backward()
        optimizer.step()
            
def test(model, task, dataloader):
    ERROR = 0
    model.eval()
    with torch.no_grad():
        if task == 'regression':        
            for batch_idx, (inputs, targets) in enumerate(dataloader):
                inputs, targets = inputs.cuda(), targets.cuda()
                outputs = model(inputs)
                ERROR += ((targets - outputs)**2).sum()
            return torch.sqrt(ERROR/len(dataloader.dataset))
        
        elif task == 'classification':
            for batch_idx, (inputs, targets) in enumerate(dataloader):
                inputs, targets = inputs.cuda(), targets.cuda().squeeze().long()
                outputs = model(inputs)
                ERROR += (torch.argmax(outputs,1) != targets).sum()    
            return ERROR/len(dataloader.dataset)

In [4]:
# Collect one score tensor per scored layer: first the stem (conv1 + bn1), then
# whatever each M_BasicBlock reports through get_2norm().
# NOTE(review): presumably these are per-filter importance scores -- confirm
# against filter_ranks / M_BasicBlock.get_2norm.
original_dist = [filter_ranks(model.conv1, model.bn1)]
for module in model.modules():
    if not isinstance(module, M_BasicBlock):
        continue
    original_dist.extend(module.get_2norm())

# Summarise every score tensor by its mean and standard deviation, keyed by the
# tensor's position in original_dist.
original_dist_stat = {}
for position, scores in enumerate(original_dist):
    values = scores.detach().cpu().numpy()
    original_dist_stat[position] = {'mean': np.mean(values), 'std': np.std(values)}

In [25]:
# Draw a random affine transform (scale, shift) for every scored layer:
#   scale = exp(e1) / std,   shift = e2 - mean / std,   e1, e2 ~ N(0, 0.1)
# With e1 = e2 = 0 this reduces to standardising the layer's scores.
# The scale noise is drawn before the shift noise for each layer.
layer_count = len(original_dist_stat)
perturbation = []
for layer_idx in range(layer_count):
    layer_mean = original_dist_stat[layer_idx]['mean']
    layer_std = original_dist_stat[layer_idx]['std']
    scale_noise = float(np.random.normal(0, 0.1))
    scale = np.exp(scale_noise) / layer_std
    shift = float(np.random.normal(0, 0.1) - layer_mean / layer_std)
    perturbation.append((scale, shift))

# Work on a copy so the pretrained model keeps its original masks.
model_tmp = copy.deepcopy(model)
# Parameters whose name contains 'M_relu', in named_parameters() order.
# NOTE(review): assumed to line up one-to-one with original_dist -- confirm.
Masking_layers = [
    param for name, param in model_tmp.named_parameters() if 'M_relu' in name
]

# Apply each layer's transform to its score tensor.
perturbed_dist = [
    original_dist[layer_idx] * perturbation[layer_idx][0] + perturbation[layer_idx][1]
    for layer_idx in range(layer_count)
]

# Global threshold: the (1 - 0.6)-quantile over all perturbed scores pooled together.
cutoff = torch.quantile(torch.hstack(perturbed_dist), 1 - 0.6)

for layer_idx in range(layer_count):
    mask = Masking_layers[layer_idx]
    scores = perturbed_dist[layer_idx]
    below_cutoff = torch.where(scores < cutoff)[0]
    # Zero the mask entries whose perturbed score falls under the threshold.
    mask.data[below_cutoff] *= 0
    if len(below_cutoff) == len(mask):
        # Every entry of this layer was zeroed: add 1 back at the highest score
        # so the layer is not masked out completely.
        mask.data[torch.argmax(scores)] += 1

# Sum of each mask after thresholding.
for layer_idx in range(layer_count):
    print(Masking_layers[layer_idx].data.sum())

tensor(64., device='cuda:3')
tensor(36., device='cuda:3')
tensor(43., device='cuda:3')
tensor(39., device='cuda:3')
tensor(41., device='cuda:3')
tensor(67., device='cuda:3')
tensor(95., device='cuda:3')
tensor(80., device='cuda:3')
tensor(91., device='cuda:3')
tensor(162., device='cuda:3')
tensor(152., device='cuda:3')
tensor(141., device='cuda:3')
tensor(164., device='cuda:3')
tensor(266., device='cuda:3')
tensor(313., device='cuda:3')
tensor(324., device='cuda:3')
tensor(264., device='cuda:3')


In [9]:
# Evaluate the masked copy on the *training* loader; with task == 'classification'
# the returned value is the misclassification rate.
test(model_tmp, task, trainloader)

tensor(0.8716, device='cuda:3')

In [8]:
# Display the per-layer {'mean', 'std'} summary of the score tensors.
original_dist_stat

{0: {'mean': 1.4768939, 'std': 2.3195174},
 1: {'mean': 0.5173925, 'std': 0.4500004},
 2: {'mean': 1.1012728, 'std': 0.7282056},
 3: {'mean': 0.14502235, 'std': 0.059649426},
 4: {'mean': 1.5524901, 'std': 0.74017715},
 5: {'mean': 0.17625584, 'std': 0.07631205},
 6: {'mean': 1.630589, 'std': 0.45739505},
 7: {'mean': 0.24136806, 'std': 0.089614},
 8: {'mean': 2.289945, 'std': 1.0323045},
 9: {'mean': 0.19052489, 'std': 0.06343855},
 10: {'mean': 2.0062778, 'std': 0.4994632},
 11: {'mean': 0.51359177, 'std': 0.15950434},
 12: {'mean': 4.8510866, 'std': 1.9786859},
 13: {'mean': 0.4060076, 'std': 0.1599586},
 14: {'mean': 1.8804797, 'std': 0.44032103},
 15: {'mean': 0.06509709, 'std': 0.038639754},
 16: {'mean': 0.018037803, 'std': 0.017243184}}