In [1]:
import numpy as np
import mylibrary.nnlib as tnn
import matplotlib.pyplot as plt
import copy

from mpl_toolkits.mplot3d import Axes3D
import matplotlib

import torch
import torch.nn as nn

import mylibrary.datasets as datasets
import prunelib
from tqdm import tqdm
import random

import pickle
import copy

In [2]:
# Load the MNIST splits through the project-local dataset helper.
mnist = datasets.MNIST()
train_data, train_label_, test_data, test_label_ = mnist.load()

# Scale both image sets by 1/255 (presumably 8-bit pixel values -> [0, 1]; confirm against loader).
train_data, test_data = train_data / 255., test_data / 255.

In [3]:
# Number of training samples, taken from the integer label list.
train_size = len(train_label_)
# Label representation produced by the project helper from the integer class indices.
train_label = tnn.Logits.index_to_logit(train_label_)

In [4]:
# Integer class indices as a LongTensor (the target format used with the loss below).
yy = torch.LongTensor(train_label_)
# Float tensors for the training and test images.
xx = torch.Tensor(train_data)
test_data = torch.Tensor(test_data)

## Compare pruning criteria with and without BatchNorm

In [5]:
# Network input width and number of output classes, read as globals by get_mlp().
input_dim, output_dim = 784, 10

In [6]:
"""
settings:
1,2,3 -> net = nn.Sequential(
    nn.Linear(784, 256),
    nn.ReLU(),
    nn.Linear(256, 128),
    nn.ReLU(),
    nn.Linear(128, 64),
    nn.ReLU(),
    nn.Linear(64, 10),
)

4,5 -> net = nn.Sequential(
    nn.Linear(784, 100),
    nn.ReLU(),
    nn.Linear(100, 100),
    nn.ReLU(),
    nn.Linear(100, 100),
    nn.ReLU(),
    nn.Linear(100, 10),
)

6, 7 -> net = nn.Sequential(
    nn.Linear(784, 200),
    nn.ReLU(),
    nn.Linear(200, 100),
    nn.ReLU(),
    nn.Linear(100, 100),
    nn.ReLU(),
    nn.Linear(100, 100),
    nn.ReLU(),
    nn.Linear(100, 10),
)
8, 9 -> net = nn.Sequential(
    nn.Linear(784, 400),
    nn.ReLU(),
    nn.Linear(400, 300),
    nn.ReLU(),
    nn.Linear(300, 200),
    nn.ReLU(),
    nn.Linear(200, 100),
    nn.ReLU(),
    nn.Linear(100, 10),
)
"""
print("Nets")

Nets


In [7]:
# Hidden-layer widths for each experiment; one entry per architecture.
configs = [
    [256, 128, 64],
    [100, 100, 100],
    [200, 100, 100, 100],
    [400, 300, 200, 100],
    [400, 400],
    [500],
]

# Individual names for the entries above (same list objects, not copies).
config0, config1, config2, config3, config4, config5 = configs

# Default architecture.
layer_dims = config0

In [8]:
def get_mlp(config, batch_norm=False, final_activation=None):
    """Assemble a ReLU multi-layer perceptron as an ``nn.Sequential``.

    Args:
        config: list of hidden-layer widths. The global ``input_dim`` is
            prepended as the first layer's input width.
        batch_norm: when true, a ``BatchNorm1d`` is placed between every hidden
            ``Linear`` and its ``ReLU``.
        final_activation: optional module appended after the output layer.

    Returns:
        ``nn.Sequential`` ending in a ``Linear`` that maps to the global
        ``output_dim`` (followed by ``final_activation`` if given).
    """
    widths = [input_dim] + config
    stack = []
    for fan_in, fan_out in zip(widths[:-1], widths[1:]):
        stack.append(nn.Linear(fan_in, fan_out))
        if batch_norm:
            stack.append(nn.BatchNorm1d(fan_out))
        stack.append(nn.ReLU())

    # Output layer: no BatchNorm / ReLU after it.
    stack.append(nn.Linear(widths[-1], output_dim))
    if final_activation:
        stack.append(final_activation)
    return nn.Sequential(*stack)

In [9]:
def remove_batchnorm(net, layer_dims):
    """Fold every BatchNorm1d of ``net`` into its preceding Linear layer.

    A fresh BatchNorm-free network is built with ``get_mlp(layer_dims)`` and
    its parameters are overwritten so that it reproduces ``net`` as evaluated
    with the BatchNorm *running* statistics (i.e. ``net`` in ``eval()`` mode).

    Args:
        net: ``nn.Sequential`` laid out as ``[Linear, BatchNorm1d, ReLU] * k``
            followed by a final ``Linear`` (the layout ``get_mlp`` produces
            with ``batch_norm=True``; the fixed strides below rely on it).
        layer_dims: hidden-layer widths used to build the BatchNorm-free copy.

    Returns:
        The new ``nn.Sequential`` without BatchNorm layers.
    """
    net_ = get_mlp(layer_dims)
    count = sum(
        isinstance(module, nn.BatchNorm1d) for module in net._modules.values()
    )
    print(count)

    with torch.no_grad():
        for k in range(count):
            # Source block is (Linear, BN, ReLU); target block is (Linear, ReLU).
            linear, bn = net[3 * k], net[3 * k + 1]
            target = net_[2 * k]

            # BatchNorm normalises by sqrt(var + eps); leaving eps out makes the
            # folded network deviate, most visibly for low-variance units.
            scale = bn.weight / torch.sqrt(bn.running_var + bn.eps)

            target.weight.copy_(scale.reshape(-1, 1) * linear.weight)
            target.bias.copy_(bn.bias + scale * (linear.bias - bn.running_mean))

        # The output layer has no BatchNorm: copy it unchanged.
        net_[-1].weight.copy_(net[-1].weight)
        net_[-1].bias.copy_(net[-1].bias)

    return net_

## Oracle Pruning Modified

In [10]:
class Importance_TaylorFO_Modified_BN(prunelib.Importance):
    """First-order Taylor neuron importance measured at BatchNorm outputs.

    Forward and backward hooks are attached to every ``BatchNorm1d`` that is a
    direct child of ``net``, plus the last direct-child ``Linear`` (its output
    gradient is only used for the optional per-sample rescaling). A single
    forward/backward pass then provides ``activation * gradient`` for each
    hooked BatchNorm output, which is reduced over the batch as selected by
    ``config``.

    ``config`` is a mapping read with the keys ``"grad_rescale"``,
    ``"imp_norm"`` (``"abs"``, ``"sq"`` or anything else for no transform)
    and ``"allow_linear"``.
    """

    def __init__(self, net, criterion, config=None):
        self.net = net
        self.config = config
        self.criterion = criterion

        self.activations = {}
        self.gradients = {}
        self.forward_hook = {}
        self.backward_hook = {}
        self.keys = []

    def _register(self, module):
        """Attach both hooks to ``module`` and reserve its capture slots."""
        self.backward_hook[module] = module.register_backward_hook(self.capture_gradients)
        self.forward_hook[module] = module.register_forward_hook(self.capture_inputs)

        self.activations[module] = None
        self.gradients[module] = None
        self.keys.append(module)

    def add_hook(self):
        """Reset all captured state and hook the BatchNorm layers and the last Linear.

        After this call ``self.keys`` holds the BatchNorm modules in network
        order followed by the final Linear module as its last element.
        """
        self.activations = {}
        self.gradients = {}
        self.forward_hook = {}
        self.backward_hook = {}
        self.keys = []

        modules = list(self.net._modules.items())
        for name, module in modules:
            if isinstance(module, torch.nn.BatchNorm1d):
                self._register(module)

        # Only the last Linear layer (the network output) is hooked.
        for name, module in reversed(modules):
            if isinstance(module, torch.nn.Linear):
                self._register(module)
                break

    def remove_hook(self):
        """Detach every hook registered by ``add_hook``."""
        for module in self.keys:
            self.forward_hook[module].remove()
            self.backward_hook[module].remove()

    def capture_inputs(self, module, inp, out):
        """Forward hook: store the module's output (despite the method name)."""
        self.activations[module] = out.data

    def capture_gradients(self, module, gradi, grado):
        """Backward hook: store the gradient w.r.t. the module's output."""
        self.gradients[module] = grado[0]

    def gather_inputs_gradients(self, x, t):
        """Run one forward/backward pass on ``(x, t)`` with the hooks attached."""
        self.add_hook()

        self.net.zero_grad()
        y = self.net(x)
        error = self.criterion(y, t)
        error.backward()

        self.remove_hook()
        return

    def _active_fraction(self, module):
        """Per-neuron fraction of samples whose captured output is > 0."""
        positive = (self.activations[module] > 0.).type(torch.float)
        return positive.mean(dim=0)

    def compute_significance(self, x, t, config=None, normalize=True, layerwise_norm=False):
        """Compute one importance tensor per hooked BatchNorm layer.

        Args:
            x, t: inputs and targets for the single forward/backward pass.
            config: overrides ``self.config`` when given.
            normalize: rescale so the mean importance over all neurons is 1.
            layerwise_norm: divide each layer's scores by their L2 norm.

        Returns:
            List of 1-D tensors, one per BatchNorm layer, in network order.

        Raises:
            ValueError: if neither ``config`` nor ``self.config`` is set.
        """
        self.gather_inputs_gradients(x, t)

        if config is None:
            if self.config is None:
                raise ValueError("config is not known. Please specify the config.")
            else:
                config = self.config

        ## compute importance score
        importance = []
        if config["grad_rescale"]:
            # Per-sample L2 norm of the gradient at the network output.
            scaler = torch.norm(self.gradients[self.keys[-1]], p=2, dim=1, keepdim=True) + 1e-5

        for module in self.keys[:-1]:
            z = self.activations[module] * self.gradients[module]
            if config["grad_rescale"]:
                z = z / scaler
            if config["imp_norm"] == "abs":
                z = z.abs()
            elif config["imp_norm"] == "sq":
                z = z.pow(2)

            z = z.sum(dim=0).abs()
            if not config["allow_linear"]:
                # Down-weight neurons that are active on (almost) every sample.
                # The activity must be a fraction in [0, 1]: the raw count used
                # previously made (1 - apnz) negative for any realistic batch.
                apnz = self._active_fraction(module)
                z = z*(1-apnz) * 4 ## tried on desmos.

            if layerwise_norm:
                z = z / torch.norm(z, p=2)

            importance.append(z)

        if normalize:
            sums = 0
            count = 0
            for imp in importance:
                sums += imp.sum()
                count += len(imp)
            divider = sums/count ## total importance is number of neurons
            for i in range(len(importance)):
                importance[i] = importance[i]/divider

        # Release the captured tensors and hook handles.
        self.activations = {}
        self.gradients = {}
        self.forward_hook = {}
        self.backward_hook = {}

        return importance

    def get_aponz(self, std=True):
        """Return the fraction of positive outputs per neuron for each BatchNorm layer.

        Needs captured activations. ``compute_significance`` clears them before
        returning, so call ``gather_inputs_gradients`` first when using this
        method on its own.

        Args:
            std: when true, also return the standard deviation of each layer's
                captured activations.

        Returns:
            ``aponz`` (list of 1-D tensors), or ``(aponz, stds)`` when ``std``
            is true; ``None`` if no activations are available.
        """
        if len(self.activations) < 1:
            print("Activation has not been accumulated.. run compute_significance function")
            return
        aponz = []
        # Kept under a separate name: rebinding ``std`` to a list used to shadow
        # the flag, so the std values were never collected or returned.
        stds = []
        for module in self.keys[:-1]:
            aponz.append(self._active_fraction(module))
            if std:
                stds.append(self.activations[module].std())

        if std:
            return aponz, stds
        return aponz

        

class Importance_Molchanov_BN(prunelib.Importance):
    """Importance of BatchNorm channels from their scale/shift parameters.

    For each mini-batch the per-channel quantity
    ``(weight * weight.grad + bias * bias.grad) ** 2`` is computed for every
    ``BatchNorm1d`` that is a direct child of ``net``; the result is averaged
    over mini-batches.
    """

    def __init__(self, net, criterion):
        self.net = net
        self.criterion = criterion
        # BatchNorm layers in network order; one importance tensor is produced per entry.
        self.keys = [
            module
            for _, module in list(self.net._modules.items())
            if isinstance(module, torch.nn.BatchNorm1d)
        ]

    def compute_significance(self, x, t, normalize=True, batch_size=32):
        """Return one importance tensor per BatchNorm layer.

        Args:
            x, t: inputs and targets, sliced into consecutive mini-batches.
            normalize: rescale so the mean importance over all channels is 1.
            batch_size: number of samples per mini-batch.
        """
        starts = list(range(0, len(x), batch_size))
        stops = starts[1:] + [len(x)]
        num_batches = len(starts)

        importance = [0] * len(self.keys)
        for k in tqdm(range(num_batches)):
            lo, hi = starts[k], stops[k]
            self.net.zero_grad()
            error = self.criterion(self.net(x[lo:hi]), t[lo:hi])
            error.backward()

            ## accumulate the squared first-order term of every BN layer
            for j, bn in enumerate(self.keys):
                first_order = bn.weight.data * bn.weight.grad + bn.bias.data * bn.bias.grad
                importance[j] += first_order.pow(2)

        ## mean over mini-batches
        importance = [score / num_batches for score in importance]

        if normalize:
            total = sum(score.sum() for score in importance)
            channels = sum(len(score) for score in importance)
            divider = total / channels  ## total importance is number of neurons
            importance = [score / divider for score in importance]

        return importance

## Define pruning function

In [11]:
class Pruner():
    """Run a network with selected hidden units zeroed out by forward hooks.

    Masks are multiplied onto the outputs of the ``Linear`` layers that are
    direct children of ``net``. Hooks and masks live for a single ``forward``
    call and are removed afterwards, so ``net`` itself is left unmodified.

    NOTE(review): the mask is applied to the Linear output. If a BatchNorm
    layer follows that Linear, the zeroed values are shifted again by the
    BatchNorm, so the unit is generally not zero after it - confirm this is
    the intended pruning point for BatchNorm networks.
    """

    def __init__(self, net, prune_mask=None):
        """
        Args:
            net: network whose direct-child ``Linear`` layers are masked.
            prune_mask: optional masks (one tensor per hidden Linear layer)
                to use for the first ``forward`` call.
        """
        self.net = net
        self.keys = []
        self.prune_mask = {}
        self.forward_hook = {}

        self.activations = []

        for name, module in list(self.net._modules.items()):
            if isinstance(module, torch.nn.Linear):
                self.keys.append(module)

        # Store the mask only; hooks are attached lazily by forward(). The old
        # trailing remove_hook() call discarded the mask that was just stored.
        if prune_mask is not None and len(prune_mask) > 0:
            self.add_prune_mask(prune_mask)

    def add_prune_mask(self, prune_mask):
        """Assign one mask per hidden Linear layer; the output layer gets all ones."""
        for module, pm in zip(self.keys[:-1], prune_mask):
            self.prune_mask[module] = pm.type(torch.float)
        self.prune_mask[self.keys[-1]] = torch.ones(self.keys[-1].out_features, dtype=torch.float)

    def prune_neurons(self, module, inp, out):
        """Forward hook: multiply the layer output by its mask and record the result."""
        mask = self.prune_mask[module]
        output = out*mask

        self.activations.append(output)
        return output

    def forward(self, x, prune_mask=None):
        """Evaluate ``net(x)`` with the given (or previously stored) masks.

        Without any mask the network is evaluated unpruned. Hooks, masks and
        recorded activations are always cleared before returning, even when
        the network raises.
        """
        # Explicit None/empty test: truth-testing a tensor would be ambiguous.
        if prune_mask is not None and len(prune_mask) > 0:
            self.add_prune_mask(prune_mask)
        if len(self.prune_mask) > 0 and len(self.forward_hook) == 0:
            self.add_hook()

        try:
            y = self.net(x)
        finally:
            self.remove_hook()
        return y

    def _detach_hooks(self):
        """Remove the registered hook handles without touching the masks."""
        for hook in self.forward_hook.values():
            hook.remove()
        self.forward_hook = {}

    def add_hook(self):
        """Register ``prune_neurons`` on every direct-child Linear layer."""
        # Only the handles are dropped here; going through remove_hook() would
        # also wipe the masks the new hooks are about to use.
        if len(self.forward_hook) > 0:
            self._detach_hooks()

        for name, module in list(self.net._modules.items()):
            if isinstance(module, torch.nn.Linear):
                hook = module.register_forward_hook(self.prune_neurons)
                self.forward_hook[module] = hook
        return

    def remove_hook(self):
        """Detach all hooks and clear the stored masks and recorded activations."""
        self._detach_hooks()
        self.prune_mask = {}
        self.activations = []
        return

In [12]:
##_start everything

In [13]:
# Experiment counter used in the saved plot file names; the experiment loop
# increments it before each run, so the first run is numbered 0.
expindx = -1

In [14]:
%matplotlib inline

for configno in [0,1,2,3,4,5]:
    expindx += 1
    
    ## Initialization
    layer_dims = configs[configno]
    net = get_mlp(layer_dims, batch_norm=True)
    optimizer = torch.optim.Adam(net.parameters(), lr=0.003)
    criterion = nn.CrossEntropyLoss()

    ## Training
    for epoch in range(100):
        yout = net(xx)

        loss = criterion(yout, yy)
        optimizer.zero_grad()
        loss.backward()
        optimizer.step()

        error = float(loss)
        print(epoch, 'Error = ', error)

        with torch.no_grad():
            yout = net(xx)
            out = torch.argmax(yout, axis=1)
            acc = (out.data.numpy() == np.array(train_label_)).astype(np.float).mean()
            print("Accuracy: ", acc)

    ## No Batch Norm
    net_ = remove_batchnorm(net, layer_dims)
    yout = net_(xx)
    out = torch.argmax(yout, axis=1)
    acc = (out.data.numpy() == np.array(train_label_)).astype(np.float).mean()
    print("Accuracy noBN: ", acc)

    ### Comparison
    methods = []
    classes = []

    ## taylor fo modified
    methods = prunelib.taylorfo_mode_list[:3]
    classes += [Importance_TaylorFO_Modified_BN(net, criterion, config=prunelib.taylorfo_mode_config[method]) for method in methods]


    ## Molchanov_group, APnZ, Magnitude
    methods += ["Molchanov_group", "Molchanov_BN"]
    classes += [
                prunelib.Importance_Molchanov_2019(net, criterion),
                Importance_Molchanov_BN(net, criterion),
                ]

    ## gather all importances
    importances = []
    for i in range(len(methods)):
        print(methods[i])
        imp = classes[i].compute_significance(xx, yy)
        importances.append(imp)


    xs = list(range(1, 401, 5))
    ## compute new loss and deviation
    loss_lol = []
    deviation_lol = []
    accuracy_lol = []

    pnet = Pruner(net)
    yout_normal = net.forward(xx).data.cpu()

    for i in range(len(methods)):
        print(methods[i])

        losses = []
        deviations = []
        accuracies = []
        importance = importances[i]
        for num in tqdm(xs):
            pmask = prunelib.get_pruning_mask(importance, num_prune=num)
            yout_prune = pnet.forward(xx, prune_mask=pmask).data.cpu()
            new_err = criterion(yout_prune, yy)
            deviation = ((yout_prune-yout_normal)**2).mean()
            out = torch.argmax(yout_prune, axis=1)
            acc = (out.data.numpy() == np.array(train_label_)).astype(np.float).mean()

            losses.append(new_err)
            deviations.append(deviation)
            accuracies.append(acc)

        loss_lol.append(losses)
        deviation_lol.append(deviations)
        accuracy_lol.append(accuracies)

    ## Do by removing BN

    ## backup
    methods_ = copy.deepcopy(methods)

    loss_lol_ = copy.deepcopy(loss_lol)
    deviation_lol_ = copy.deepcopy(deviation_lol)
    accuracy_lol_ = copy.deepcopy(accuracy_lol)

    net__ = net
    net = net_
    net_ = net__
    del(net__)

    methods = []
    classes = []

    ## taylor fo modified
    methods = prunelib.taylorfo_mode_list[:3]
    classes += [prunelib.Importance_TaylorFO_Modified(net, criterion, config=prunelib.taylorfo_mode_config[method]) for method in methods]


    ## Molchanov_group, APnZ, Magnitude
    methods += ["Molchanov_group"]
    classes += [
                prunelib.Importance_Molchanov_2019(net, criterion),
                ]

    ## gather all importances
    importances = []
    for i in range(len(methods)):
        print(methods[i])
        imp = classes[i].compute_significance(xx, yy)
        importances.append(imp)


    ## compute new loss and deviation
    loss_lol = []
    deviation_lol = []
    accuracy_lol = []

    pnet = Pruner(net)
    yout_normal = net.forward(xx).data.cpu()

    for i in range(len(methods)):
        print(methods[i])

        losses = []
        deviations = []
        accuracies = []
        importance = importances[i]
        for num in tqdm(xs):
            pmask = prunelib.get_pruning_mask(importance, num_prune=num)
            yout_prune = pnet.forward(xx, prune_mask=pmask).data.cpu()
            new_err = criterion(yout_prune, yy)
            deviation = ((yout_prune-yout_normal)**2).mean()
            out = torch.argmax(yout_prune, axis=1)
            acc = (out.data.numpy() == np.array(train_label_)).astype(np.float).mean()

            losses.append(new_err)
            deviations.append(deviation)
            accuracies.append(acc)

        loss_lol.append(losses)
        deviation_lol.append(deviations)
        accuracy_lol.append(accuracies)


    ########## Plotting #############
    def get_ls(i):
        if i<3:
            ls = "solid"
        elif i<6:
            ls = "dashed"
        elif i<9:
            ls = "dotted"
        elif i<12:
            ls = "dashdot"
        else:
            ls = (0, (3, 5, 1, 5, 1, 5))
        return ls


    ### loss
    plt.figure(figsize=(16, 8))
    for i in range(len(methods)):
        plt.plot(xs, loss_lol[i], label=methods[i]+" noBN", linestyle=get_ls(i))

    for i in range(len(methods_)):
        plt.plot(xs, loss_lol_[i], label=methods_[i]+" BN", linestyle=get_ls(i))

    plt.legend()
    plt.xlabel("Number of Neurons pruned")
    plt.ylabel("Loss")
    plt.savefig(f"plt_06_MNIST_prune_BN_v1_loss_{expindx}_all.svg")
    plt.close()

    ### deviation
    plt.figure(figsize=(16, 8))
    for i in range(len(methods)):
        plt.plot(xs, deviation_lol[i], label=methods[i]+" noBN", linestyle=get_ls(i))

    for i in range(len(methods_)):
        plt.plot(xs, deviation_lol_[i], label=methods_[i]+" BN", linestyle=get_ls(i))

    plt.legend()
    plt.xlabel("Number of Neurons pruned")
    plt.ylabel("Squared Deviation")
    plt.savefig(f"plt_06_MNIST_prune_BN_v1_deviation_{expindx}_all.svg")
    plt.close()

    ### deviation
    plt.figure(figsize=(16, 8))
    for i in range(len(methods)):
        plt.plot(xs, accuracy_lol[i], label=methods[i]+" noBN", linestyle=get_ls(i))

    for i in range(len(methods_)):
        plt.plot(xs, accuracy_lol_[i], label=methods_[i]+" BN", linestyle=get_ls(i))

    plt.legend()
    plt.xlabel("Number of Neurons pruned")
    plt.ylabel("Accuracy")
    plt.savefig(f"plt_06_MNIST_prune_BN_v1_accuracy_{expindx}_all.svg")
    plt.close()

0 Error =  2.380300760269165
Accuracy:  0.6671333333333334
1 Error =  1.601159691810608
Accuracy:  0.74905
2 Error =  1.314244031906128
Accuracy:  0.8003166666666667
3 Error =  1.1426172256469727
Accuracy:  0.8363833333333334
4 Error =  1.0185375213623047
Accuracy:  0.86195
5 Error =  0.9207170009613037
Accuracy:  0.8797166666666667
6 Error =  0.8393134474754333
Accuracy:  0.8923333333333333
7 Error =  0.7694326043128967
Accuracy:  0.9017333333333334
8 Error =  0.7081952691078186
Accuracy:  0.9089
9 Error =  0.6535472869873047
Accuracy:  0.9147
10 Error =  0.6040388941764832
Accuracy:  0.9185166666666666
11 Error =  0.5589151978492737
Accuracy:  0.92235
12 Error =  0.5178361535072327
Accuracy:  0.9262
13 Error =  0.4803601801395416
Accuracy:  0.9288
14 Error =  0.4461052119731903
Accuracy:  0.9316
15 Error =  0.4148976504802704
Accuracy:  0.9346333333333333
16 Error =  0.3866208791732788
Accuracy:  0.93735
17 Error =  0.36102718114852905
Accuracy:  0.9401333333333334
18 Error =  0.3378

  0%|          | 5/1875 [00:00<00:37, 49.21it/s]

Molchanov_group


100%|██████████| 1875/1875 [00:13<00:00, 137.33it/s]
  1%|          | 14/1875 [00:00<00:14, 131.51it/s]

Molchanov_BN


100%|██████████| 1875/1875 [00:12<00:00, 146.79it/s]
  0%|          | 0/80 [00:00<?, ?it/s]

taylorfo


100%|██████████| 80/80 [01:31<00:00,  1.14s/it]
  0%|          | 0/80 [00:00<?, ?it/s]

taylorfo_abs


100%|██████████| 80/80 [01:28<00:00,  1.11s/it]
  0%|          | 0/80 [00:00<?, ?it/s]

taylorfo_sq


100%|██████████| 80/80 [01:29<00:00,  1.12s/it]
  0%|          | 0/80 [00:00<?, ?it/s]

Molchanov_group


100%|██████████| 80/80 [01:29<00:00,  1.12s/it]
  0%|          | 0/80 [00:00<?, ?it/s]

Molchanov_BN


100%|██████████| 80/80 [01:29<00:00,  1.12s/it]


taylorfo
taylorfo_abs
taylorfo_sq


  2%|▏         | 39/1875 [00:00<00:04, 388.23it/s]

Molchanov_group


100%|██████████| 1875/1875 [00:05<00:00, 357.65it/s]
  0%|          | 0/80 [00:00<?, ?it/s]

taylorfo


100%|██████████| 80/80 [00:28<00:00,  2.84it/s]
  0%|          | 0/80 [00:00<?, ?it/s]

taylorfo_abs


100%|██████████| 80/80 [00:28<00:00,  2.78it/s]
  0%|          | 0/80 [00:00<?, ?it/s]

taylorfo_sq


100%|██████████| 80/80 [00:29<00:00,  2.67it/s]
  0%|          | 0/80 [00:00<?, ?it/s]

Molchanov_group


100%|██████████| 80/80 [00:28<00:00,  2.79it/s]


0 Error =  2.3606786727905273
Accuracy:  0.6008666666666667
1 Error =  1.728194236755371
Accuracy:  0.7199
2 Error =  1.4097765684127808
Accuracy:  0.7581333333333333
3 Error =  1.1881825923919678
Accuracy:  0.79055
4 Error =  1.0256785154342651
Accuracy:  0.8250666666666666
5 Error =  0.8987904191017151
Accuracy:  0.8571833333333333
6 Error =  0.7957369685173035
Accuracy:  0.8786333333333334
7 Error =  0.7103487253189087
Accuracy:  0.8914666666666666
8 Error =  0.6383370161056519
Accuracy:  0.8992833333333333
9 Error =  0.5763058066368103
Accuracy:  0.9048166666666667
10 Error =  0.5224504470825195
Accuracy:  0.9085
11 Error =  0.47590088844299316
Accuracy:  0.9128
12 Error =  0.43596014380455017
Accuracy:  0.9158
13 Error =  0.4017631411552429
Accuracy:  0.91915
14 Error =  0.372287780046463
Accuracy:  0.9224
15 Error =  0.3466564118862152
Accuracy:  0.9256166666666666
16 Error =  0.32413533329963684
Accuracy:  0.9284333333333333
17 Error =  0.3041439652442932
Accuracy:  0.9312333333

  1%|          | 20/1875 [00:00<00:09, 196.26it/s]

Molchanov_group


100%|██████████| 1875/1875 [00:09<00:00, 190.77it/s]
  1%|          | 18/1875 [00:00<00:10, 172.70it/s]

Molchanov_BN


100%|██████████| 1875/1875 [00:10<00:00, 185.60it/s]
  0%|          | 0/80 [00:00<?, ?it/s]

taylorfo


100%|██████████| 80/80 [00:57<00:00,  1.39it/s]
  0%|          | 0/80 [00:00<?, ?it/s]

taylorfo_abs


100%|██████████| 80/80 [00:57<00:00,  1.39it/s]
  0%|          | 0/80 [00:00<?, ?it/s]

taylorfo_sq


100%|██████████| 80/80 [00:57<00:00,  1.40it/s]
  0%|          | 0/80 [00:00<?, ?it/s]

Molchanov_group


100%|██████████| 80/80 [00:57<00:00,  1.40it/s]
  0%|          | 0/80 [00:00<?, ?it/s]

Molchanov_BN


100%|██████████| 80/80 [00:57<00:00,  1.39it/s]


taylorfo
taylorfo_abs
taylorfo_sq


  2%|▏         | 45/1875 [00:00<00:04, 446.35it/s]

Molchanov_group


100%|██████████| 1875/1875 [00:04<00:00, 453.96it/s]
  1%|▏         | 1/80 [00:00<00:14,  5.34it/s]

taylorfo


100%|██████████| 80/80 [00:14<00:00,  5.53it/s]
  1%|▏         | 1/80 [00:00<00:14,  5.27it/s]

taylorfo_abs


100%|██████████| 80/80 [00:14<00:00,  5.60it/s]
  0%|          | 0/80 [00:00<?, ?it/s]

taylorfo_sq


100%|██████████| 80/80 [00:14<00:00,  5.61it/s]
  1%|▏         | 1/80 [00:00<00:14,  5.53it/s]

Molchanov_group


100%|██████████| 80/80 [00:14<00:00,  5.51it/s]


0 Error =  2.3627002239227295
Accuracy:  0.6007
1 Error =  1.71243155002594
Accuracy:  0.7745333333333333
2 Error =  1.3677042722702026
Accuracy:  0.8284166666666667
3 Error =  1.1379826068878174
Accuracy:  0.8587166666666667
4 Error =  0.9613863825798035
Accuracy:  0.8769666666666667
5 Error =  0.8164313435554504
Accuracy:  0.8906833333333334
6 Error =  0.6992300748825073
Accuracy:  0.9012666666666667
7 Error =  0.6054764986038208
Accuracy:  0.9085833333333333
8 Error =  0.5303746461868286
Accuracy:  0.9147166666666666
9 Error =  0.4697955548763275
Accuracy:  0.9200166666666667
10 Error =  0.4201180338859558
Accuracy:  0.9251333333333334
11 Error =  0.37853291630744934
Accuracy:  0.9296
12 Error =  0.34342825412750244
Accuracy:  0.9333166666666667
13 Error =  0.31380459666252136
Accuracy:  0.9369166666666666
14 Error =  0.28865745663642883
Accuracy:  0.9404666666666667
15 Error =  0.26695194840431213
Accuracy:  0.9435
16 Error =  0.2479027807712555
Accuracy:  0.9464333333333333
17 Err

  1%|          | 13/1875 [00:00<00:14, 125.00it/s]

Molchanov_group


100%|██████████| 1875/1875 [00:15<00:00, 118.84it/s]
  0%|          | 9/1875 [00:00<00:21, 88.76it/s]

Molchanov_BN


100%|██████████| 1875/1875 [00:15<00:00, 121.70it/s]
  0%|          | 0/80 [00:00<?, ?it/s]

taylorfo


100%|██████████| 80/80 [01:33<00:00,  1.17s/it]
  0%|          | 0/80 [00:00<?, ?it/s]

taylorfo_abs


100%|██████████| 80/80 [01:36<00:00,  1.21s/it]
  0%|          | 0/80 [00:00<?, ?it/s]

taylorfo_sq


100%|██████████| 80/80 [01:37<00:00,  1.22s/it]
  0%|          | 0/80 [00:00<?, ?it/s]

Molchanov_group


100%|██████████| 80/80 [01:37<00:00,  1.22s/it]
  0%|          | 0/80 [00:00<?, ?it/s]

Molchanov_BN


100%|██████████| 80/80 [01:36<00:00,  1.20s/it]


taylorfo
taylorfo_abs
taylorfo_sq


  2%|▏         | 29/1875 [00:00<00:06, 284.31it/s]

Molchanov_group


100%|██████████| 1875/1875 [00:05<00:00, 318.33it/s]
  0%|          | 0/80 [00:00<?, ?it/s]

taylorfo


100%|██████████| 80/80 [00:26<00:00,  2.96it/s]
  0%|          | 0/80 [00:00<?, ?it/s]

taylorfo_abs


100%|██████████| 80/80 [00:25<00:00,  3.08it/s]
  0%|          | 0/80 [00:00<?, ?it/s]

taylorfo_sq


100%|██████████| 80/80 [00:25<00:00,  3.09it/s]
  0%|          | 0/80 [00:00<?, ?it/s]

Molchanov_group


100%|██████████| 80/80 [00:27<00:00,  2.91it/s]


0 Error =  2.3896596431732178
Accuracy:  0.7032666666666667
1 Error =  1.4567660093307495
Accuracy:  0.8057333333333333
2 Error =  1.073710322380066
Accuracy:  0.85485
3 Error =  0.8653827905654907
Accuracy:  0.8863333333333333
4 Error =  0.7226560115814209
Accuracy:  0.9042166666666667
5 Error =  0.6153109669685364
Accuracy:  0.9151333333333334
6 Error =  0.5320311784744263
Accuracy:  0.9229333333333334
7 Error =  0.466150164604187
Accuracy:  0.9290666666666667
8 Error =  0.41293221712112427
Accuracy:  0.9339833333333334
9 Error =  0.36885178089141846
Accuracy:  0.93855
10 Error =  0.3315570652484894
Accuracy:  0.9423666666666667
11 Error =  0.2997400164604187
Accuracy:  0.9460666666666666
12 Error =  0.2726232707500458
Accuracy:  0.9491333333333334
13 Error =  0.24941588938236237
Accuracy:  0.9516
14 Error =  0.22927738726139069
Accuracy:  0.9538666666666666
15 Error =  0.21163764595985413
Accuracy:  0.9564833333333334
16 Error =  0.19611820578575134
Accuracy:  0.9586
17 Error =  0.1

  0%|          | 6/1875 [00:00<00:34, 54.72it/s]

Molchanov_group


100%|██████████| 1875/1875 [00:27<00:00, 67.34it/s]
  0%|          | 6/1875 [00:00<00:31, 58.90it/s]

Molchanov_BN


100%|██████████| 1875/1875 [00:27<00:00, 68.36it/s]
  0%|          | 0/80 [00:00<?, ?it/s]

taylorfo


100%|██████████| 80/80 [03:02<00:00,  2.28s/it]
  0%|          | 0/80 [00:00<?, ?it/s]

taylorfo_abs


100%|██████████| 80/80 [03:04<00:00,  2.31s/it]
  0%|          | 0/80 [00:00<?, ?it/s]

taylorfo_sq


100%|██████████| 80/80 [03:02<00:00,  2.28s/it]
  0%|          | 0/80 [00:00<?, ?it/s]

Molchanov_group


100%|██████████| 80/80 [03:06<00:00,  2.33s/it]
  0%|          | 0/80 [00:00<?, ?it/s]

Molchanov_BN


100%|██████████| 80/80 [03:36<00:00,  2.70s/it]


taylorfo
taylorfo_abs
taylorfo_sq


  1%|          | 11/1875 [00:00<00:17, 106.18it/s]

Molchanov_group


100%|██████████| 1875/1875 [00:11<00:00, 164.58it/s]
  0%|          | 0/80 [00:00<?, ?it/s]

taylorfo


100%|██████████| 80/80 [01:06<00:00,  1.20it/s]
  0%|          | 0/80 [00:00<?, ?it/s]

taylorfo_abs


100%|██████████| 80/80 [01:06<00:00,  1.21it/s]
  0%|          | 0/80 [00:00<?, ?it/s]

taylorfo_sq


100%|██████████| 80/80 [01:09<00:00,  1.15it/s]
  0%|          | 0/80 [00:00<?, ?it/s]

Molchanov_group


100%|██████████| 80/80 [01:08<00:00,  1.17it/s]


0 Error =  2.4557838439941406
Accuracy:  0.7679166666666667
1 Error =  0.981582522392273
Accuracy:  0.8533833333333334
2 Error =  0.6418787837028503
Accuracy:  0.88105
3 Error =  0.4934737980365753
Accuracy:  0.8954166666666666
4 Error =  0.4059355854988098
Accuracy:  0.9068
5 Error =  0.34812483191490173
Accuracy:  0.9140333333333334
6 Error =  0.30893993377685547
Accuracy:  0.9208833333333334
7 Error =  0.2802768349647522
Accuracy:  0.9260166666666667
8 Error =  0.25746989250183105
Accuracy:  0.93035
9 Error =  0.23845353722572327
Accuracy:  0.9343666666666667
10 Error =  0.2222716212272644
Accuracy:  0.9376833333333333
11 Error =  0.20838865637779236
Accuracy:  0.9413833333333333
12 Error =  0.19638124108314514
Accuracy:  0.9445666666666667
13 Error =  0.18586961925029755
Accuracy:  0.9471833333333334
14 Error =  0.1765054315328598
Accuracy:  0.94975
15 Error =  0.16801811754703522
Accuracy:  0.95185
16 Error =  0.1602671891450882
Accuracy:  0.9537833333333333
17 Error =  0.15323750

  1%|          | 10/1875 [00:00<00:19, 94.93it/s]

Molchanov_group


100%|██████████| 1875/1875 [00:24<00:00, 77.85it/s]
  0%|          | 9/1875 [00:00<00:21, 87.63it/s]

Molchanov_BN


100%|██████████| 1875/1875 [00:21<00:00, 87.54it/s]
  0%|          | 0/80 [00:00<?, ?it/s]

taylorfo


100%|██████████| 80/80 [02:44<00:00,  2.05s/it]
  0%|          | 0/80 [00:00<?, ?it/s]

taylorfo_abs


100%|██████████| 80/80 [02:35<00:00,  1.94s/it]
  0%|          | 0/80 [00:00<?, ?it/s]

taylorfo_sq


100%|██████████| 80/80 [02:45<00:00,  2.07s/it]
  0%|          | 0/80 [00:00<?, ?it/s]

Molchanov_group


100%|██████████| 80/80 [02:41<00:00,  2.01s/it]
  0%|          | 0/80 [00:00<?, ?it/s]

Molchanov_BN


100%|██████████| 80/80 [02:45<00:00,  2.07s/it]


taylorfo
taylorfo_abs
taylorfo_sq


  1%|          | 21/1875 [00:00<00:08, 207.52it/s]

Molchanov_group


100%|██████████| 1875/1875 [00:09<00:00, 205.06it/s]
  0%|          | 0/80 [00:00<?, ?it/s]

taylorfo


100%|██████████| 80/80 [01:07<00:00,  1.18it/s]
  0%|          | 0/80 [00:00<?, ?it/s]

taylorfo_abs


100%|██████████| 80/80 [00:59<00:00,  1.33it/s]
  0%|          | 0/80 [00:00<?, ?it/s]

taylorfo_sq


100%|██████████| 80/80 [01:05<00:00,  1.21it/s]
  0%|          | 0/80 [00:00<?, ?it/s]

Molchanov_group


100%|██████████| 80/80 [01:02<00:00,  1.29it/s]


0 Error =  2.419769048690796
Accuracy:  0.7290333333333333
1 Error =  1.011606216430664
Accuracy:  0.8187166666666666
2 Error =  0.6862931251525879
Accuracy:  0.8563833333333334
3 Error =  0.5505363941192627
Accuracy:  0.8743
4 Error =  0.47887369990348816
Accuracy:  0.8848333333333334
5 Error =  0.4327890872955322
Accuracy:  0.8924333333333333
6 Error =  0.3978101313114166
Accuracy:  0.8977
7 Error =  0.3694283664226532
Accuracy:  0.90225
8 Error =  0.34634485840797424
Accuracy:  0.9056666666666666
9 Error =  0.3275698125362396
Accuracy:  0.9091333333333333
10 Error =  0.31199923157691956
Accuracy:  0.91205
11 Error =  0.29876142740249634
Accuracy:  0.9151666666666667
12 Error =  0.2872227430343628
Accuracy:  0.9177
13 Error =  0.2769559919834137
Accuracy:  0.92005
14 Error =  0.2676680386066437
Accuracy:  0.9225
15 Error =  0.259153813123703
Accuracy:  0.9249
16 Error =  0.251282662153244
Accuracy:  0.9273666666666667
17 Error =  0.2439635694026947
Accuracy:  0.9297333333333333
18 Er

  1%|          | 16/1875 [00:00<00:12, 152.75it/s]

Molchanov_group


100%|██████████| 1875/1875 [00:15<00:00, 117.22it/s]
  1%|          | 11/1875 [00:00<00:17, 107.21it/s]

Molchanov_BN


100%|██████████| 1875/1875 [00:14<00:00, 133.52it/s]
  0%|          | 0/80 [00:00<?, ?it/s]

taylorfo


100%|██████████| 80/80 [01:36<00:00,  1.21s/it]
  0%|          | 0/80 [00:00<?, ?it/s]

taylorfo_abs


100%|██████████| 80/80 [01:48<00:00,  1.35s/it]
  0%|          | 0/80 [00:00<?, ?it/s]

taylorfo_sq


100%|██████████| 80/80 [01:37<00:00,  1.22s/it]
  0%|          | 0/80 [00:00<?, ?it/s]

Molchanov_group


100%|██████████| 80/80 [01:30<00:00,  1.13s/it]
  0%|          | 0/80 [00:00<?, ?it/s]

Molchanov_BN


100%|██████████| 80/80 [01:30<00:00,  1.13s/it]


taylorfo
taylorfo_abs
taylorfo_sq


  2%|▏         | 31/1875 [00:00<00:06, 306.30it/s]

Molchanov_group


100%|██████████| 1875/1875 [00:05<00:00, 345.11it/s]
  0%|          | 0/80 [00:00<?, ?it/s]

taylorfo


100%|██████████| 80/80 [00:44<00:00,  1.78it/s]
  0%|          | 0/80 [00:00<?, ?it/s]

taylorfo_abs


100%|██████████| 80/80 [00:44<00:00,  1.78it/s]
  0%|          | 0/80 [00:00<?, ?it/s]

taylorfo_sq


100%|██████████| 80/80 [00:44<00:00,  1.79it/s]
  0%|          | 0/80 [00:00<?, ?it/s]

Molchanov_group


100%|██████████| 80/80 [00:43<00:00,  1.82it/s]
