In [1]:
import numpy as np
import matplotlib.pyplot as plt
%matplotlib inline
from mpl_toolkits.mplot3d import Axes3D

import torch
import torch.nn as nn
import torch.optim as optim
from torch.utils import data

import random, os, pathlib, time
from tqdm import tqdm
from sklearn import datasets

In [2]:
import nflib
from nflib.flows import SequentialFlow, NormalizingFlow, ActNorm, AffineConstantFlow
import nflib.coupling_flows as icf
import nflib.inn_flow as inn
import nflib.res_flow as irf

In [3]:
import dtnnlib as dtnn

In [4]:
# device = torch.device("cuda:0")
device = torch.device("cpu")

## MNIST dataset

In [5]:
import mylibrary.datasets as datasets

In [6]:
mnist = datasets.FashionMNIST()
# mnist.download_mnist()
# mnist.save_mnist()
train_data, train_label_, test_data, test_label_ = mnist.load()

train_data = train_data / 255.
test_data = test_data / 255.

# train_label = tnn.Logits.index_to_logit(train_label_)
train_size = len(train_label_)

In [7]:
## converting data to pytorch format
train_data = torch.Tensor(train_data)
test_data = torch.Tensor(test_data)
train_label = torch.LongTensor(train_label_)
test_label = torch.LongTensor(test_label_)

In [8]:
input_size = 784
output_size = 10

In [9]:
class MNIST_Dataset(data.Dataset):
    
    def __init__(self, data, label):
        self.data = data
        self.label = label
#         self._shuffle_data_()
        
    def __len__(self):
        return len(self.data)
    
#     def _shuffle_data_(self):
#         randidx = random.sample(range(len(self.data)), k=len(self.data))
#         self.data = self.data[randidx]
#         self.label = self.label[randidx]
    
    def __getitem__(self, idx):
        img, lbl = self.data[idx], self.label[idx]
        return img, lbl

In [10]:
class Subset_Dataset(data.Dataset):
    
    def __init__(self, dataset, index):
        self.data = dataset.data
        self.label = dataset.label
        self.index = index
        
    def __len__(self):
        return len(self.index)
    
    def __getitem__(self, idx):
        idx = self.index[idx]
        img, lbl = self.data[idx], self.label[idx]
        return img, lbl

In [11]:
train_dataset = MNIST_Dataset(train_data, train_label)
test_dataset = MNIST_Dataset(test_data, test_label)

In [12]:
learning_rate = 0.0003
batch_size = 50

In [13]:
train_loader = data.DataLoader(dataset=train_dataset, num_workers=4, batch_size=batch_size, shuffle=True)
test_loader = data.DataLoader(dataset=test_dataset, num_workers=4, batch_size=batch_size, shuffle=False)

In [14]:
class ConnectedClassifier_Softmax(nn.Module):
    
    def __init__(self,input_dim, num_sets, output_dim, inv_temp=1):
        super().__init__()
        self.input_dim = input_dim
        self.output_dim = output_dim
        self.num_sets = num_sets
        self.inv_temp = nn.Parameter(torch.ones(1)*inv_temp)
        
        self.linear = nn.Linear(input_dim, num_sets)
        self.linear.bias.data *= 0
        self.linear.weight.data *= 0.1
        self.cls_weight = nn.Parameter(torch.ones(num_sets, output_dim)/output_dim)
        self.cls_confidence = None
        
        
    def forward(self, x, hard=True):
        x = self.linear(x)
        if hard:
            x = torch.softmax(-x*1e5, dim=1)
        else:
            x = torch.softmax(-x*self.inv_temp, dim=1)
        self.cls_confidence = x
        c = torch.softmax(self.cls_weight, dim=1)
#         c = self.cls_weight
        return x@c ## since both are normalized, it is also normalized

In [15]:
class ConnectedClassifier_SoftKMeans(nn.Module):
    
    def __init__(self,input_dim, num_sets, output_dim, inv_temp=1):
        super().__init__()
        self.input_dim = input_dim
        self.output_dim = output_dim
        self.num_sets = num_sets
        self.inv_temp = nn.Parameter(torch.ones(1)*inv_temp)
        
        self.dt = dtnn.DistanceTransformBase(784, num_sets)
        
        ## uniform values for all class
        init_val = torch.ones(num_sets, output_dim)/output_dim

        ## class repeat sequentially
#         init_val = torch.randn(num_sets, output_dim)*0.01
#         for ns in range(num_sets):
#             init_val[ns, ns%output_dim] = 10.
        
        self.cls_weight = nn.Parameter(init_val)

        self.cls_confidence = None
        
        self.move_loss = None
        
        
    def forward(self, x, hard=False):
#         self.cls_weight.data = torch.abs(self.cls_weight.data/self.cls_weight.data.sum(dim=1, keepdim=True))
        
        x_ = x[:, :self.input_dim]
        dists = self.dt(x_)
        dists = dists/np.sqrt(self.input_dim) ### correction to make diagonal of unit square 1 in nD space
        
        if hard:
            x = torch.softmax(-dists*1e5, dim=1)
        else:
            x = torch.softmax(-dists*self.inv_temp, dim=1)
        
        self.cls_confidence = x
        c = torch.softmax(self.cls_weight, dim=1)
#         c = self.cls_weight
        return x@c ## since both are normalized, it is also normalized
    
    def set_centroid_to_data_randomly(self, data_loader, model):
        indices = np.random.permutation(len(data_loader.dataset.data))[:self.dt.centers.shape[0]]
        xx = data_loader.dataset.data[indices].to(self.dt.centers.device)
        yy = data_loader.dataset.label[indices].to(self.dt.centers.device)
        yout = model(xx)
        self.dt.centers.data = yout
        
        init_val = torch.ones(self.num_sets, self.output_dim)/self.output_dim
        for ns in range(len(indices)):
            init_val[ns, yy[ns]] = 10.
        self.cls_weight.data = init_val.to(self.cls_weight.device)
        pass

In [16]:
class ST_Sigmoid(torch.autograd.Function):
    
    @staticmethod
    def forward(ctx, x):
        zz = 1/(1+torch.exp(-x))
        ctx.save_for_backward(zz)
        
        output = (x>0).type(x.dtype)
        return output

    @staticmethod
    def backward(ctx, grad_output):
        zz, = ctx.saved_tensors
        grad_x = None
        
        if ctx.needs_input_grad[0]:
            grad_x = zz*(1-zz)*grad_output

        return grad_x

In [17]:
class DistanceBinaryClassifier(nn.Module):
    def __init__(self, input_dim, output_dim, inv_temp=1):
        super().__init__()
        self.output_dim = output_dim
        self.centers = nn.Parameter(torch.rand(1, input_dim)*2-1)
        self.bias = nn.Parameter(torch.ones(1)*-0.5)
        self.inv_temp = nn.Parameter(torch.ones(1)*inv_temp)

#         self.actf = nn.Sigmoid()
        self.actf = ST_Sigmoid.apply
        self.class_val = nn.Parameter(torch.ones(2, output_dim)*0.5) ## [index: 0 = pos, 1 = neg]
        
    def forward(self, x, hard=False):
        self.class_val.data.abs_()
        self.class_val.data = self.class_val.data/self.class_val.data.sum(dim=1, keepdim=True)
        
        x = torch.norm(x-self.centers, dim=1, keepdim=True) + self.bias
        if hard:
            x = torch.sigmoid(-x*1e5)
        else:
            x = self.actf(-x*torch.exp(self.inv_temp))
            
        self.confidence = x
        x = x*self.class_val[:1] + (1-x)*self.class_val[1:]
        return x
    
    def set_centroid_to_data_randomly(self, data_loader, model):
        index = np.random.randint(len(data_loader.dataset.data))
        xx = data_loader.dataset.data[index:index+1].to(self.centers.device)
        yy = data_loader.dataset.label[index:index+1].to(self.centers.device)
        yout = model(xx)
        
        self.centers.data = yout
        
        init_val = torch.ones(self.output_dim)/self.output_dim
        init_val[yy[0]] = 1.
        self.class_val.data[0] = init_val.to(self.class_val.device)
        pass

In [18]:
from dtnnlib import StereographicTransform

In [19]:
class StereographicBinaryClassifier(nn.Module):
    def __init__(self, input_dim, output_dim, inv_temp=1):
        super().__init__()
        self.output_dim = output_dim
        self.linear = StereographicTransform(input_dim, 1, bias=True)
        self.inv_temp = nn.Parameter(torch.ones(1)*inv_temp)

#         self.actf = nn.Sigmoid()
        self.actf = ST_Sigmoid.apply
        self.class_val = nn.Parameter(torch.ones(2, output_dim)*0.5) ## [index: 0 = pos, 1 = neg]
        
    def forward(self, x, hard=False):
        self.class_val.data.abs_()
        self.class_val.data = self.class_val.data/self.class_val.data.sum(dim=1, keepdim=True)
        
        x = self.linear(x)
        if hard:
            x = torch.sigmoid(x*1e5)
        else:
            x = self.actf(x*torch.exp(self.inv_temp))
            
        self.confidence = x
        x = x*self.class_val[:1] + (1-x)*self.class_val[1:]
        return x
    
    def set_centroid_to_data_randomly(self, data_loader, model):
        index = np.random.randint(len(data_loader.dataset.data))
        xx = data_loader.dataset.data[index:index+1].to(self.class_val.device)
        yy = data_loader.dataset.label[index:index+1].to(self.class_val.device)
        yout = model(xx)
        
        x = yout
        x = x*self.linear.inp_scaler
        sqnorm = (x**2).sum(dim=1, keepdim=True) ## l2 norm squared
        x = x*2/(sqnorm+1)
        new_dim = (sqnorm-1)/(sqnorm+1)
        x = torch.cat((x, new_dim), dim=1)
        
        self.linear.linear.weight.data = x
        
#         init_val = torch.ones(self.output_dim)/self.output_dim
#         init_val[yy[0]] = 1.
#         self.class_val.data[0] = init_val.to(self.class_val.device)
        pass

In [20]:
# classifier = DistanceBinaryClassifier(784, 10, inv_temp=1.)
classifier = StereographicBinaryClassifier(784, 10, inv_temp=1.)

In [21]:
xx = iter(train_loader).next()[0][:5]
yout = classifier(xx)

In [22]:
yout.shape

torch.Size([5, 10])

In [23]:
actf = irf.Swish
flows = [
    ActNorm(784),
    irf.ResidualFlow(784, [784], activation=actf),
    ActNorm(784),
    irf.ResidualFlow(784, [784], activation=actf),
    ActNorm(784),
        ]

model = SequentialFlow(flows)
model = model.to(device)

In [24]:
# model = nn.Sequential(nn.Linear(784, 784, bias=False),
#                       nn.BatchNorm1d(784),
#                       nn.SELU(),
#                       nn.Linear(784, 784, bias=False),
#                       nn.BatchNorm1d(784),
#                       nn.SELU(),
#                      )

In [25]:
model.to(device)

SequentialFlow(
  (flows): ModuleList(
    (0): ActNorm()
    (1): ResidualFlow(
      (resblock): ModuleList(
        (0): Linear(in_features=784, out_features=784, bias=True)
        (1): Swish()
        (2): Linear(in_features=784, out_features=784, bias=True)
      )
    )
    (2): ActNorm()
    (3): ResidualFlow(
      (resblock): ModuleList(
        (0): Linear(in_features=784, out_features=784, bias=True)
        (1): Swish()
        (2): Linear(in_features=784, out_features=784, bias=True)
      )
    )
    (4): ActNorm()
  )
)

In [26]:
# classifier = ConnectedClassifier_SoftKMeans(784, 100, 10)
# classifier = ConnectedClassifier_Softmax(784, 10, 10)
# classifier = DistanceBinaryClassifier(784, 10, inv_temp=1.)
classifier = StereographicBinaryClassifier(784, 10, inv_temp=1.)
classifier = classifier.to(device)

In [27]:
classifier.set_centroid_to_data_randomly(train_loader, model)

In [28]:
def MSEClassificationLoss(output, target):
    zeros = torch.zeros_like(output)
    zeros[range(len(target)), target] = 1
    return nn.functional.mse_loss(output, zeros)

## Model Train

In [183]:
# criterion = nn.NLLLoss()
# criterion = nn.CrossEntropyLoss()

criterion = MSEClassificationLoss

optimizer = optim.Adam(list(model.parameters())+list(classifier.parameters()),
                       lr=0.0003)
# optimizer = optim.SGD(model.parameters(), lr=0.1)

print("number of params: ", sum(p.numel() for p in model.parameters()))

number of params:  2466466


In [184]:
# for p in model.parameters():
#     print(torch.isnan(p).type(torch.float32).sum())

In [185]:
xx = iter(test_loader).next()[0]
xx.shape

torch.Size([50, 784])

In [186]:
model(xx.to(device))

tensor([[ 0.1100, -0.1043,  0.0157,  ...,  0.0633,  0.0709,  0.0281],
        [ 0.1223, -0.0137,  0.0715,  ...,  0.0541,  0.0100, -0.0355],
        [-0.0027,  0.0780,  0.0240,  ..., -0.0418, -0.0125,  0.0831],
        ...,
        [-0.0391,  0.0216, -0.0194,  ..., -0.0229,  0.0148,  0.0838],
        [ 0.0419, -0.0427, -0.0124,  ...,  0.0515,  0.0343, -0.0059],
        [ 0.0094,  0.0329,  0.0004,  ..., -0.0092, -0.0090, -0.0079]],
       grad_fn=<AddBackward0>)

In [187]:
losses = []
train_accs = []
test_accs = []
EPOCHS = 3

index = 0
for epoch in range(EPOCHS):
    model.train()
    train_acc = 0
    train_count = 0
    for xx, yy in tqdm(train_loader):
        xx, yy = xx.to(device), yy.to(device)
        
        yout = model(xx)
        yout = classifier(yout)    
        loss = criterion(yout, yy)
        optimizer.zero_grad()
        loss.backward()
        optimizer.step()

        losses.append(float(loss))

        outputs = torch.argmax(yout, dim=1).data.cpu().numpy()
        correct = (outputs == yy.data.cpu().numpy()).astype(float).sum()
        train_acc += correct
        train_count += len(outputs)

    train_accs.append(float(train_acc)/train_count*100)
    train_acc = 0
    train_count = 0

    print(f'Epoch: {epoch}:{index},  Loss:{float(loss)}')
    test_count = 0
    test_acc = 0
    for xx, yy in tqdm(test_loader):
        xx, yy = xx.to(device), yy.to(device)
        with torch.no_grad():
            yout = classifier(model(xx))    
        outputs = torch.argmax(yout, dim=1).data.cpu().numpy()
        correct = (outputs == yy.data.cpu().numpy()).astype(float).sum()
        test_acc += correct
        test_count += len(xx)
    test_accs.append(float(test_acc)/test_count*100)
    print(f'Train Acc:{train_accs[-1]:.2f}%, Test Acc:{test_accs[-1]:.2f}%')
    print()

### after each class index is finished training
print(f'\t-> Train Acc {max(train_accs)} ; Test Acc {max(test_accs)}')

100%|██████████| 1200/1200 [01:08<00:00, 17.60it/s]
  0%|          | 0/200 [00:00<?, ?it/s]

Epoch: 0:0,  Loss:0.08264584094285965


100%|██████████| 200/200 [00:02<00:00, 71.54it/s]
  0%|          | 0/1200 [00:00<?, ?it/s]

Train Acc:18.46%, Test Acc:19.81%



 14%|█▍        | 174/1200 [00:10<01:00, 16.89it/s]


KeyboardInterrupt: 

In [32]:
classifier.inv_temp

Parameter containing:
tensor([0.5331], requires_grad=True)

### Hard test accuracy with count per classifier

In [33]:
test_count = 0
test_acc = 0
set_count = torch.zeros(2).to(device)
for xx, yy in tqdm(test_loader):
    xx, yy = xx.to(device), yy.to(device)
    with torch.no_grad():
        yout = classifier(model(xx), hard=True)
        count = (classifier.confidence > 0.5).type(torch.long).sum()
        set_count[0] += count
        set_count[1] += len(classifier.confidence) - count
        
    outputs = torch.argmax(yout, dim=1).data.cpu().numpy()
    correct = (outputs == yy.data.cpu().numpy()).astype(float).sum()
    test_acc += correct
    test_count += len(xx)

print(f'Hard Test Acc:{float(test_acc)/test_count*100:.2f}%')
print(set_count.type(torch.long).tolist())

100%|██████████| 200/200 [00:03<00:00, 53.59it/s]

Hard Test Acc:18.66%
[2465, 7535]





### Hard train accuracy with count per classifier

In [34]:
test_count = 0
test_acc = 0
set_count = torch.zeros(2).to(device)
for xx, yy in tqdm(train_loader):
    xx, yy = xx.to(device), yy.to(device)
    with torch.no_grad():
        yout = classifier(model(xx), hard=True)
        count = (classifier.confidence > 0.5).type(torch.long).sum()
        set_count[0] += count
        set_count[1] += len(classifier.confidence) - count
        
    outputs = torch.argmax(yout, dim=1).data.cpu().numpy()
    correct = (outputs == yy.data.cpu().numpy()).astype(float).sum()
    test_acc += correct
    test_count += len(xx)

print(f'Hard Test Acc:{float(test_acc)/test_count*100:.2f}%')
print(set_count.type(torch.long).tolist())

100%|██████████| 1200/1200 [00:22<00:00, 54.17it/s]

Hard Test Acc:18.74%
[14717, 45283]





In [35]:
#### classifier with class representation
torch.argmax(classifier.class_val, dim=1)

tensor([2, 7])

In [36]:
asdfsdf ## to break the code

NameError: name 'asdfsdf' is not defined

## Auto : Iterative Improvement

#### Tree structure for training

In [29]:
class ClassifierTree:
    
    def __init__(self, train_data, test_data, device):
        self.root = LocalClassifier(device)
        self.root.create_network_0(784, [784, 784], 10)
        
        self.train_data = train_data
        self.test_data = test_data
        self.device = device
        pass
    
    def display_stats(self):
        indexing = "0"
        self.root.display_stats(indexing)
        acc, tot = self.root.get_correct_train()
        train_acc = acc/tot
        acc, tot = self.root.get_correct_test()
        test_acc = acc/tot
        print(f"Final Accuracy is Train: {train_acc :.5f} Test: {test_acc :.5f}")
            
    def get_parent_node(self, index_list:list):
        parent = self.root
        index_list = index_list[1:]
        for idx in index_list[:-1]:
            parent = parent.children[idx]
        return parent
    
    def get_node(self, index_list:list):
        parent = self.root
        index_list = index_list[1:]
        for idx in index_list[:-1]:
            parent = parent.children[idx]
        child = parent.children[index_list[-1]]
        return child
    
    def get_all_child_index(self):
        child_list = []
        self.root.get_all_index([0], child_list)
        return child_list

#### Leaf node for classification

In [30]:
class LeafNode:
    def __init__(self):
        self.pred = None
        self.classes = None
        self.num_correct = None
        self.train_indices = None
        self.test_indices = None
        self.test_correct = None
        
    def display_stats(self, indexing):
        if len(self.train_indices)>0:
            train_acc = self.num_correct/len(self.train_indices)
        else:
            train_acc = -1
            
        
        print(f"[{indexing}] : Train -> {train_acc :.4f}", end=" ")
        
        if len(self.test_indices)>0:
            test_acc = self.test_correct/len(self.test_indices)
        else:
            test_acc = -1
        print(f"Test -> {test_acc :.4f}, NUM: {len(self.train_indices)}, classes: {self.pred}:{self.classes}")

    def get_correct_train(self):
        return self.num_correct, len(self.train_indices)
    
    def get_correct_test(self):
        return self.test_correct, len(self.test_indices)
    
    def get_all_index(self, indexing, indx_lst):
        indx_lst.append(indexing)

In [31]:
class LocalClassifier:
    
    def __init__(self, device):
        self.model = None
        self.classifier = None
        self.device = device
        
        ### for training purposes
        self.train_loader = None
        self.test_loader = None
        self.optimizer = None
        self.frozen = False
        self.criterion = None
        
        ### after freazing the model, record stats
        self.children = []
    
    def create_network_0(self, input_dim, hidden_dims:list, output_dims):
        actf = irf.Swish
        flows = []
        flows.append(ActNorm(input_dim))
        for i in range(len(hidden_dims)):
            if isinstance(hidden_dims[i], list):
                hdi = hidden_dims[i]
            else:
                hdi = [hidden_dims[i]]
            flows.append(irf.ResidualFlow(input_dim, hdi, activation=actf))
            flows.append(ActNorm(input_dim))
        
        invertible = SequentialFlow(flows)
        
#         actf = nn.SELU
#         flows = []
#         hidden_dims = [input_dim]+hidden_dims
#         for i in range(1, len(hidden_dims)):
#             flows.append(nn.Linear(hidden_dims[i-1], hidden_dims[i]))
#             flows.append(actf())
#         flows.append(nn.Linear(hidden_dims[-1], input_dim))
#         invertible = nn.Sequential(*flows)

        self.model = invertible.to(device)
#         classifier = DistanceBinaryClassifier(input_dim, output_dims)
        classifier = StereographicBinaryClassifier(input_dim, output_dims)
        self.classifier = classifier.to(device)
        
    def create_train_loader_1(self, train_dataset, index, batch_size):
        dataset = Subset_Dataset(train_dataset, index)
        print(f"Train Dataset Num: {len(index)}")
        self.train_loader = data.DataLoader(dataset=dataset,
                                            num_workers=4, 
                                            batch_size=batch_size, 
                                            shuffle=True)
        self.classifier.set_centroid_to_data_randomly(self.train_loader, self.model)
    
    def create_test_loader_2(self, test_dataset, index, batch_size):
        dataset = Subset_Dataset(test_dataset, index)
        print(f"Test Dataset Num: {len(index)}")
        self.test_loader = data.DataLoader(dataset=dataset,
                                            num_workers=4, 
                                            batch_size=batch_size, 
                                            shuffle=False)
        
    def create_optimizer_3(self, lr):
        self.optimizer = optim.Adam(list(self.model.parameters())+list(self.classifier.parameters()), 
                                    lr=lr)
        self.criterion = MSEClassificationLoss
#         self.criterion = nn.CrossEntropyLoss()

        
        
    def train_classifier_4(self, epochs, ):
        if self.frozen:
            raise ValueError("This classifier is frozen. Training it might cause errors in childern classifiers")
            
    ############# TRAINING FUNCTIONALITY BELOW ####################    
        train_accs, test_accs = [], []
        for epoch in range(epochs):
            model.train()
            classifier.train()
            train_acc = 0
            train_count = 0
            for xx, yy in tqdm(self.train_loader):
                xx, yy = xx.to(device), yy.to(device)

                yout = self.model(xx)
                yout = self.classifier(yout)    
                loss = self.criterion(yout, yy)
                self.optimizer.zero_grad()
                loss.backward()
                self.optimizer.step()

                outputs = torch.argmax(yout, dim=1).data.cpu().numpy()
                correct = (outputs == yy.data.cpu().numpy()).astype(float).sum()
                train_acc += correct
                train_count += len(outputs)

            train_accs.append(float(train_acc)/train_count*100)
            train_acc = 0
            train_count = 0

            print(f'Epoch: {epoch},  Loss:{float(loss)}')
            test_count = 0
            test_acc = 0
            for xx, yy in tqdm(self.test_loader):
                xx, yy = xx.to(device), yy.to(device)
                with torch.no_grad():
                    yout = self.classifier(self.model(xx))    
                outputs = torch.argmax(yout, dim=1).data.cpu().numpy()
                correct = (outputs == yy.data.cpu().numpy()).astype(float).sum()
                test_acc += correct
                test_count += len(xx)
            test_accs.append(float(test_acc)/test_count*100)
            print(f'Train Acc:{train_accs[-1]:.2f}%, Test Acc:{test_accs[-1]:.2f}%')
            print()
        ### after each class index is finished training
        print(f'\t-> MAX Train Acc {max(train_accs)} ; Test Acc {max(test_accs)}')
        
        
    def freeze_and_compute_stats_5(self, MIN_POINTS):
        assert MIN_POINTS > 0
        if self.frozen:
            raise ValueError("This classifier is frozen. The stat has already been calculated")
            
#         self.frozen = True
        ### delete optimizer, frees memory
#         del self.optimizer

        ### take classifier to eval mode
        self.model.eval()
        self.classifier.eval()
        
        with torch.no_grad():
            
#             #### remove classifier with no data or few data.
#             set_count = torch.zeros(2).to(device)
#             for xx, yy in tqdm(train_loader):
#                 xx, yy = xx.to(device), yy.to(device)
#                 yout = classifier(model(xx), hard=True)
#                 count = (classifier.confidence > 0.5).type(torch.long).sum()
#                 set_count[0] += count
#                 set_count[1] += len(classifier.confidence) - count

#             #### find only the classifier having some data
#             classifier_index = [0, 1]
#             classifier_count = set_count.type(torch.long).tolist()
# #             if set_count[0] < MIN_POINTS or set_count[1] < MIN_POINTS:
# #                 ### end the freezing and ignore this training

            ###### compute stats now, from pruned tree.
            def get_Cs_Os_Ts_Zs(data_loader):
                Cs = [] ## winning classifier
                Os = [] ## output of winning classifier
                Ts = [] ## target class
                Zs = []

                for xx, yy in data_loader:
                    Ts.append(yy)
                    xx, yy = xx.to(device), yy.to(device)
                    with torch.no_grad():
                        zz = self.model(xx)
                        Zs.append(zz.data.cpu())
                        yout = self.classifier(zz, hard=True)
                        Os.append(torch.argmax(yout, dim=1).data.cpu())

                    cls_indx = (self.classifier.confidence > 0.5).type(torch.long).reshape(-1)
                    Cs.append(cls_indx)

                Cs = torch.cat(Cs, dim=0)
                Ts = torch.cat(Ts, dim=0)
                Os = torch.cat(Os, dim=0)
                Zs = torch.cat(Zs, dim=0)
                return Cs, Ts, Os, Zs


            unshuffled_data = data.DataLoader(dataset=self.train_loader.dataset,
                                                num_workers=4, 
                                                batch_size=self.train_loader.batch_size, 
                                                shuffle=False)
            Cs, Ts, Os, Zs = get_Cs_Os_Ts_Zs(unshuffled_data)
            _Cs, _Ts, _, _Zs = get_Cs_Os_Ts_Zs(self.test_loader)

            print("Hard inference on the data !")
            self.children = []
            acc = 0
            for cls_idx in range(2):
                data_idx = torch.nonzero(Cs == cls_idx)
                Ti = Ts[data_idx]
#                 print(Ti)
                ### get prediction according to data
                cls, count = torch.unique(Ti, return_counts=True, sorted=True)
                if len(count) > 0:
                    pred = cls[torch.argmax(count)]
                else:
                    pred = self.classifier.class_val[cls_idx].argmax()
                p = (Ti==pred).type(torch.float32).sum()
                acc += p

                child = LeafNode()
                child.pred = int(pred)
                child.classes = cls.tolist()
                child.num_correct = int(p)
                child.train_indices = data_idx.cpu().reshape(-1)
                
                test_idx = torch.nonzero(_Cs == cls_idx)
                test_p = (_Ts[test_idx]==pred).type(torch.float32).sum()
                child.test_indices = test_idx.cpu().reshape(-1)
                child.test_correct = int(test_p)

                self.children.append(child)

                print(f"idx: {cls_idx}\tout: {int(pred)} \t acc: {p/len(Ti)*100 :.3f} \tclasses:{cls.tolist()}")
            
            accuracy = float(acc)/len(Ts)
            print(f"Accuracy: {accuracy}")
        return accuracy#, (Zs, Ts, _Zs, _Ts)
        
        
    def display_stats(self, indexing):
        for i, c in enumerate(self.children):
            c.display_stats(indexing+f", {i}")
            
    def get_all_index(self, indexing:list, indx_lst):
        for i, c in enumerate(self.children):
            c.get_all_index(indexing+[i], indx_lst)
        pass
        

    def get_correct_train(self):
        a, b = 0, 0
        for i, c in enumerate(self.children):
            _a, _b = c.get_correct_train()
            a+= _a
            b+= _b
        return a, b
    
    def get_correct_test(self):
        a, b = 0, 0
        for i, c in enumerate(self.children):
            _a, _b = c.get_correct_test()
            a+= _a
            b+= _b
        return a, b
        
    def inference_forward(self, x):
        with torch.no_grad():
            zz = self.model(x)
            yout = self.classifier(zz, hard=True)
            return torch.argmax(classifier.cls_confidence, dim=1)

In [32]:
tree = ClassifierTree(train_dataset, test_dataset, device)

In [33]:
tree.root.create_network_0(784, [784, 784], 10)

In [34]:
tree.root.model

SequentialFlow(
  (flows): ModuleList(
    (0): ActNorm()
    (1): ResidualFlow(
      (resblock): ModuleList(
        (0): Linear(in_features=784, out_features=784, bias=True)
        (1): Swish()
        (2): Linear(in_features=784, out_features=784, bias=True)
      )
    )
    (2): ActNorm()
    (3): ResidualFlow(
      (resblock): ModuleList(
        (0): Linear(in_features=784, out_features=784, bias=True)
        (1): Swish()
        (2): Linear(in_features=784, out_features=784, bias=True)
      )
    )
    (4): ActNorm()
  )
)

In [35]:
tree.root.classifier

StereographicBinaryClassifier(
  (linear): StereographicTransform(
    (linear): Linear(in_features=785, out_features=1, bias=True)
  )
)

In [36]:
tree.root.create_train_loader_1(train_dataset, 
                                torch.arange(0, len(train_dataset), dtype=torch.long), 
                                50)

Train Dataset Num: 60000


In [37]:
tree.root.create_test_loader_2(test_dataset, 
                               torch.arange(0, len(test_dataset), dtype=torch.long), 
                               50)

Test Dataset Num: 10000


In [38]:
tree.root.create_optimizer_3(lr=0.003)

In [39]:
tree.root.train_classifier_4(5)

100%|██████████| 1200/1200 [01:12<00:00, 16.54it/s]
  0%|          | 0/200 [00:00<?, ?it/s]

Epoch: 0,  Loss:0.0799744725227356


100%|██████████| 200/200 [00:02<00:00, 71.35it/s]
  0%|          | 0/1200 [00:00<?, ?it/s]

Train Acc:19.72%, Test Acc:19.99%



100%|██████████| 1200/1200 [01:37<00:00, 12.30it/s]
  0%|          | 0/200 [00:00<?, ?it/s]

Epoch: 1,  Loss:0.08179805427789688


100%|██████████| 200/200 [00:03<00:00, 63.64it/s]
  0%|          | 0/1200 [00:00<?, ?it/s]

Train Acc:19.78%, Test Acc:19.98%



100%|██████████| 1200/1200 [01:12<00:00, 16.50it/s]
  0%|          | 0/200 [00:00<?, ?it/s]

Epoch: 2,  Loss:0.07908393442630768


100%|██████████| 200/200 [00:02<00:00, 67.03it/s]
  0%|          | 0/1200 [00:00<?, ?it/s]

Train Acc:20.25%, Test Acc:19.99%



100%|██████████| 1200/1200 [01:12<00:00, 16.50it/s]
  0%|          | 0/200 [00:00<?, ?it/s]

Epoch: 3,  Loss:0.07965319603681564


100%|██████████| 200/200 [00:02<00:00, 75.97it/s]
  0%|          | 0/1200 [00:00<?, ?it/s]

Train Acc:20.24%, Test Acc:20.00%



100%|██████████| 1200/1200 [01:08<00:00, 17.59it/s]
  0%|          | 0/200 [00:00<?, ?it/s]

Epoch: 4,  Loss:0.08153343200683594


100%|██████████| 200/200 [00:02<00:00, 78.87it/s]

Train Acc:19.85%, Test Acc:19.99%

	-> MAX Train Acc 20.25 ; Test Acc 20.0





In [40]:
acc = tree.root.freeze_and_compute_stats_5(MIN_POINTS=100)

Hard inference on the data !
idx: 0	out: 7 	 acc: 25.565 	classes:[0, 1, 2, 3, 4, 5, 6, 7, 8, 9]
idx: 1	out: 1 	 acc: 16.415 	classes:[0, 1, 2, 3, 4, 5, 6, 7, 8, 9]
Accuracy: 0.19993333333333332


In [41]:
tree.display_stats()

[0, 0] : Train -> 0.2556 Test -> 0.2562, NUM: 23466, classes: 7:[0, 1, 2, 3, 4, 5, 6, 7, 8, 9]
[0, 1] : Train -> 0.1641 Test -> 0.1640, NUM: 36534, classes: 1:[0, 1, 2, 3, 4, 5, 6, 7, 8, 9]
Final Accuracy is Train: 0.19993 Test: 0.20000


In [42]:
len(tree.root.train_loader.dataset)

60000

In [43]:
tree.get_all_child_index()

[[0, 0], [0, 1]]

In [44]:
_train_data = train_data.clone()
_test_data = test_data.clone()

### Iterative Improvement over nodes

In [45]:
# MIN_POINTS = 10
# batch_size = 50

# ignore_nodes = []

In [46]:
# #################### SELECT WORST CLASSIFIER ##########################
# max_incorrect = 0
# max_inc_node = None
# train_epoch = None
# for ci in tree.get_all_child_index():
#     if ci in ignore_nodes: continue

#     node = tree.get_node(ci)
#     num_data = len(node.train_indices)
#     if num_data < MIN_POINTS: continue

#     incorrect = num_data - node.num_correct
#     if incorrect > max_incorrect:
#         max_incorrect = incorrect
#         max_inc_node = ci
#         steps_in_epoch = max(num_data/batch_size, 1)
#         train_epoch = max(int(3*1200/steps_in_epoch), 1)

# print(f"Max incorrect: {max_incorrect}, {max_inc_node}, train for: {train_epoch}")

# #################### SELECT WORST CLASSIFIER ##########################

In [47]:
# ignore_nodes

In [48]:
MIN_POINTS = 100
batch_size = 50

priority_node_list = []
notworking_node_list = []

incorrect_attempts = 0
global_epoch = 5

In [84]:
global_epoch = 5

### Rerun code below

In [180]:

### if node_list is empty, fill the list with order of incorrect elements
## if node is found not improving, push it back in the end
## if node is done improving, re-evaluate the priority:

#################### SORT CLASSIFIER ##########################
nodes = []
incorrects = []
for ci in child_indices:
    if ci in notworking_node_list: continue

    node = tree.get_node(ci)
    num_data = len(node.train_indices)
    if num_data < MIN_POINTS: 
        print(f"{ci} node has num_data < MIN_POINTS ({num_data} < {MIN_POINTS})")
        print(f"Ignoring !!")
        continue

    incorrect = num_data - node.num_correct
    nodes.append(ci)
    incorrects.append(incorrect)

incorrects, nodes = np.array(incorrects), np.array(nodes)
max_inc_node = list(nodes[incorrects.argmax()])
num_data = len(tree.get_node(max_inc_node).train_indices)
#     print(num_data)
print(f"Max incorrect: {incorrects.max()}, {max_inc_node}, train for: {train_epoch}")

steps_in_epoch = max(num_data/batch_size, 1)
train_epoch = max(int(global_epoch*1200/steps_in_epoch), 1)

## sorting
argsrt = np.argsort(incorrects)
incorrects = incorrects[argsrt]
nodes = nodes[argsrt]

## setting initial value
priority_node_list = nodes
#################### SORT CLASSIFIER ##########################

Max incorrect: 3284, [0, 1, 0, 1, 1, 0], train for: 73


In [181]:
max_inc_node

[0, 1, 0, 1, 1, 0]

In [182]:
nodes

array([list([0, 1, 1]), list([0, 0, 0]), list([0, 1, 0, 1, 1, 1, 1, 1]),
       list([0, 1, 0, 1, 1, 1, 0]), list([0, 1, 0, 1, 1, 1, 1, 0]),
       list([0, 1, 0, 1, 1, 0])], dtype=object)

In [183]:
train_epoch

68

In [185]:
#################### EXTEND SELECTED CLASSIFIER ##########################
indx = max_inc_node
parent = tree.get_parent_node(indx)
node = tree.get_node(indx)
prev_acc = node.num_correct / len(node.train_indices)
node.display_stats("")

### make classifier with only available classes
alt_node = LocalClassifier(device)
avl_cls = node.classes
num_cls = len(node.classes)
output_dim = 10
# num_sets = num_cls#*2
# init_val = torch.randn(num_sets, output_dim)*0.01
# for ns in range(num_sets):
#     init_val[ns, avl_cls[ns%num_cls]] = 2.

alt_node.create_network_0(784, [784, 784], output_dim)
# alt_node.classifier.cls_weight.data = init_val.to(device)

alt_node.create_train_loader_1(train_dataset, node.train_indices, batch_size=batch_size)
alt_node.create_test_loader_2(test_dataset, node.test_indices, batch_size=batch_size)
alt_node.create_optimizer_3(lr=0.003)

alt_node.train_classifier_4(train_epoch)

  0%|          | 0/88 [00:00<?, ?it/s]

[] : Train -> 0.2536 Test -> 0.2401, NUM: 4400, classes: 9:[0, 1, 2, 3, 4, 5, 6, 7, 8, 9]
Train Dataset Num: 4400
Test Dataset Num: 754


100%|██████████| 88/88 [00:05<00:00, 16.63it/s]
  0%|          | 0/16 [00:00<?, ?it/s]

Epoch: 0,  Loss:0.08224451541900635


100%|██████████| 16/16 [00:00<00:00, 34.31it/s]
  0%|          | 0/88 [00:00<?, ?it/s]

Train Acc:17.73%, Test Acc:21.22%



100%|██████████| 88/88 [00:05<00:00, 16.48it/s]
  0%|          | 0/16 [00:00<?, ?it/s]

Epoch: 1,  Loss:0.08102723956108093


100%|██████████| 16/16 [00:00<00:00, 34.90it/s]
  0%|          | 0/88 [00:00<?, ?it/s]

Train Acc:19.32%, Test Acc:19.36%



100%|██████████| 88/88 [00:05<00:00, 16.95it/s]
  0%|          | 0/16 [00:00<?, ?it/s]

Epoch: 2,  Loss:0.08118180930614471


100%|██████████| 16/16 [00:00<00:00, 35.28it/s]
  0%|          | 0/88 [00:00<?, ?it/s]

Train Acc:20.80%, Test Acc:20.69%



100%|██████████| 88/88 [00:05<00:00, 16.91it/s]
  0%|          | 0/16 [00:00<?, ?it/s]

Epoch: 3,  Loss:0.08030601590871811


100%|██████████| 16/16 [00:00<00:00, 39.38it/s]
  0%|          | 0/88 [00:00<?, ?it/s]

Train Acc:20.39%, Test Acc:19.36%



100%|██████████| 88/88 [00:05<00:00, 16.85it/s]
  0%|          | 0/16 [00:00<?, ?it/s]

Epoch: 4,  Loss:0.08133258670568466


100%|██████████| 16/16 [00:00<00:00, 35.83it/s]
  0%|          | 0/88 [00:00<?, ?it/s]

Train Acc:19.73%, Test Acc:19.36%



100%|██████████| 88/88 [00:05<00:00, 16.86it/s]
  0%|          | 0/16 [00:00<?, ?it/s]

Epoch: 5,  Loss:0.08049238473176956


100%|██████████| 16/16 [00:00<00:00, 37.91it/s]
  0%|          | 0/88 [00:00<?, ?it/s]

Train Acc:19.57%, Test Acc:18.30%



100%|██████████| 88/88 [00:05<00:00, 16.83it/s]
  0%|          | 0/16 [00:00<?, ?it/s]

Epoch: 6,  Loss:0.08080817759037018


100%|██████████| 16/16 [00:00<00:00, 38.44it/s]
  0%|          | 0/88 [00:00<?, ?it/s]

Train Acc:18.89%, Test Acc:21.22%



100%|██████████| 88/88 [00:05<00:00, 17.17it/s]
  0%|          | 0/16 [00:00<?, ?it/s]

Epoch: 7,  Loss:0.08283784985542297


100%|██████████| 16/16 [00:00<00:00, 32.94it/s]
  0%|          | 0/88 [00:00<?, ?it/s]

Train Acc:19.11%, Test Acc:21.22%



100%|██████████| 88/88 [00:05<00:00, 16.65it/s]
  0%|          | 0/16 [00:00<?, ?it/s]

Epoch: 8,  Loss:0.08156988769769669


100%|██████████| 16/16 [00:00<00:00, 37.35it/s]
  0%|          | 0/88 [00:00<?, ?it/s]

Train Acc:19.18%, Test Acc:19.36%



100%|██████████| 88/88 [00:05<00:00, 16.51it/s]
  0%|          | 0/16 [00:00<?, ?it/s]

Epoch: 9,  Loss:0.08060509711503983


100%|██████████| 16/16 [00:00<00:00, 38.32it/s]
  0%|          | 0/88 [00:00<?, ?it/s]

Train Acc:20.14%, Test Acc:18.83%



100%|██████████| 88/88 [00:05<00:00, 16.57it/s]
  0%|          | 0/16 [00:00<?, ?it/s]

Epoch: 10,  Loss:0.08088590949773788


100%|██████████| 16/16 [00:00<00:00, 33.52it/s]
  0%|          | 0/88 [00:00<?, ?it/s]

Train Acc:19.07%, Test Acc:19.50%



100%|██████████| 88/88 [00:05<00:00, 15.47it/s]
  0%|          | 0/16 [00:00<?, ?it/s]

Epoch: 11,  Loss:0.080392025411129


100%|██████████| 16/16 [00:00<00:00, 34.28it/s]
  0%|          | 0/88 [00:00<?, ?it/s]

Train Acc:19.43%, Test Acc:18.30%



100%|██████████| 88/88 [00:05<00:00, 15.95it/s]
  0%|          | 0/16 [00:00<?, ?it/s]

Epoch: 12,  Loss:0.0832352489233017


100%|██████████| 16/16 [00:00<00:00, 37.59it/s]
  0%|          | 0/88 [00:00<?, ?it/s]

Train Acc:19.73%, Test Acc:19.36%



100%|██████████| 88/88 [00:05<00:00, 16.08it/s]
  0%|          | 0/16 [00:00<?, ?it/s]

Epoch: 13,  Loss:0.07935439050197601


100%|██████████| 16/16 [00:00<00:00, 36.07it/s]
  0%|          | 0/88 [00:00<?, ?it/s]

Train Acc:18.89%, Test Acc:19.36%



100%|██████████| 88/88 [00:05<00:00, 16.94it/s]
  0%|          | 0/16 [00:00<?, ?it/s]

Epoch: 14,  Loss:0.08050049096345901


100%|██████████| 16/16 [00:00<00:00, 40.01it/s]
  0%|          | 0/88 [00:00<?, ?it/s]

Train Acc:19.68%, Test Acc:20.95%



100%|██████████| 88/88 [00:05<00:00, 16.91it/s]
  0%|          | 0/16 [00:00<?, ?it/s]

Epoch: 15,  Loss:0.07997022569179535


100%|██████████| 16/16 [00:00<00:00, 39.68it/s]
  0%|          | 0/88 [00:00<?, ?it/s]

Train Acc:18.77%, Test Acc:19.36%



100%|██████████| 88/88 [00:05<00:00, 17.03it/s]
  0%|          | 0/16 [00:00<?, ?it/s]

Epoch: 16,  Loss:0.08224708586931229


100%|██████████| 16/16 [00:00<00:00, 39.99it/s]
  0%|          | 0/88 [00:00<?, ?it/s]

Train Acc:19.75%, Test Acc:19.36%



100%|██████████| 88/88 [00:05<00:00, 17.11it/s]
  0%|          | 0/16 [00:00<?, ?it/s]

Epoch: 17,  Loss:0.08023353666067123


100%|██████████| 16/16 [00:00<00:00, 35.64it/s]
  0%|          | 0/88 [00:00<?, ?it/s]

Train Acc:19.77%, Test Acc:18.83%



100%|██████████| 88/88 [00:05<00:00, 17.01it/s]
  0%|          | 0/16 [00:00<?, ?it/s]

Epoch: 18,  Loss:0.08237353712320328


100%|██████████| 16/16 [00:00<00:00, 33.65it/s]
  0%|          | 0/88 [00:00<?, ?it/s]

Train Acc:19.14%, Test Acc:20.69%



100%|██████████| 88/88 [00:05<00:00, 16.90it/s]
  0%|          | 0/16 [00:00<?, ?it/s]

Epoch: 19,  Loss:0.0823611319065094


100%|██████████| 16/16 [00:00<00:00, 36.75it/s]
  0%|          | 0/88 [00:00<?, ?it/s]

Train Acc:19.89%, Test Acc:18.83%



 80%|███████▉  | 70/88 [00:04<00:01, 15.45it/s]


KeyboardInterrupt: 

In [None]:
# alt_node.train_classifier_4(10)

In [175]:
new_acc = alt_node.freeze_and_compute_stats_5(MIN_POINTS)

Hard inference on the data !
idx: 0	out: 3 	 acc: 15.525 	classes:[0, 1, 2, 3, 4, 5, 6, 8]
idx: 1	out: 9 	 acc: 34.751 	classes:[5, 7, 8, 9]
Accuracy: 0.2137796237478622


In [176]:
if new_acc < prev_acc + 0.02:
    notworking_node_list.append(max_inc_node)
    print("New Node has lesser accuracy, ignoring !!!")
    incorrect_attempts += 1
else:
    ### replace the leaf node with Local Classifier Node
    parent.children[indx[-1]] = alt_node

In [179]:
### reevaluate the importance
child_indices = tree.get_all_child_index()
if len(child_indices) <= len(notworking_node_list) :
    print("Priority nodes all not working")
    ### priority nodes are all not working
    ### reset the priority
    notworking_node_list = []

In [178]:
## show stats of the current network
#### After modification status
tree.display_stats()
print()

[0, 0, 0] : Train -> 0.9735 Test -> 0.9722, NUM: 5477, classes: 8:[0, 1, 2, 3, 4, 5, 6, 7, 8, 9]
[0, 0, 1] : Train -> 0.3331 Test -> 0.3327, NUM: 17989, classes: 9:[0, 1, 2, 3, 5, 6, 7, 8, 9]
[0, 1, 0, 0] : Train -> 0.2582 Test -> 0.2545, NUM: 12033, classes: 5:[0, 1, 2, 3, 4, 5, 6, 7, 8, 9]
[0, 1, 0, 1, 0] : Train -> 0.2510 Test -> 0.2568, NUM: 7497, classes: 7:[0, 1, 2, 3, 4, 5, 6, 7, 8, 9]
[0, 1, 0, 1, 1, 0] : Train -> 0.2536 Test -> 0.2401, NUM: 4400, classes: 9:[0, 1, 2, 3, 4, 5, 6, 7, 8, 9]
[0, 1, 0, 1, 1, 1, 0] : Train -> 0.2669 Test -> 0.2322, NUM: 2630, classes: 9:[0, 1, 4, 5, 6, 7, 8, 9]
[0, 1, 0, 1, 1, 1, 1, 0] : Train -> 0.1553 Test -> 0.1241, NUM: 2847, classes: 3:[0, 1, 2, 3, 4, 5, 6, 8]
[0, 1, 0, 1, 1, 1, 1, 1] : Train -> 0.3475 Test -> 0.3535, NUM: 1246, classes: 9:[5, 7, 8, 9]
[0, 1, 1] : Train -> 0.9908 Test -> 0.9917, NUM: 5881, classes: 1:[0, 1, 2, 3, 4, 6, 8]
Final Accuracy is Train: 0.41388 Test: 0.40720



In [None]:
[0, 0] : Train -> 0.2556 Test -> 0.2562, NUM: 23466, classes: 7:[0, 1, 2, 3, 4, 5, 6, 7, 8, 9]
[0, 1, 0] : Train -> 0.1955 Test -> 0.1943, NUM: 30653, classes: 2:[0, 1, 2, 3, 4, 5, 6, 7, 8, 9]
[0, 1, 1] : Train -> 0.9908 Test -> 0.9917, NUM: 5881, classes: 1:[0, 1, 2, 3, 4, 6, 8]
Final Accuracy is Train: 0.29698 Test: 0.29550

In [None]:
[0, 0] : Train -> 0.2556 Test -> 0.2562, NUM: 23466, classes: 7:[0, 1, 2, 3, 4, 5, 6, 7, 8, 9]
[0, 1, 0, 0] : Train -> 0.2582 Test -> 0.2545, NUM: 12033, classes: 5:[0, 1, 2, 3, 4, 5, 6, 7, 8, 9]
[0, 1, 0, 1] : Train -> 0.1665 Test -> 0.1563, NUM: 18620, classes: 6:[0, 1, 2, 3, 4, 5, 6, 7, 8]
[0, 1, 1] : Train -> 0.9908 Test -> 0.9917, NUM: 5881, classes: 1:[0, 1, 2, 3, 4, 6, 8]
Final Accuracy is Train: 0.30057 Test: 0.29550

In [None]:
[0, 0, 0] : Train -> 0.9735 Test -> 0.9722, NUM: 5477, classes: 8:[0, 1, 2, 3, 4, 5, 6, 7, 8, 9]
[0, 0, 1] : Train -> 0.3331 Test -> 0.3327, NUM: 17989, classes: 9:[0, 1, 2, 3, 5, 6, 7, 8, 9]
[0, 1, 0, 0] : Train -> 0.2582 Test -> 0.2545, NUM: 12033, classes: 5:[0, 1, 2, 3, 4, 5, 6, 7, 8, 9]
[0, 1, 0, 1] : Train -> 0.1665 Test -> 0.1563, NUM: 18620, classes: 6:[0, 1, 2, 3, 4, 5, 6, 7, 8]
[0, 1, 1] : Train -> 0.9908 Test -> 0.9917, NUM: 5881, classes: 1:[0, 1, 2, 3, 4, 6, 8]
Final Accuracy is Train: 0.38932 Test: 0.38290

In [None]:
[0, 0, 0] : Train -> 0.9735 Test -> 0.9722, NUM: 5477, classes: 8:[0, 1, 2, 3, 4, 5, 6, 7, 8, 9]
[0, 0, 1] : Train -> 0.3331 Test -> 0.3327, NUM: 17989, classes: 9:[0, 1, 2, 3, 5, 6, 7, 8, 9]
[0, 1, 0, 0] : Train -> 0.2582 Test -> 0.2545, NUM: 12033, classes: 5:[0, 1, 2, 3, 4, 5, 6, 7, 8, 9]
[0, 1, 0, 1, 0] : Train -> 0.2510 Test -> 0.2568, NUM: 7497, classes: 7:[0, 1, 2, 3, 4, 5, 6, 7, 8, 9]
[0, 1, 0, 1, 1] : Train -> 0.1703 Test -> 0.1555, NUM: 11123, classes: 6:[0, 1, 2, 3, 4, 5, 6, 7, 8]
[0, 1, 1] : Train -> 0.9908 Test -> 0.9917, NUM: 5881, classes: 1:[0, 1, 2, 3, 4, 6, 8]
Final Accuracy is Train: 0.40057 Test: 0.39490

In [None]:
[0, 0, 0] : Train -> 0.9735 Test -> 0.9722, NUM: 5477, classes: 8:[0, 1, 2, 3, 4, 5, 6, 7, 8, 9]
[0, 0, 1] : Train -> 0.3331 Test -> 0.3327, NUM: 17989, classes: 9:[0, 1, 2, 3, 5, 6, 7, 8, 9]
[0, 1, 0, 0] : Train -> 0.2582 Test -> 0.2545, NUM: 12033, classes: 5:[0, 1, 2, 3, 4, 5, 6, 7, 8, 9]
[0, 1, 0, 1, 0] : Train -> 0.2510 Test -> 0.2568, NUM: 7497, classes: 7:[0, 1, 2, 3, 4, 5, 6, 7, 8, 9]
[0, 1, 0, 1, 1, 0] : Train -> 0.2536 Test -> 0.2401, NUM: 4400, classes: 9:[0, 1, 2, 3, 4, 5, 6, 7, 8, 9]
[0, 1, 0, 1, 1, 1] : Train -> 0.1705 Test -> 0.1506, NUM: 6723, classes: 6:[0, 1, 2, 3, 4, 6, 8, 9]
[0, 1, 1] : Train -> 0.9908 Test -> 0.9917, NUM: 5881, classes: 1:[0, 1, 2, 3, 4, 6, 8]
Final Accuracy is Train: 0.40670 Test: 0.40070


In [169]:
[0, 0, 0] : Train -> 0.9735 Test -> 0.9722, NUM: 5477, classes: 8:[0, 1, 2, 3, 4, 5, 6, 7, 8, 9]
[0, 0, 1] : Train -> 0.3331 Test -> 0.3327, NUM: 17989, classes: 9:[0, 1, 2, 3, 5, 6, 7, 8, 9]
[0, 1, 0, 0] : Train -> 0.2582 Test -> 0.2545, NUM: 12033, classes: 5:[0, 1, 2, 3, 4, 5, 6, 7, 8, 9]
[0, 1, 0, 1, 0] : Train -> 0.2510 Test -> 0.2568, NUM: 7497, classes: 7:[0, 1, 2, 3, 4, 5, 6, 7, 8, 9]
[0, 1, 0, 1, 1, 0] : Train -> 0.2536 Test -> 0.2401, NUM: 4400, classes: 9:[0, 1, 2, 3, 4, 5, 6, 7, 8, 9]
[0, 1, 0, 1, 1, 1, 0] : Train -> 0.2669 Test -> 0.2322, NUM: 2630, classes: 9:[0, 1, 4, 5, 6, 7, 8, 9]
[0, 1, 0, 1, 1, 1, 1] : Train -> 0.1705 Test -> 0.1526, NUM: 4093, classes: 6:[0, 1, 2, 3, 4, 6, 8]
[0, 1, 1] : Train -> 0.9908 Test -> 0.9917, NUM: 5881, classes: 1:[0, 1, 2, 3, 4, 6, 8]
Final Accuracy is Train: 0.41093 Test: 0.40440

SyntaxError: invalid syntax (<ipython-input-169-427bb350984a>, line 1)

In [None]:
[0, 0, 0] : Train -> 0.9735 Test -> 0.9722, NUM: 5477, classes: 8:[0, 1, 2, 3, 4, 5, 6, 7, 8, 9]
[0, 0, 1] : Train -> 0.3331 Test -> 0.3327, NUM: 17989, classes: 9:[0, 1, 2, 3, 5, 6, 7, 8, 9]
[0, 1, 0, 0] : Train -> 0.2582 Test -> 0.2545, NUM: 12033, classes: 5:[0, 1, 2, 3, 4, 5, 6, 7, 8, 9]
[0, 1, 0, 1, 0] : Train -> 0.2510 Test -> 0.2568, NUM: 7497, classes: 7:[0, 1, 2, 3, 4, 5, 6, 7, 8, 9]
[0, 1, 0, 1, 1, 0] : Train -> 0.2536 Test -> 0.2401, NUM: 4400, classes: 9:[0, 1, 2, 3, 4, 5, 6, 7, 8, 9]
[0, 1, 0, 1, 1, 1, 0] : Train -> 0.2669 Test -> 0.2322, NUM: 2630, classes: 9:[0, 1, 4, 5, 6, 7, 8, 9]
[0, 1, 0, 1, 1, 1, 1, 0] : Train -> 0.1553 Test -> 0.1241, NUM: 2847, classes: 3:[0, 1, 2, 3, 4, 5, 6, 8]
[0, 1, 0, 1, 1, 1, 1, 1] : Train -> 0.3475 Test -> 0.3535, NUM: 1246, classes: 9:[5, 7, 8, 9]
[0, 1, 1] : Train -> 0.9908 Test -> 0.9917, NUM: 5881, classes: 1:[0, 1, 2, 3, 4, 6, 8]
Final Accuracy is Train: 0.41388 Test: 0.40720