## EKSPERYMENT 1
### Komputer.

Część 1: Przygotowanie danych i trenowanie klasyfikatorów

Część 2: Dane, metoda

Część 3: Badanie jakości w strumieniu o zmiennej trudności

## Część 1: Trenowanie

In [10]:
import torchvision
import torch
import torch.nn as nn
from torch.utils.data import TensorDataset, DataLoader

# Prepare training data.
# The SVHN *test* split is used for training here; the original comment
# ("Tak." = "Yes.") flags this as deliberate. The 'train' split is loaded
# later in this notebook as the source of the evaluation stream.
train_data = torchvision.datasets.SVHN(root='./files/', split='test', download=True)

# Pixel values divided by 255 and stored as float tensors; labels stay a NumPy array.
train_X = torch.tensor(train_data.data).div(255).to(torch.float)
train_y = train_data.labels

# Mini-batches of 64 samples, iterated in dataset order (no shuffling requested).
dataset = TensorDataset(torch.Tensor(train_X), torch.Tensor(train_y))
dataloader = DataLoader(dataset, batch_size=64)

print(train_X.shape)
print(train_y.shape)

Using downloaded and verified file: ./files/test_32x32.mat
torch.Size([26032, 3, 32, 32])
(26032,)


In [11]:
from architectures import CNN, CNN1_10_Network, CNN1_5_Network, CNN2_10_20_Network, CNN2_5_10_Network, CNN3_5_10_20_Network, FC_Network

# Training configuration
max_training_epochs = 250      # upper bound on epochs per classifier
training_support_level = 0.8   # training stops once the mean max-softmax support exceeds this

# Network bodies, in the order the classifiers are indexed everywhere below.
network_classes = (
    FC_Network,
    CNN1_5_Network,
    CNN1_10_Network,
    CNN2_5_10_Network,
    CNN2_10_20_Network,
    CNN3_5_10_20_Network,
)

# One CNN wrapper per architecture; all are built with img_depth=3 and x_input_size=32.
# ('architecure' is the keyword spelling expected by the CNN wrapper.)
clfs = [
    CNN(architecure=network_cls(img_depth=3, x_input_size=32))
    for network_cls in network_classes
]

loss_fn = nn.CrossEntropyLoss()

# Train every classifier with plain SGD until its mean decision support on the
# training set exceeds `training_support_level`, or `max_training_epochs` is reached.
for clf_id, clf in enumerate(clfs):
    optimizer = torch.optim.SGD(clf.parameters(), lr=1e-2)

    for e in range(max_training_epochs):
        if e > 0:
            # Stopping criterion, checked before each epoch except the first.
            # This is a metric-only forward pass over the whole training set,
            # so autograd is disabled to avoid building a graph for it.
            with torch.no_grad():
                proba = nn.Softmax(dim=1)(clf(train_X))
                max_proba = torch.max(proba, dim=1)[0]
                mean_proba = torch.mean(max_proba).numpy()  # mean decision support

            if mean_proba > training_support_level:
                print(clf_id, e, mean_proba)
                break

        clf.train(dataloader, loss_fn, optimizer)

        # Progress report at epochs 1, 51, 101, ... (mean_proba is always set for e >= 1).
        if e % 50 == 1:
            print(clf_id, e, mean_proba)
            

0 1 0.18599333
0 51 0.74674827
0 73 0.8031468
1 1 0.18966399
1 51 0.7343835
1 101 0.78045094
1 151 0.79133505
1 201 0.79741496
1 230 0.8000609
2 1 0.1945208
2 50 0.80065465
3 1 0.18893427
3 21 0.80185467
4 1 0.18803671
4 23 0.801934
5 1 0.18952617
5 32 0.80242145


In [17]:
# Save classifiers.
# torch.save does not create missing directories, so make sure the target exists.
import os

os.makedirs('models', exist_ok=True)

# Whole model objects are pickled (not just state_dicts), so the model classes
# must be importable wherever the files are loaded back.
for c_id, c in enumerate(clfs):
    torch.save(c, 'models/%i.pt' % c_id)

## Część 2: Przygotowanie danych i CDoS

In [18]:
import numpy as np
from sklearn.decomposition import PCA
from ConditionalEvidenceStream import ConditionalEvidenceStream
from utils import make_condition_map, mix_to_factor
import concepts
import torch
import torchvision

# Load the data used to build the stream (SVHN 'train' split), pixel values divided by 255.
stream_data = torchvision.datasets.SVHN(root='./files/', split='train', download=True)

X = torch.tensor(stream_data.data) / 255
y = stream_data.labels

# Flatten every image to a vector and keep as many principal components
# as are needed to explain 80% of the variance.
flat_images = X.reshape(X.shape[0], -1)
X_pca = PCA(n_components=0.8).fit_transform(flat_images)

# Standardize each component: zero mean, unit standard deviation.
X_pca = X_pca - X_pca.mean(axis=0)
X_pca = X_pca / X_pca.std(axis=0)

# Factor derived from the standardized components by the project helper
# mix_to_factor; it is passed to make_condition_map in the experiment below.
factor = mix_to_factor(X_pca)

Using downloaded and verified file: ./files/train_32x32.mat


In [20]:
# Load the six classifiers saved earlier, in index order (models/0.pt ... models/5.pt).
# NOTE(review): these files hold whole pickled model objects, so only load files you trust.
clfs = [torch.load('models/%i.pt' % c_id) for c_id in range(6)]

In [21]:
from Method import CDoS_T

# Build the CDoS_T method on top of the loaded classifiers.
# Six thresholds for six classifiers — presumably one per classifier, in the
# same order; confirm against Method.CDoS_T.
thresholds = [
    1.,
    0.95,
    0.9,
    0.85,
    0.8,
    0.75,
]
cdos = CDoS_T(clfs=clfs, thresholds=thresholds)


## Część 3: Eksperyment

In [24]:
import time
from sklearn.metrics import accuracy_score

# Experimental setup
n_chunks = 1000                 # chunks drawn from every generated stream
chunk_size = [25, 250, 500]     # samples per chunk
n_cycles = [5, 10, 25]          # passed as n_cycles to make_condition_map
modes = {                       # keyword arguments forwarded to concepts.concept_proba
    'instant': {'mode': 'instant'},
    'normal_1': {'mode': 'normal', 'sigma': 1},
    'normal_3': {'mode': 'normal', 'sigma': 3}
    }

# Result axes: (chunk size, cycle count, mode, chunk, method).
# The method axis has one slot per base classifier plus a final slot for CDoS,
# so its length is derived from `clfs` instead of being hard-coded.
n_methods = len(clfs) + 1
results_shape = (len(chunk_size), len(n_cycles), len(modes), n_chunks, n_methods)

accs = np.zeros(results_shape)
times = np.zeros(results_shape)

# Run every (chunk size, cycle count, mode) configuration: build a stream, then for
# each chunk record accuracy and wall-clock prediction time of every base classifier
# (slots 0..len(clfs)-1) and of CDoS (last slot).
n_concepts = 500  # shared by the condition map and the concept probabilities

for cs_id, cs in enumerate(chunk_size):
    for n_c_id, nc in enumerate(n_cycles):
        for m_id, (mode, mode_kwargs) in enumerate(modes.items()):

            condition_map = make_condition_map(n_cycles=nc,
                                               n_concepts=n_concepts,
                                               factor=factor,
                                               factor_range=(0.1, 0.9))

            cp = concepts.concept_proba(n_concepts=n_concepts,
                                        n_chunks=n_chunks,
                                        normalize=True,
                                        **mode_kwargs)

            stream = ConditionalEvidenceStream(X, y,
                                               condition_map.T,
                                               cp,
                                               chunk_size=cs,
                                               fragile=False)

            # Inference only: with autograd disabled, graph construction does not
            # inflate the measured prediction times.
            with torch.no_grad():
                for chunk_id in range(n_chunks):
                    _X, _y = stream.get_chunk()

                    # Regular clfs
                    for c_id, c in enumerate(clfs):
                        # perf_counter is the high-resolution clock intended for
                        # short intervals; time.time() can be too coarse here.
                        start = time.perf_counter()
                        proba = nn.Softmax(dim=1)(c(_X))
                        p = torch.argmax(proba, dim=1)
                        elapsed = time.perf_counter() - start

                        accs[cs_id, n_c_id, m_id, chunk_id, c_id] = accuracy_score(_y, p)
                        times[cs_id, n_c_id, m_id, chunk_id, c_id] = elapsed

                    # CDoS
                    start = time.perf_counter()
                    p = cdos.predict(_X)
                    elapsed = time.perf_counter() - start

                    accs[cs_id, n_c_id, m_id, chunk_id, -1] = accuracy_score(_y, p)
                    times[cs_id, n_c_id, m_id, chunk_id, -1] = elapsed

            # Quick sanity print: results of the last processed chunk only.
            print(accs[cs_id, n_c_id, m_id, chunk_id])
            print(times[cs_id, n_c_id, m_id, chunk_id])

            # Checkpoint after every configuration so partial results survive an interruption.
            np.save('e1_accs.npy', accs)
            np.save('e1_times.npy', times)
            
            


  concept_proba = concept_proba / np.sum(concept_proba, axis=1)[:,None]


[0.56 0.68 0.72 0.64 0.68 0.52 0.68]
[0.00141311 0.00092435 0.00154495 0.00109005 0.00163507 0.00123596
 0.00157523]
[0.8  0.8  0.84 0.84 0.8  0.84 0.84]
[0.00128102 0.00087881 0.00120401 0.00117612 0.00161982 0.00120306
 0.00138116]
[0.76 0.92 0.88 0.88 0.84 0.8  0.8 ]
[0.00151205 0.00098801 0.00121427 0.00106716 0.00178528 0.00120711
 0.0012579 ]


  concept_proba = concept_proba / np.sum(concept_proba, axis=1)[:,None]


[0.76 0.64 0.72 0.8  0.76 0.72 0.72]
[0.00104904 0.00139999 0.00131989 0.00106597 0.00156999 0.0012207
 0.00158596]
[0.56 0.68 0.6  0.72 0.72 0.64 0.64]
[0.001513   0.00095701 0.00133419 0.00152087 0.00166297 0.00120711
 0.001436  ]
[0.6  0.68 0.64 0.68 0.6  0.72 0.72]
[0.00142407 0.00096393 0.00116301 0.00133181 0.00164104 0.00179505
 0.00145817]


  concept_proba = concept_proba / np.sum(concept_proba, axis=1)[:,None]


[0.68 0.76 0.72 0.8  0.68 0.76 0.76]
[0.00155592 0.00090528 0.00121999 0.00169802 0.00260377 0.00146389
 0.00137901]
[0.64 0.68 0.72 0.76 0.68 0.76 0.76]
[0.00101876 0.00086904 0.00123501 0.00107312 0.00149417 0.00125885
 0.00126696]
[0.68 0.8  0.72 0.76 0.8  0.68 0.8 ]
[0.00148869 0.00092793 0.00120711 0.00113416 0.00150394 0.001261
 0.00148606]


  concept_proba = concept_proba / np.sum(concept_proba, axis=1)[:,None]


[0.712 0.756 0.76  0.808 0.792 0.744 0.744]
[0.00400209 0.00630498 0.00857401 0.00746298 0.0105679  0.00697207
 0.00763392]
[0.688 0.74  0.748 0.756 0.74  0.736 0.736]
[0.00414896 0.00600505 0.00878286 0.00708199 0.01030803 0.00791383
 0.00731397]
[0.652 0.692 0.712 0.68  0.704 0.692 0.692]
[0.0035429  0.00512004 0.00847626 0.00712991 0.01030707 0.00696087
 0.00762391]


  concept_proba = concept_proba / np.sum(concept_proba, axis=1)[:,None]


[0.668 0.724 0.76  0.74  0.748 0.72  0.72 ]
[0.00448799 0.00650692 0.00900197 0.006953   0.01023889 0.00650334
 0.00704432]
[0.704 0.756 0.752 0.804 0.768 0.732 0.732]
[0.00377607 0.00576067 0.00828195 0.00766277 0.00956297 0.00730705
 0.00723505]
[0.64  0.712 0.76  0.728 0.748 0.68  0.68 ]
[0.00388312 0.00618291 0.00831985 0.00692892 0.00906992 0.00716114
 0.00753522]


  concept_proba = concept_proba / np.sum(concept_proba, axis=1)[:,None]


[0.712 0.692 0.716 0.744 0.748 0.708 0.708]
[0.00392985 0.00614977 0.00806808 0.00695395 0.01044202 0.00781369
 0.00847602]
[0.704 0.752 0.788 0.792 0.784 0.784 0.784]
[0.0041492  0.00500035 0.00849009 0.00623107 0.0101409  0.00677896
 0.00758719]
[0.668 0.7   0.708 0.72  0.716 0.74  0.74 ]
[0.00426006 0.00589418 0.0087049  0.00645113 0.00949216 0.00750804
 0.00886583]


  concept_proba = concept_proba / np.sum(concept_proba, axis=1)[:,None]


[0.7   0.718 0.734 0.754 0.732 0.698 0.698]
[0.00763011 0.01203704 0.01631999 0.01475191 0.03061485 0.02939606
 0.02373981]
[0.72  0.748 0.778 0.794 0.774 0.77  0.77 ]
[0.0084281  0.0138092  0.01839399 0.0151031  0.02097702 0.01564598
 0.01869702]
[0.732 0.734 0.754 0.78  0.778 0.744 0.744]
[0.00843692 0.0113461  0.01487398 0.01325226 0.01897287 0.01401305
 0.01402712]


  concept_proba = concept_proba / np.sum(concept_proba, axis=1)[:,None]


[0.664 0.702 0.742 0.74  0.738 0.71  0.71 ]
[0.0083158  0.01103997 0.01545691 0.01315618 0.01863503 0.01408696
 0.01547885]
[0.698 0.76  0.744 0.758 0.788 0.712 0.788]
[0.00802422 0.01098394 0.01481581 0.01312685 0.01894808 0.01458502
 0.01754498]
[0.7   0.708 0.734 0.74  0.726 0.732 0.732]
[0.00730014 0.01154685 0.01722431 0.01234007 0.01884198 0.01476932
 0.01469803]


  concept_proba = concept_proba / np.sum(concept_proba, axis=1)[:,None]


[0.706 0.748 0.766 0.768 0.786 0.748 0.748]
[0.00747895 0.01168895 0.01581883 0.01336098 0.01960111 0.01493192
 0.01474905]
[0.728 0.748 0.778 0.78  0.778 0.744 0.778]
[0.00726986 0.01180863 0.01629114 0.01400208 0.01940513 0.01488829
 0.02020907]
[0.716 0.754 0.778 0.772 0.774 0.76  0.76 ]
[0.00693488 0.0113821  0.01578593 0.01431489 0.01953101 0.01492405
 0.01623774]


## Analiza wyników

In [25]:
# Reload the saved results.
# Axes: (cs_id, n_c_id, m_id, chunk_id, c_id), i.e. chunk size, cycle count,
# mode, chunk index, method (base classifiers first, CDoS last).
accs, times = np.load('e1_accs.npy'), np.load('e1_times.npy')

print(accs.shape, times.shape)

(3, 3, 3, 1000, 7) (3, 3, 3, 1000, 7)


In [None]:
import matplotlib.pyplot as plt
from scipy.ndimage import gaussian_filter1d

# Smoothed accuracy over the stream: one figure per chunk size, one panel per
# (cycle count, mode) pair, one line per method.
s = 3  # sigma of the Gaussian smoothing applied along the chunk axis

n_methods = accs.shape[-1]
cols = plt.cm.jet(np.linspace(0, 1, n_methods))
# The last slot of the method axis holds CDoS; the preceding slots hold the
# base classifiers in `clfs` order.
method_labels = ['clf %i' % i for i in range(n_methods - 1)] + ['CDoS']

for c_id, c in enumerate(chunk_size):
    # squeeze=False keeps `ax` two-dimensional even for a single row or column.
    fig, ax = plt.subplots(len(n_cycles), len(modes), figsize=(12, 8), squeeze=False)
    fig.suptitle('chunk size: %i' % c)

    for n_c_id, n_c in enumerate(n_cycles):
        for mode_id, mode in enumerate(modes):
            panel = ax[n_c_id, mode_id]
            # n_c is the number of cycles (the original title labelled it "chunks").
            panel.set_title('cycles:%i mode:%s' % (n_c, mode))

            for method_id in range(n_methods):
                temp = accs[c_id, n_c_id, mode_id, :, method_id]
                panel.plot(gaussian_filter1d(temp, s),
                           c=cols[method_id],
                           label=method_labels[method_id])

            # Label only the outer panels to keep the grid readable.
            if mode_id == 0:
                panel.set_ylabel('accuracy')
            if n_c_id == len(n_cycles) - 1:
                panel.set_xlabel('chunk')

    # One shared legend per figure instead of one per panel.
    handles, labels = ax[0, 0].get_legend_handles_labels()
    fig.legend(handles, labels, loc='lower center', ncol=n_methods)
    fig.tight_layout(rect=(0, 0.05, 1, 0.95))

    plt.show()
    
                