## EKSPERYMENT 1
### Komputer.

Część 1: Przygotowanie danych i trenowanie klasyfikatorów

Część 2: Generacja strumieni i set-up

Część 3: Badanie jakości w strumieniu o zmiennej trudności

## Część 1: Trenowanie

In [2]:
import torchvision
import torch
import torch.nn as nn
from torch.utils.data import TensorDataset, DataLoader

# Prepare training data.
# The SVHN 'test' split is used for training here; the original author marked
# this choice as intentional. The 'train' split is loaded in Part 2 of this
# notebook, where the streams are prepared.
train_data = torchvision.datasets.SVHN(
    './files/',
    split='test',
    download=True,
)

# Divide the raw image array by 255 and store it as float32
# (presumably mapping 8-bit pixel values into [0, 1] -- TODO confirm dtype of .data).
train_X = (torch.tensor(train_data.data) / 255).to(torch.float)
# Labels are kept as returned by torchvision (a NumPy array, see the printed shape).
train_y = train_data.labels

# Batches of 64 samples; no shuffling is requested from the DataLoader.
# NOTE(review): torch.Tensor(...) converts the integer labels to floats --
# presumably CNN.train casts them back before computing the loss; verify.
dataset = TensorDataset(torch.Tensor(train_X), torch.Tensor(train_y))
dataloader = DataLoader(dataset, batch_size=64)

# Show the shapes of the images and of the labels, one per line.
print(train_X.shape, train_y.shape, sep='\n')

Using downloaded and verified file: ./files/test_32x32.mat
torch.Size([26032, 3, 32, 32])
(26032,)


In [3]:
from architectures import CNN, CNN1_10_Network, CNN1_5_Network, CNN2_10_20_Network, CNN2_5_10_Network, CNN3_5_10_20_Network, FC_Network

# Initialize and train classifiers
# Settings for the training loop: an upper bound on the number of epochs and
# the mean decision-support level at which training of a classifier stops.
max_training_epochs = 250
training_support_level = 0.8

# One classifier per architecture, every one constructed with img_depth=3 and
# x_input_size=32 (matching the 3x32x32 SVHN images printed above).
# The position in this list is the classifier id used when training and saving.
# The keyword is spelled `architecure` because that is how CNN is called here.
clfs = [
    CNN(architecure=network(img_depth=3, x_input_size=32))
    for network in (
        FC_Network,
        CNN1_5_Network,
        CNN1_10_Network,
        CNN2_5_10_Network,
        CNN2_10_20_Network,
        CNN3_5_10_20_Network,
    )
]

loss_fn = torch.nn.CrossEntropyLoss()

# Train every classifier until its mean decision support on the training set
# exceeds `training_support_level`, or until `max_training_epochs` is reached.
for clf_id, clf in enumerate(clfs):
    optimizer = torch.optim.SGD(clf.parameters(), lr=1e-2)

    for e in range(max_training_epochs):
        # From the second epoch on, check the stopping criterion before training.
        if e > 0:
            # The check is evaluation only: run the forward pass over the whole
            # training set without autograd so no graph is built for it.
            with torch.no_grad():
                proba = nn.Softmax(dim=1)(clf(train_X))
                max_proba = torch.max(proba, dim=1)[0]
                # Mean decision support: the average, over all samples, of the
                # highest class probability.
                mean_proba = torch.mean(max_proba).numpy()

            if mean_proba > training_support_level:
                # Report the epoch at which the support level was reached.
                print(clf_id, e, mean_proba)
                break

        # One pass of the project-defined training routine over the dataloader.
        clf.train(dataloader, loss_fn, optimizer)

        # Progress report at epochs 1, 51, 101, ... (mean_proba exists from e == 1).
        if e % 50 == 1:
            print(clf_id, e, mean_proba)
            

0 1 0.18599488
0 51 0.75039804
0 71 0.80021524
1 1 0.19139028
1 51 0.7952793
1 58 0.8006154
2 1 0.19166751
2 51 0.7975261
2 53 0.800293
3 1 0.18786322
3 36 0.8010007
4 1 0.18638173
4 16 0.80085456
5 1 0.18681403
5 45 0.8028484


In [4]:
# Save classifiers
import os

# torch.save writes to the given path but does not create missing directories,
# so make sure the target directory exists before saving.
os.makedirs('models', exist_ok=True)

# Each classifier object is serialized whole, under its index in `clfs`:
# models/0.pt, models/1.pt, ...
for c_id, c in enumerate(clfs):
    torch.save(c, 'models/%i.pt' % c_id)

## Część 2: Przygotowanie strumieni i CDoS

In [2]:
import numpy as np
from sklearn.decomposition import PCA
from ConditionalEvidenceStream import ConditionalEvidenceStream
from utils import make_condition_map, mix_to_factor
import concepts
import torch
import torchvision

# Load data for the streams: the SVHN 'train' split.
stream_data = torchvision.datasets.SVHN(
    './files/',
    split='train',
    download=True,
)

# Images divided by 255; labels as returned by torchvision.
X = torch.tensor(stream_data.data) / 255
y = stream_data.labels

# Flatten every image into a single vector and project it with PCA.
# A fractional n_components asks scikit-learn to keep as many components as
# are needed to explain that share (here 80%) of the variance.
X_pca = PCA(n_components=0.8).fit_transform(X.reshape(len(X), -1))

# Standardize each component: subtract its mean, then divide by the standard
# deviation of the centered values.
X_pca = X_pca - X_pca.mean(axis=0)
X_pca = X_pca / X_pca.std(axis=0)

# Reduce the standardized components to the factor consumed by
# make_condition_map (semantics defined in utils.mix_to_factor).
factor = mix_to_factor(X_pca)

Using downloaded and verified file: ./files/train_32x32.mat


In [3]:
# Experimental setup
n_chunks = 1000
# Number of concepts; the condition map and the concept probabilities must be
# built with the same value, so it is defined once here.
n_concepts = 500
chunk_size = [25, 250, 500]
n_cycles = [5, 10, 25]
# Keyword arguments forwarded to concepts.concept_proba for every drift mode.
modes = {
    'instant': {'mode': 'instant'},
    'normal_1': {'mode': 'normal', 'sigma': 1},
    'normal_3': {'mode': 'normal', 'sigma': 3}
    }

# Build one stream for every (chunk size, number of cycles, mode) combination.
for cs in chunk_size:
    for nc in n_cycles:
        for mode, mode_kwargs in modes.items():

            condition_map = make_condition_map(n_cycles=nc,
                                               n_concepts=n_concepts,
                                               factor=factor,
                                               factor_range=(0.1, 0.9))

            cp = concepts.concept_proba(n_concepts=n_concepts,
                                        n_chunks=n_chunks,
                                        normalize=True,
                                        **mode_kwargs)

            # The condition map is passed transposed, as in the original call.
            stream = ConditionalEvidenceStream(X, y,
                                               condition_map.T,
                                               cp,
                                               chunk_size=cs,
                                               fragile=False)

            # This cannot be stored: with chunk_size=500 the float64 buffer
            # below would hold 1000*500*3*32*32 (about 1.5e9) values, roughly
            # 12 GB. The code is kept for reference only.
            # stream_X = np.zeros((n_chunks, cs, 3, 32, 32))
            # stream_y = np.zeros((n_chunks, cs))

            # for chunk_id in range(n_chunks):
            #     _X, _y = stream.get_chunk()
            #     stream_X[chunk_id] = _X
            #     stream_y[chunk_id] = _y

            # print(stream_y.shape, stream_X.shape)
            # np.save('streams/X_cs%i_nc%i_m_%s.npy' % (cs, nc, mode), stream_X)
            # np.save('streams/y_cs%i_nc%i_m_%s.npy' % (cs, nc, mode), stream_y)


  concept_proba = concept_proba / np.sum(concept_proba, axis=1)[:,None]


(1000, 25) (1000, 25, 3, 32, 32)
(1000, 25) (1000, 25, 3, 32, 32)
(1000, 25) (1000, 25, 3, 32, 32)
(1000, 25) (1000, 25, 3, 32, 32)
(1000, 25) (1000, 25, 3, 32, 32)
(1000, 25) (1000, 25, 3, 32, 32)
(1000, 25) (1000, 25, 3, 32, 32)
(1000, 25) (1000, 25, 3, 32, 32)
(1000, 25) (1000, 25, 3, 32, 32)
(1000, 250) (1000, 250, 3, 32, 32)
(1000, 250) (1000, 250, 3, 32, 32)
(1000, 250) (1000, 250, 3, 32, 32)
(1000, 250) (1000, 250, 3, 32, 32)


KeyboardInterrupt: 