In [14]:
import numpy as np
from wisard_my import WiSARD1, generate_h3_values, WiSARD2
import matplotlib.pyplot as plt
from utils import *
from tqdm import tqdm

In [15]:
# Load the MNIST train/test splits. `get_mnist_dataset` is not imported by name in this
# notebook, so it presumably comes from the star import of `utils` — confirm.
# Later cells iterate over each split as (image, label) pairs.
train_dataset, test_dataset = get_mnist_dataset()

In [None]:
# Apply max pooling with a POOL_SIZE x POOL_SIZE window to every image and flatten
# the result again. The image side, the window size and the resulting side S are
# all derived from the two constants below so they cannot drift apart.
# NOTE: this cell overwrites train_dataset / test_dataset, so it is meant to be run
# once after loading; re-running it would try to reshape already-pooled images.
IMAGE_SIDE = 28
POOL_SIZE = 2
train_dataset = [(np.ravel(pooling(x.reshape(IMAGE_SIDE, IMAGE_SIDE), POOL_SIZE, 'max')), y) for x, y in train_dataset]
test_dataset = [(np.ravel(pooling(x.reshape(IMAGE_SIDE, IMAGE_SIDE), POOL_SIZE, 'max')), y) for x, y in test_dataset]
S = IMAGE_SIDE // POOL_SIZE

In [None]:
# Binarisation using thermometer encoding with `bits_per_input` bits per pixel.
# The helper returns inputs and labels for the train, validation and test splits.
bits_per_input = 2
(
    train_inputs, train_labels,
    val_inputs, val_labels,
    test_inputs, test_labels,
) = binarize_datasets(train_dataset, test_dataset, bits_per_input)

In [None]:
# Finding "bad" bits: bits that have (almost) the same value for all observations.
# A bit is "good" when its mean over all observations lies strictly between
# 10**(-3) and 1 - 10**(-3); every other bit is "bad", so the two index sets
# always partition the full bit range (including bits exactly on a threshold).
# NOTE(review): the test inputs take part in this statistic — confirm that using
# test data for feature selection is intended.
all_inputs = np.concatenate([train_inputs, test_inputs, val_inputs])
bit_means = np.mean(all_inputs, axis=0)  # computed once instead of four times
is_good = (bit_means > 10**(-3)) & (bit_means < 1 - 10**(-3))
bit_indices = np.arange(bit_means.size)  # follows the data instead of hard-coding S*S*2
good_pixels = bit_indices[is_good]
bad_pixels = bit_indices[~is_good]

In [None]:
# Visualise the mean value of every bit over all observations, with the "bad"
# bits blanked out (set to 0).
img = np.mean(all_inputs, axis=0)
img[bad_pixels] = 0

# The flat input vector is shown as two consecutive S x S planes.
fig, axs = plt.subplots(1, 2, figsize=(8, 4))
axs[0].imshow(img[:S*S].reshape(S, S))
axs[0].set_title('Mean bit value, bits [0, S*S)')
axs[1].imshow(img[S*S:].reshape(S, S))
axs[1].set_title('Mean bit value, bits [S*S, 2*S*S)')
plt.show()

In [None]:
def generate_features(img_bool, img_ind, filter_size):
    '''
    Collect every filter_size x filter_size window of a square image whose pixels are all "good".

    Parameters:
    -img_bool: Flat boolean array of a square image. True marks a good pixel, False a bad pixel.
    -img_ind: Flat array, indexed like img_bool, holding the index to report for each pixel.
    -filter_size: Side length of the square window.

    Returns:
    A list with one array per accepted window. Each array holds the img_ind entries of the
    window's pixels in row-major order. Windows are visited row by row, left to right.
    '''
    side = int(np.sqrt(len(img_bool)))

    # Flat offsets of a window anchored at the top-left corner of the image.
    row_offsets = np.arange(filter_size).reshape(-1, 1) * side
    col_offsets = np.arange(filter_size).reshape(1, -1)
    window = (row_offsets + col_offsets).ravel()

    # Slide the window over every anchor position where it fits completely.
    anchors = range(side - filter_size + 1)
    candidates = (window + (top * side) + left for top in anchors for left in anchors)

    # Keep only the windows that contain no bad pixel.
    return [img_ind[pixels] for pixels in candidates if np.all(img_bool[pixels])]

In [None]:
# Obtaining features: every 3x3 window that contains only "good" bits, separately
# for each of the two S x S planes of the input vector.
# The mask must start as all-False: starting from all-True (as np.ones would) marks
# every bit as good and the bad bits would never be filtered out.
img_bool = np.zeros(S * S * 2, dtype=bool)
img_bool[good_pixels] = True

img_bool_1 = img_bool[:S*S]
img_bool_2 = img_bool[S*S:]

# img_ind maps window positions back to indices in the full (two-plane) input vector.
features1 = generate_features(img_bool_1, np.arange(S*S), 3)
features2 = generate_features(img_bool_2, np.arange(S*S,2*S*S), 3)

# Join the two lists before converting, so that an empty list for one plane does not
# break the stacking with a dimension mismatch.
features = np.array(features1 + features2)
features.shape, features

In [None]:
# Defining model parameters. They are passed unchanged to generate_h3_values and to
# both WiSARD models below.
unit_inputs = 9  # equals the number of indices in each 3x3 feature
unit_entries = 512
unit_hashes = 2

In [None]:
# Generate the random values for the H3 hashing function once; the same values are
# passed to the per-feature models used for feature selection and to the final model.
# NOTE(review): no random seed is set in the visible cells. If generate_h3_values draws
# from a global RNG, results will differ between runs — confirm and seed if needed.
random_values = generate_h3_values(unit_inputs, unit_entries, unit_hashes)

In [None]:
# Calculating acc(f) and ord(f) for each feature for bleaching values from 1 to 20.
# For every candidate feature a single-feature model is trained once and then
# evaluated on the validation split under each bleaching value.
ACC = []
N_ACTIVE = []
n_val = len(val_inputs)

for io in tqdm(features):
    model = WiSARD1(10, unit_inputs, unit_entries, unit_hashes, random_values, input_order=io)
    for sample, label in zip(train_inputs, train_labels):
        model.train(sample, label)

    acc, n_active = [], []
    for bleach in range(1, 21):
        model.set_bleaching(bleach)

        # `response` is indexed by class label and summed, so it is presumably a
        # per-class activation vector — TODO confirm against WiSARD1.predict.
        hits = 0
        fired = 0
        for sample, label in zip(val_inputs, val_labels):
            response = model.predict(sample)
            hits += response[label]
            fired += np.sum(response)

        acc.append(hits / n_val)        # mean response of the true class
        n_active.append(fired / n_val)  # mean summed response over all classes

    ACC.append(acc)
    N_ACTIVE.append(n_active)

# One row per feature, one column per bleaching value.
ACC = np.array(ACC)
N_ACTIVE = np.array(N_ACTIVE)
BLEACHES = np.array([np.arange(1, 21) for _ in ACC])

In [None]:
def find_best_bleach(acc, n_active, bleaches, alpha, beta):
    '''
    Pick the smallest bleaching value for which a single feature is αβ-significant.

    A bleaching value qualifies when the matching n_active entry is at most beta and the
    matching acc entry, rounded to two decimals, is at least alpha.

    Parameters:
    -acc, n_active, bleaches: Arrays of equal length; position k describes bleaching value bleaches[k].
    -alpha: Lower bound for the rounded accuracy.
    -beta: Upper bound for n_active.

    Returns:
    The smallest qualifying bleaching value, or 0 if no value qualifies.
    '''
    qualifies = (n_active <= beta) & (np.round(acc, 2) >= alpha)
    candidates = bleaches[qualifies]

    if len(candidates) > 0:
        return np.min(candidates)
    return 0

In [None]:
# Searching for the best α, β and the corresponding bleaching values for features.
# For every (α, β) pair: select the αβ-significant features, train a model on them,
# and score it on the validation split. `params`, `accs` and `accs_cheat` are kept
# index-aligned, one entry per (α, β) pair.
alphas = [0.92, 0.93, 0.94, 0.95, 0.96, 0.97]
betas = [5.5, 6, 6.5, 7, 7.5, 8]

accs_cheat = []
params =  []

accs = []

for alpha in alphas:
    for beta in betas:
        params.append([alpha,beta])

        # Bleaching value per feature; 0 means the feature is not αβ-significant.
        best_bleaches = np.array([
            find_best_bleach(acc, n_active, bleaches, alpha, beta)
            for acc, n_active, bleaches in zip(ACC, N_ACTIVE, BLEACHES)
        ])
        selected = best_bleaches > 0
        good_featres = features[selected]

        if len(good_featres) == 0:
            # No feature survives: record a zero score in BOTH lists so they stay
            # aligned with `params`.
            accs.append(0)
            accs_cheat.append(0)
            continue

        good_model = WiSARD2(10, unit_inputs, unit_entries, unit_hashes, random_values, good_featres)

        for xv, l in zip(train_inputs, train_labels):
            good_model.train(xv, l)

        good_model.set_bleaching(best_bleaches[selected])

        predictions = []
        for xv in val_inputs:
            predictions.append(good_model.predict(xv))

        # `correct` counts samples whose first returned entry equals the label;
        # `correct_cheat` counts samples whose label appears anywhere in the result.
        correct_cheat = 0
        correct = 0
        for l, p in zip(val_labels, predictions):
            if l in p:
                correct_cheat += 1
            if l == p[0]:
                correct += 1

        accs.append(correct / len(val_labels))
        accs_cheat.append(correct_cheat / len(val_labels))

# Best (α, β) by validation accuracy; ties resolve to the first pair tried.
best_index = int(np.argmax(accs))
best_acc = accs[best_index]
best_params = params[best_index]

In [None]:
# Finding the αβ-significant features and the bleaching value of each feature for the
# best (α, β) pair. A bleaching value of 0 marks a feature that is not significant.
best_alpha, best_beta = best_params
best_bleaches = np.array([
    find_best_bleach(acc, n_active, bleaches, best_alpha, best_beta)
    for acc, n_active, bleaches in zip(ACC, N_ACTIVE, BLEACHES)
])
good_featres = features[best_bleaches > 0]
good_featres.shape

In [None]:
# Create the final model from the αβ-significant features only. It receives the same
# H3 random values as the per-feature models; the per-feature bleaching values are
# applied after training. The leading 10 is presumably the number of classes — confirm.
good_model = WiSARD2(10, unit_inputs, unit_entries, unit_hashes, random_values, good_featres)

In [None]:
# Training the final model on the train and validation data together, then setting
# the bleaching value of every selected feature.
X = np.vstack([train_inputs, val_inputs])
Y = np.concatenate([train_labels, val_labels])
for sample, label in zip(X, Y):
    good_model.train(sample, label)

# Only features with a non-zero bleaching value were selected for the model.
selected_bleaches = best_bleaches[best_bleaches > 0]
good_model.set_bleaching(selected_bleaches)

In [None]:
# Running inference on the test split and reporting the accuracy: the fraction of
# samples whose first returned entry equals the true label.
predictions = [good_model.predict(xv) for xv, _ in zip(test_inputs, test_labels)]
correct = sum(1 for l, p in zip(test_labels, predictions) if l == p[0])
correct / len(test_labels)