In [None]:
import numpy as np
import matplotlib.pyplot as plt

import PyNetwork
import pyopencl as cl
import pyopencl.array as cl_array

# Set up the OpenCL execution environment shared by every model in this notebook.
# NOTE: `platform` holds the *list* of all available platforms; only the first
# one is used, and the context spans every device that platform reports.
platform = cl.get_platforms()
devices = platform[0].get_devices()
context = cl.Context(devices)
# Command queue through which work for `context` is submitted.
queue = cl.CommandQueue(context)

In [None]:
from emnist import extract_training_samples, extract_test_samples

In [None]:
# Use the EMNIST "letters" split: handwritten letters instead of handwritten
# digits, which should make the classification problem a bit harder.
x_train, y_train = extract_training_samples('letters')
x_test, y_test = extract_test_samples('letters')

# Shift both label arrays down by one so the class indices start at 0
y_train, y_test = y_train - 1, y_test - 1

In [None]:
# Number of distinct class labels that occur in the training set
N = np.unique(y_train).size
N

In [None]:
# NOTE: this cell overwrites x_* / y_* in place, so it must be run exactly once
# after the loading cell (re-running it would rescale the images again).
input_shape = (28, 28)

# Images: one (28, 28) float32 array per sample, pixel values divided by 255
x_train = x_train.reshape((-1, *input_shape)).astype(np.float32) / 255
x_test = x_test.reshape((-1, *input_shape)).astype(np.float32) / 255

# Row k of the identity matrix is the one-hot encoding of class k
labels = np.eye(N)

# Targets: look up the one-hot row of every (flattened) integer label
y_train = labels[y_train.ravel()].astype(np.float32)
y_test = labels[y_test.ravel()].astype(np.float32)

In [None]:
# Sanity check: display the dtype of the preprocessed training images
x_train.dtype

In [None]:
def make_model(dense_nodes, activation_functions, masks, initialized_weights,
               optimizer, input_shape=(28, 28)):
    """Build a dense-only PyNetwork model, optionally masked and pre-initialised.

    The network is ``Input -> Flatten -> Dense * len(dense_nodes)`` and is built
    against the notebook-level ``context`` and ``queue`` with a cross-entropy
    loss and an accuracy metric.

    Parameters
    ----------
    dense_nodes : sequence of int
        Number of nodes of each dense layer, in order.
    activation_functions : sequence of str
        Activation function name of each dense layer.
    masks : sequence
        One entry per dense layer. Each entry is forwarded to the layer as
        ``trainable_mask``; a non-``None`` entry is additionally multiplied
        into the layer's weights so the masked-out weights start at zero.
    initialized_weights : sequence
        One entry per dense layer. A non-``None`` entry replaces the layer's
        weights with a copy of itself; ``None`` keeps whatever weights the
        layer has after ``model.build``.
    optimizer
        Optimizer instance forwarded to ``model.build``.
    input_shape : tuple
        Shape of a single input sample.

    Returns
    -------
    The built ``PyNetwork.Sequential`` model.

    Raises
    ------
    AssertionError
        If the per-layer sequences do not all have the same length.
    """
    n_layers = len(dense_nodes)
    assert n_layers == len(activation_functions), \
            "Dense nodes and activation functions need to have the same length"
    # zip() below would otherwise silently drop layers / skip initialisations
    assert len(masks) == n_layers, \
            "Masks need one entry (array or None) per dense layer"
    assert len(initialized_weights) == n_layers, \
            "Initialized weights need one entry (array or None) per dense layer"

    # Build the network - assume we only use dense layers
    model = PyNetwork.Sequential()
    model.add(PyNetwork.layers.Input(input_shape))
    model.add(PyNetwork.layers.Flatten())

    for nodes, function, mask in zip(dense_nodes, activation_functions, masks):
        dense_layer = PyNetwork.layers.Dense(nodes, activation_function=function, l2=0.0, l1=0.0,
                                             trainable_mask=mask)
        model.add(dense_layer)

    model.build(context, queue, loss_function='cross_entropy', optimizer=optimizer, metrics='accuracy')

    # Select the dense layers by type rather than by position in the model
    dense_layers = [layer for layer in model.layers.values()
                    if isinstance(layer, PyNetwork.layers.Dense)]

    for layer, W, mask in zip(dense_layers, initialized_weights, masks):
        # Compare against None explicitly: the truth value of a multi-element
        # array is ambiguous and raises ValueError.
        if W is not None:
            # Copy so that later updates of the layer never touch the caller's array
            layer.W = W.copy()
        if mask is not None:
            # Set the weights not at the mask to zero
            # NOTE(review): assumes `layer.W` is the attribute the layer trains
            # on and that it supports in-place multiplication by `mask` - confirm.
            layer.W *= mask

    return model

# Full Network

In [None]:
# Architecture of the full network: three dense layers, the last one with
# one node per class, all using the same activation function.
dense_nodes = [300, 100, N]
activation_functions = ['relu'] * 3

# Baseline run: no layer is masked and no layer gets custom initial weights
masks = [None for _ in range(3)]
initialized_weights = [None for _ in range(3)]

In [None]:
# Build the full (unmasked) baseline network.
optimizer = PyNetwork.optimizers.Adam(learning_rate=0.0005)
model = make_model(dense_nodes, activation_functions, masks, initialized_weights, 
                   optimizer, input_shape=input_shape)

# Snapshot (copy) the weights of every dense layer *before* training; the
# lottery-ticket section reuses them as the initialisation of the pruned network.
starting_weights = [layer.W_gpu.copy() 
                    for layer in model.layers.values() if isinstance(layer, PyNetwork.layers.Dense)]

In [None]:
# Make every host array C-contiguous before it is copied to the device
x_train, y_train, x_test, y_test = (
    np.ascontiguousarray(host_array)
    for host_array in (x_train, y_train, x_test, y_test)
)

# Copy each host array to the OpenCL device through the shared command queue
x_train_gpu, y_train_gpu, x_test_gpu, y_test_gpu = (
    cl_array.to_device(queue, host_array)
    for host_array in (x_train, y_train, x_test, y_test)
)

In [None]:
# Train the full (unmasked) network on the device-resident training data.
model.train(x_train_gpu, y_train_gpu, epochs=5, batch_size=128, verbose=True)

# Snapshot (copy) the dense-layer weights *after* training; the pruning
# sections below derive their masks from these.
trained_weights = [layer.W_gpu.copy()
                   for layer in model.layers.values() if isinstance(layer, PyNetwork.layers.Dense)]

In [None]:
# Sanity check: display the shape of the device-side training images
x_train_gpu.shape

In [None]:
# Evaluate the trained full network: training set first, then test set
print(model.evaluate(x_train_gpu, y_train_gpu))
print(model.evaluate(x_test_gpu, y_test_gpu))

# Sparse Networks

## Randomly Sampled Network

In [None]:
# Quantile level used for the mask thresholds in the cells below: per layer,
# entries at or below the p-quantile are masked out (p = 0.5 -> about half).
p = 0.5

In [None]:
# Randomly sampled sparse network: for every dense layer draw uniform noise
# with the shape of the layer's weights and keep the positions whose noise
# exceeds the p-quantile of that noise. Roughly a (1 - p) fraction of the
# positions is kept, independently of the trained weight values.
masks = []
for W in trained_weights:
    W_random = np.random.rand(*W.shape)
    threshold = np.quantile(W_random, p)
    # Threshold the random draw itself. Comparing the trained weights against a
    # quantile of the noise would make the mask depend on the weight magnitudes
    # (and keep almost nothing), which is not a random mask.
    # NOTE(review): `mask` is a host-side NumPy boolean array - confirm this is
    # the form `make_model` / `trainable_mask` expects.
    mask = W_random > threshold

    masks.append(mask)

# Fresh initialisation (no stored weights) for the randomly masked network
initialized_weights = [None] * 3
optimizer = PyNetwork.optimizers.Adam(learning_rate=0.0005)
model = make_model(dense_nodes, activation_functions, masks, initialized_weights,
                   optimizer, input_shape=input_shape)

In [None]:
# Train on the device-resident arrays (x_train_gpu / y_train_gpu), exactly as
# the full network is trained, instead of the host-side NumPy arrays.
model.train(x_train_gpu, y_train_gpu, epochs=5, batch_size=128, verbose=True)

### Normal Pruning

In [None]:
# Magnitude pruning: for every dense layer keep only the weights whose absolute
# trained value lies strictly above the layer's own p-quantile.
# NOTE(review): `trained_weights` are copies of `layer.W_gpu`; confirm that
# np.quantile / abs operate on that array type as intended.
masks = []
for W in trained_weights:
    threshold = np.quantile(abs(W), p)
    mask = abs(W) > threshold
    
    masks.append(mask)
    
# The pruned network is re-initialised from scratch (no stored weights)
initialized_weights = [None] * 3
optimizer = PyNetwork.optimizers.Adam(learning_rate=0.0005)
model = make_model(dense_nodes, activation_functions, masks, initialized_weights, 
                   optimizer, input_shape=input_shape)

In [None]:
# Train on the device-resident arrays (x_train_gpu / y_train_gpu), exactly as
# the full network is trained, instead of the host-side NumPy arrays.
model.train(x_train_gpu, y_train_gpu, epochs=5, batch_size=128, verbose=True)

In [None]:
# Evaluate the pruned network on the device-resident arrays (training set
# first, then test set), the same inputs the full network is evaluated on.
print(model.evaluate(x_train_gpu, y_train_gpu))
print(model.evaluate(x_test_gpu, y_test_gpu))

### Lottery Ticket - Pruning & Use Original Initialization

In [None]:
# Lottery ticket: same magnitude-based masks as normal pruning (keep weights
# whose absolute trained value lies strictly above the layer's p-quantile) ...
# NOTE(review): `trained_weights` are copies of `layer.W_gpu`; confirm that
# np.quantile / abs operate on that array type as intended.
masks = []
for W in trained_weights:
    threshold = np.quantile(abs(W), p)
    mask = abs(W) > threshold
    
    masks.append(mask)
    
# ... but the surviving weights restart from the full network's original
# (pre-training) initialisation instead of a fresh one.
initialized_weights = starting_weights
optimizer = PyNetwork.optimizers.Adam(learning_rate=0.0005)
model = make_model(dense_nodes, activation_functions, masks, initialized_weights, 
                   optimizer, input_shape=input_shape)

In [None]:
# Train on the device-resident arrays (x_train_gpu / y_train_gpu), exactly as
# the full network is trained, instead of the host-side NumPy arrays.
model.train(x_train_gpu, y_train_gpu, epochs=5, batch_size=128, verbose=True)

In [None]:
# Evaluate the lottery-ticket network on the device-resident arrays (training
# set first, then test set), the same inputs the full network is evaluated on.
print(model.evaluate(x_train_gpu, y_train_gpu))
print(model.evaluate(x_test_gpu, y_test_gpu))