In [10]:
import numpy as np
import pandas as pd

from keras.datasets import cifar10
from IPython.display import clear_output, display
import matplotlib.pyplot as plt
from time import sleep
def load_data(filename):
    """
    Load an image CSV (no header row) and return ``(features, labels)``.

    Column 0 holds the class label (or a placeholder for test data); the
    remaining columns are reshaped to ``(-1, 32, 32, 3)``.

    Input: filename - str or path-like. The file is treated as labelled when
           its *base name* contains 'train' or 'validate'. Directory names
           are ignored, so e.g. ``train_runs/test.csv`` is still test data.
    Returns: features - ndarray of shape (N, 32, 32, 3)
             labels   - ndarray of shape (N,) with 1 subtracted from the
                        stored label (zero-indexed), or None for test data
    """
    import os  # local import so the function does not depend on the import cell

    data = pd.read_csv(filename, header=None)

    # Decide from the file name only, not from the directories leading to it.
    base_name = os.path.basename(os.fspath(filename))
    has_labels = 'train' in base_name or 'validate' in base_name

    # The first column is never a feature: it is the label or a placeholder.
    features = data.iloc[:, 1:].values
    labels = data.iloc[:, 0].values - 1 if has_labels else None

    # Reshape features into 32x32x3 format for RGB images
    features = features.reshape((-1, 32, 32, 3))
    return features, labels


# Load the three splits. Each load_data call returns a (features, labels) pair;
# labels are None for the test file, so that slot is discarded into `_`.
X_train_data, y_train_data = load_data('./data/train.csv')
X_validate_data, y_validate_data = load_data('./data/validate.csv')
X_test_data, _ = load_data('./data/test.csv')  # Test data doesn't have labels

In [12]:


print(len(X_train_data))

# Reinterpret every split as (N, 3, 32, 32) and scale pixel values by 1/255.
# NOTE(review): this is a plain reshape (no axis transpose) of arrays that
# load_data shaped as (N, 32, 32, 3) - confirm the CSV stores pixels
# channel-first, otherwise the colour planes are scrambled here.
X_train, X_validate, X_test = (
    split.reshape(len(split), 3, 32, 32) / 255.0
    for split in (X_train_data, X_validate_data, X_test_data)
)

# One-hot encode the zero-indexed labels by picking rows of a 10x10 identity.
y_train, y_validate = (
    np.eye(10)[class_ids] for class_ids in (y_train_data, y_validate_data)
)

def initialise_param_lecun_normal(FILTER_SIZE, IMG_DEPTH, scale=1.0):
    """
    Draw one random filter of shape (IMG_DEPTH, FILTER_SIZE, FILTER_SIZE).

    Values are sampled from a zero-mean normal distribution whose standard
    deviation is ``scale * sqrt(1 / fan_in)`` with
    ``fan_in = FILTER_SIZE * FILTER_SIZE * IMG_DEPTH``; the sample is then
    divided by 9. Uses NumPy's global random state.
    """
    receptive_field = FILTER_SIZE * FILTER_SIZE * IMG_DEPTH
    sigma = scale * np.sqrt(1. / receptive_field)
    weights = np.random.normal(
        loc=0,
        scale=sigma,
        size=(IMG_DEPTH, FILTER_SIZE, FILTER_SIZE),
    )
    return weights / 9
def softmax(x):
    """
    Softmax along axis 0 of ``x``.

    The maximum along axis 0 is subtracted before exponentiating. This does
    not change the result mathematically but keeps ``np.exp`` from
    overflowing to inf (and the quotient from becoming NaN) for large inputs.

    Input: x - array-like; normalisation runs over axis 0
    Returns: ndarray of the same shape whose entries along axis 0 sum to 1
    """
    shifted = x - np.max(x, axis=0, keepdims=True)
    exps = np.exp(shifted)
    return exps / np.sum(exps, axis=0)
def relu(x):
    """
    Rectified linear unit, applied in place.

    Every negative entry of ``x`` is overwritten with 0 and the same array
    object is returned, so the caller's array is modified.
    """
    negative_entries = x < 0
    x[negative_entries] = 0
    return x
def conv(l, w, b, f, convd, filter, image):
    """
    Valid (no padding), stride-1 convolution of one image with ``l`` filters.

    Input: l      - number of filters / output channels
           w      - output width and height
           b      - per-filter biases, indexed as b[k]
           f      - filter side length
           convd  - output buffer, written at [k, row, col] for k < l and
                    row, col < w
           filter - filters, indexed as filter[k]; each is multiplied
                    elementwise with a (channels, f, f) window
           image  - input indexed as image[:, row:row+f, col:col+f]
    Returns: ``convd`` itself (not a copy). Calling this again with the same
             buffer overwrites the earlier result.
    """
    # np.ndindex walks (k, row, col) with the last index varying fastest,
    # the same order as three nested range loops.
    for k, row, col in np.ndindex(l, w, w):
        window = image[:, row:row + f, col:col + f]
        convd[k, row, col] = np.sum(window * filter[k]) + b[k]
    return convd
def maxpool(x, f, s):
    """
    Max-pool each channel of ``x`` with an ``f`` x ``f`` window and stride ``s``.

    The loops run over *output* positions and derive the input offset from
    the stride. Windows therefore never run past the input (no IndexError on
    sizes that are not a multiple of the stride), and strides other than 2
    land in the correct output cell.

    Input: x - ndarray of shape (channels, height, width)
           f - pooling window side length
           s - stride
    Returns: ndarray of shape
             (channels, (height - f)//s + 1, (width - f)//s + 1)
    """
    channels, height, width = x.shape
    out_h = (height - f) // s + 1
    out_w = (width - f) // s + 1
    pool = np.zeros((channels, out_h, out_w))
    for c in range(channels):
        for out_i in range(out_h):
            for out_j in range(out_w):
                i, j = out_i * s, out_j * s
                pool[c, out_i, out_j] = np.max(x[c, i:i + f, j:j + f])
    return pool
def nanargmax(a):
    """
    Multi-dimensional index of the largest entry of ``a``, skipping NaNs.

    Returns a tuple as produced by ``np.unravel_index``. When the plain
    argmax lands on a NaN, the NaNs are counted and the element just below
    them in a partial sort is used instead.
    """
    flat_pos = np.argmax(a, axis=None)
    position = np.unravel_index(flat_pos, a.shape)
    if not np.isnan(a[position]):
        return position

    # argmax picked a NaN: take the element ranked just before the block of
    # NaNs (argpartition places NaNs at the end of the ordering).
    n_nan = np.sum(np.isnan(a))
    kth = -n_nan - 1
    flat_pos = np.argpartition(a, kth, axis=None)[kth]
    return np.unravel_index(flat_pos, a.shape)

def forward(x, theta, convds, filters, f):
    """
    Forward pass: conv -> ReLU -> conv -> ReLU -> 2x2 max-pool -> dense -> softmax.

    Input: x       - batch of images; each x[i] is passed to conv as ``image``
           theta   - tuple (l, l1, w1, w2, w3, b1, b2, b3)
           convds  - tuple (c1, c2) of scratch buffers that conv writes into
           filters - tuple (f1, f2) of filter banks for the two conv layers
           f       - filter side length
    Returns: (m, n, o, flat, r, probs) - first conv activations, second conv
             activations, pooled maps, flattened pooled maps, dense-layer
             logits and per-sample softmax probabilities.
    """
    l, l1, w1, w2, w3, b1, b2, b3 = theta
    f1, f2 = filters
    c1, c2 = convds
    # Take the second layer's filter count from the filters themselves
    # instead of relying on a module-level `l2`.
    l2 = len(f2)

    # conv() returns the scratch buffer it was handed. Without the .copy(),
    # every list entry would alias that one buffer and the stacked batch
    # would hold N copies of the *last* sample's feature maps.
    m = np.array([conv(l1, w1, b1, f, c1, f1, sample).copy() for sample in x])
    m = relu(m)

    n = np.array([conv(l2, w2, b2, f, c2, f2, fmap).copy() for fmap in m])
    n = relu(n)

    # size 2, stride 2
    o = np.array([maxpool(fmap, 2, 2) for fmap in n])

    # flatten
    flat = o.reshape((len(o), (w2 // 2) * (w2 // 2) * l2))

    r = flat.dot(w3) + b3

    probs = np.array([softmax(logits) for logits in r])

    return m, n, o, flat, r, probs
def dfilter_init(n, l2, l1, f):
    """
    Allocate zeroed per-sample gradient accumulators for one conv layer.

    Both arrays are floating point. Building the bias accumulator from
    Python ints produced an integer array, which silently truncates any
    float gradient assigned into it.

    Input: n  - number of samples
           l2 - number of filters in the layer
           l1 - number of input channels of each filter
           f  - filter side length
    Returns: (df, db) with df.shape == (n, l2, l1, f, f) and
             db.shape == (n, l2), both float and filled with zeros
    """
    df = np.zeros((n, l2, l1, f, f))
    db = np.zeros((n, l2))
    return df, db
def backward(x, y, theta, convds, filters, f):
    """
    Run a forward pass and back-propagate the softmax / cross-entropy error.

    Input: x       - batch of images (see forward)
           y       - one-hot targets, reshaped to the shape of the
                     predicted probabilities
           theta   - tuple (l, l1, w1, w2, w3, b1, b2, b3)
           convds  - tuple (c1, c2) of conv scratch buffers
           filters - tuple (f1, f2) of filter banks
           f       - filter side length
    Returns: (dc1, dc2, df1, df2, dw3, db1, db2, db3).
             dc1, dc2, df1, df2, db1 and db2 carry a leading per-sample
             axis. dw3 and db3 are already *summed* over the batch (a
             matrix product and a sum over axis 0), so they have no sample
             axis.
    """
    l, l1, w1, w2, w3, b1, b2, b3 = theta
    # Use the filters that were passed in (the ones the optimizer updates),
    # not module-level names that may be stale copies.
    f1, f2 = filters
    l2 = len(f2)
    m, n, o, flat, r, probs = forward(x, theta, convds, filters, f)

    dout = probs - y.reshape(probs.shape)

    dw3 = flat.T.dot(dout)
    db3 = np.expand_dims(np.sum(dout, axis=0), axis=0)

    df = dout.dot(w3.T)

    # Undo forward's flatten, which reshaped (N, l2, w2//2, w2//2) to
    # (N, -1). Transposing first would mix samples with features.
    pooled = w2 // 2
    dpool = df.reshape((x.shape[0], l2, pooled, pooled))
    dc2 = np.zeros((len(n), l2, w2, w2))

    # Route each pooled gradient to the argmax position of its 2x2 window,
    # for every one of the l2 channels of the second conv layer.
    for nn in range(len(n)):
        for jj in range(l2):
            for pi in range(pooled):
                for pj in range(pooled):
                    i, j = 2 * pi, 2 * pj
                    (a, b) = nanargmax(n[nn][jj, i:i+2, j:j+2])
                    dc2[nn][jj, i+a, j+b] = dpool[nn][jj, pi, pj]

    # ReLU gate of the second conv layer
    dc2[n <= 0] = 0

    dc1 = np.zeros((len(m), l1, w1, w1))

    df2, db2 = dfilter_init(len(m), l2, l1, f)
    df1, db1 = dfilter_init(len(m), l1, l, f)

    for mm in range(len(m)):
        for jj in range(0, l2):
            for i in range(0, w2):
                for j in range(0, w2):
                    df2[mm][jj] += dc2[mm][jj, i, j] * m[mm][:, i:i+f, j:j+f]
                    dc1[mm][:, i:i+f, j:j+f] += dc2[mm][jj, i, j] * f2[jj]
            db2[mm][jj] = np.sum(dc2[mm][jj])

    # ReLU gate of the first conv layer
    dc1[m <= 0] = 0

    for mm in range(len(m)):
        for jj in range(0, l1):
            for i in range(0, w1):
                for j in range(0, w1):
                    df1[mm][jj] += dc1[mm][jj, i, j] * x[mm][:, i:i+f, j:j+f]
            db1[mm][jj] = np.sum(dc1[mm][jj])

    return dc1, dc2, df1, df2, dw3, db1, db2, db3
def average_grads(grads):
    """
    Average every entry of ``grads`` over its first axis.

    Input: grads - sequence of arrays
    Returns: list with one ``np.average(..., axis=0)`` result per entry,
             in the same order
    """
    averaged = []
    for stacked in grads:
        averaged.append(np.average(stacked, axis=0))
    return averaged
def optimize(grads, theta, convds, filters, lr=0.01):
    """
    Apply one gradient-descent step to the trainable parameters.

    Only the filters (f1, f2), the dense weights w3 and the biases
    (b1, b2, b3) are updated. The conv scratch buffers in ``convds`` are
    activations that conv overwrites on every call, not parameters, so they
    are returned untouched.

    Input: grads   - tuple (dc1, dc2, df1, df2, dw3, db1, db2, db3)
           theta   - tuple (l, l1, w1, w2, w3, b1, b2, b3)
           convds  - tuple (c1, c2); passed through unchanged
           filters - tuple (f1, f2) of ndarrays, updated in place
           lr      - learning rate
    Returns: (grads, theta, convds, filters), with theta rebuilt around the
             updated biases
    """
    dc1, dc2, df1, df2, dw3, db1, db2, db3 = grads
    l, l1, w1, w2, w3, b1, b2, b3 = theta
    f1, f2 = filters

    # In-place updates: the arrays referenced by `filters` and `theta` change.
    f1 -= df1 * lr
    f2 -= df2 * lr

    w3 -= dw3 * lr
    b3 -= db3 * lr

    # b1 / b2 may arrive as plain Python lists, so compute the step
    # explicitly on arrays instead of relying on `list -= ndarray`.
    b1 = np.asarray(b1, dtype=float) - np.asarray(db1) * lr
    b2 = np.asarray(b2, dtype=float) - np.asarray(db2) * lr

    theta = l, l1, w1, w2, w3, b1, b2, b3

    return grads, theta, convds, filters
def cross_entropy(predictions, targets, epsilon=1e-12):
    """
    Mean cross entropy between one-hot targets and predicted probabilities.

    Predictions are clipped to [epsilon, 1 - epsilon] and a further 1e-9 is
    added inside the logarithm.
    Input: predictions (N, k) ndarray
           targets (N, k) ndarray
    Returns: scalar
    """
    clipped = np.clip(predictions, epsilon, 1. - epsilon)
    sample_count = clipped.shape[0]
    weighted_log_probs = targets * np.log(clipped + 1e-9)
    return -np.sum(weighted_log_probs) / sample_count
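# Pure-NumPy convolution on the CPU takes a very long time to train.
# NOTE: this comment used to describe selecting 5 samples of class 9 and 5 of class 1;
# no such subset selection appears in this cell, and the training loop uses all of X_train.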
import numpy as np
import matplotlib.pyplot as plt

# Assuming X_train, y_train, X_validate, y_validate, X_test are already defined and preprocessed correctly

np.random.seed(2342342)  # For reproducibility

# Hyperparameters and model specifications
NUM_FILT1 = 16
NUM_FILT2 = 16
IMG_DEPTH = 3
FILTER_SIZE = 5
NUM_CLASSES = 10  # width of the one-hot targets
POOL_SIZE = 2     # forward() pools with a 2x2 window and stride 2

# Dimensions based on filter sizes and input
l, w, _ = X_train[0].shape  # assuming X_train[0] has the shape of (depth, width, height)
w1 = w - FILTER_SIZE + 1
w2 = w1 - FILTER_SIZE + 1
# Length of the flattened pooled maps; 12 * 12 * 16 = 2304 for 32x32 inputs.
FLAT_SIZE = (w2 // POOL_SIZE) ** 2 * NUM_FILT2

# Initialize parameters with initialise_param_lecun_normal, defined earlier in
# this cell (the second, identical definition that used to sit here is gone).
# The random draws happen in the same order as before: f1, f2, w3, b3.
# Filters are stored as ndarrays from the start so that the module-level
# f1 / f2 and the `filters` tuple refer to the same arrays and in-place
# optimizer updates are visible through both.
f1 = np.array([initialise_param_lecun_normal(FILTER_SIZE, IMG_DEPTH) for _ in range(NUM_FILT1)])
b1 = np.zeros(NUM_FILT1)
f2 = np.array([initialise_param_lecun_normal(FILTER_SIZE, NUM_FILT1) for _ in range(NUM_FILT2)])
b2 = np.zeros(NUM_FILT2)
w3 = np.random.normal(size=(FLAT_SIZE, NUM_CLASSES)) / 9
b3 = np.random.normal(size=(1, NUM_CLASSES)) / 9

l1, l2 = len(f1), len(f2)

# Setup for convolutional and fully connected layers
c1 = np.zeros((l1, w1, w1))
c2 = np.zeros((l2, w2, w2))
theta = l, l1, w1, w2, w3, b1, b2, b3
convds = c1, c2
filters = f1, f2

losses, accuracies = [], []

# Accuracy helper
def calculate_accuracy(predictions, labels):
    """
    Percentage of rows whose arg-max class matches between the two inputs.

    Input: predictions (N, k) array of class scores
           labels (N, k) array of one-hot targets
    Returns: accuracy as a percentage between 0 and 100
    """
    hits = np.argmax(predictions, axis=1) == np.argmax(labels, axis=1)
    return np.sum(hits) / len(labels) * 100

# Training loop (full-batch gradient descent on X_train)
for epoch in range(501):
    dc1, dc2, df1, df2, dw3, db1, db2, db3 = backward(
        X_train, y_train, theta, convds, filters, FILTER_SIZE
    )

    # backward() returns the conv-layer gradients per sample, so those are
    # averaged over the sample axis. dw3 and db3 are already summed over the
    # batch and have no sample axis: averaging dw3 over axis 0 would collapse
    # it from (features, classes) to (classes,). They are divided by the
    # batch size instead.
    dc1, dc2, df1, df2, db1, db2 = average_grads([dc1, dc2, df1, df2, db1, db2])
    batch_size = len(X_train)
    grads = dc1, dc2, df1, df2, dw3 / batch_size, db1, db2, db3 / batch_size

    grads, theta, convds, filters = optimize(grads, theta, convds, filters, lr=0.1)

    # Metrics are evaluated every epoch (an `epoch % 25` guard was disabled).
    train_out = forward(X_train, theta, convds, filters, FILTER_SIZE)[-1]
    train_loss = cross_entropy(train_out, y_train)
    train_accuracy = calculate_accuracy(train_out, y_train)

    validate_out = forward(X_validate, theta, convds, filters, FILTER_SIZE)[-1]
    validate_loss = cross_entropy(validate_out, y_validate)
    validate_accuracy = calculate_accuracy(validate_out, y_validate)

    # Only the training loss and the validation accuracy are recorded.
    losses.append(train_loss)
    accuracies.append(validate_accuracy)

    print(f'Epoch: {epoch:4d}, Train Loss: {train_loss:.3f}, Train Accuracy: {train_accuracy:.2f}%, Validate Loss: {validate_loss:.3f}, Validate Accuracy: {validate_accuracy:.2f}%')

# Plot the training loss per epoch. `losses` only receives train_loss in the
# loop above; the validation loss is printed but not stored or plotted.
plt.plot(losses)
plt.title('Loss over Epochs')
plt.xlabel('Epochs')
plt.ylabel('Loss')
plt.show()

# After training, predict on test data: take the probabilities (last element
# of forward's return value) and pick the most likely class per sample.
test_out = forward(X_test, theta, convds, filters, FILTER_SIZE)[-1]
test_predictions = np.argmax(test_out, axis=1)

# test_predictions holds zero-indexed class ids (load_data subtracts 1 from
# the stored training labels). Nothing is saved or evaluated here.
# NOTE(review): add 1 before exporting if the consumer expects the CSV's
# original label numbering - confirm against the submission format.


8000


KeyboardInterrupt: 