In [121]:
import numpy as np
import pandas as pd
import os
import cv2 
import time

import matplotlib.pyplot as plt

In [122]:
def read_Data() -> tuple[np.ndarray, np.ndarray]:
    data = []
    labels = []
    path = "/home/vvallhack/Projects/math-assistent/ocr/data"
    dir_list = os.listdir(path = path)

    for dir in dir_list:
        if dir[0] == '.': continue
        print(dir)
        img_dir = os.path.join(path,dir)
        for images in os.listdir(img_dir):
            i = 0
            image_path = os.path.join(img_dir, images)
            data.append(cv2.imread(image_path, 0).flatten())
            labels.append(i)
            i += 1

    X = np.array(data[0:2000], dtype = np.float32) / 255.0
    Y = np.array(labels)
    
    # One-hot encode the labels (assuming 10 classes)
    Y_one_hot = np.zeros((Y.size, Y.max()+1))
    Y_one_hot[np.arange(Y.size), Y] = 1

    return X, Y_one_hot

In [123]:
# data = read_Data()
# image = data[0]
# # Plot the image
# plt.imshow(image, cmap='gray')
# plt.axis('off')  # Hide axes for better visualization
# plt.show()
# # Print the size of the image
# print("Image size:", image.shape)

In [124]:
def train_test_split(X, Y, split):
    length = len(X)
    limit = int(np.floor(split*length))
    x_train = X[0:limit]
    y_train = Y[0:limit]
    x_test = X[limit:length]
    y_test = Y[limit:length]
    return x_train,y_train,x_test,y_test

def relu(input_layer):
    return np.maximum(0,input_layer)

def softmax(input_layer):
	exp_layer = np.exp(input_layer)
	softmax_layer = exp_layer/np.sum(exp_layer)
	return softmax_layer

def generate_weights(layers):
	weights = []
	np.random.seed(1337)
	for i in range(len(layers) - 1):
		#Adding 1 for bias
		w = 2*np.random.rand(layers[i]+1,layers[i+1]) - 1
		weights.append(w)
	return weights

In [125]:
#Feedforward network
def feedforward(x_vector,W):
	network = [np.append(1,np.array(x_vector))]
	for weight in W[:-1]:
		next_layer = relu(np.dot(network[-1],weight))
		network.append(np.append(1,next_layer))
	out_layer = softmax(np.dot(network[-1],W[-1]))
	network.append(out_layer)
	return network

#Backpropagation through the network
def backprop(network,y_vector,W,learning_rate):
	deltas = [np.subtract(network[-1],y_vector)]
	prev_layer = np.dot(W[-1],deltas[0])
	deltas.insert(0,prev_layer)
	for weight in list(reversed(W))[1:-1]:
		prev_layer = np.dot(weight,deltas[0][1:])
		deltas.insert(0,prev_layer)
	#Weight Update
	for l in range(len(W)):
		for i in range(len(W[l])):
			for j in range(len(W[l][i])):
				W[l][i][j] -= learning_rate*deltas[l][j]*network[l][i]

#Compute accuracy of the network for given weight parameters
def analyse_net(W,X,Y):
	correct_pred = 0
	for i in range(len(X)):
		y_pred = np.argmax(feedforward(X[i],W)[-1])
		if(y_pred==np.argmax(Y[i])):
			correct_pred+=1
	return np.round(correct_pred/i,4)

In [126]:
def train(x_train,y_train,W,epoch,learning_rate,x_test,y_test):
	for iteration in range(epoch):
		t0 = time.monotonic
		for i in range(len(x_train)):
			network = feedforward(x_train[i],W)
			backprop(network,y_train[i],W,learning_rate)
		t1 = time.monotonic
		print("Epoch",iteration+1,"Accuracy",analyse_net(W,x_train,y_train),"Time")

#Printing test data accuracy
def test_accuracy(x_test,y_test,W):
	print("Test Data Accuracy",analyse_net(W,x_test,y_test))

In [127]:
def run(hidden_layers,learning_rate,epoch,split):
	print("Epochs",epoch,"LR",learning_rate,"Hidden Layers",hidden_layers,"Split",split,sep="  ")
	X, Y = read_Data()
	input_layer = len(X[0])
	output_layer = len(Y[0])
	layers = [input_layer] + hidden_layers + [output_layer]
	W = generate_weights(layers)
	x_train,y_train,x_test,y_test = train_test_split(X,Y,split)
	train(x_train,y_train,W,epoch,learning_rate,x_test,y_test)
	test_accuracy(x_test,y_test,W)

# run([50],0.01,1,0.90)
run([55,25],0.003,5,0.9)

Epochs  5  LR  0.003  Hidden Layers  [55, 25]  Split  0.9
1
]
in
4
o
lim
y
tan
(
int
forward_slash
k
Delta
-
rightarrow
S
ascii_124
forall
[
geq
0
+
)
3
X
lt
R
l
w
theta
sum
H
pi
i
z
8
prime
v
{
!
6
infty
,
G
cos
beta
T
div
lambda
sqrt
times
leq
e
C
7
M
=
9
N
2
exists
5
mu
ldots
neq
alpha
f
p
gamma
sigma
A
q
sin
gt
log
phi
j
pm
}
u
b
d
Epoch 1 Accuracy 1.0006 Time
Epoch 2 Accuracy 1.0006 Time
Epoch 3 Accuracy 1.0006 Time
Epoch 4 Accuracy 1.0006 Time
Epoch 5 Accuracy 1.0006 Time
Test Data Accuracy 1.005
